<a href="https://colab.research.google.com/github/Jeremy-su1/ai-algorithm/blob/main/final/single_multi_cls_llm_embed_mamba2_streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch transformers tqdm datasets
!pip install -q streamlit
!pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu118
!pip install causal-conv1d>=1.1.0
!pip install mamba-ssm
!pip install datasets evaluate accelerate
!pip install huggingface_hub
!export LC_ALL="en_US.UTF-8"
!export LD_LIBRARY_PATH="/usr/lib64-nvidia"
!export LIBRARY_PATH="/usr/local/cuda/lib64/stubs"
!ldconfig /usr/lib64-nvidia

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
%%writefile app.py

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BertTokenizer, BertModel, RobertaTokenizer, RobertaModel
from transformers import AutoConfig

import torch
import torch.nn as nn
import torch.nn.functional as F

import os
import random
import json
from collections import namedtuple
from dataclasses import dataclass, field, asdict
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
from mamba_ssm.utils.hf import load_config_hf, load_state_dict_hf

import evaluate
import numpy as np
from datasets import load_dataset
from transformers import Trainer
from transformers import AutoTokenizer, TrainingArguments
from huggingface_hub import notebook_login
notebook_login()

from dataclasses import dataclass, asdict
import json
import streamlit as st
from transformers import AutoTokenizer


# Streamlit 앱의 제목 설정
st.title('Classification Fine-tuning App')

# 사용자가 선택한 classification 유형
classification_type = st.selectbox('Select Classification Type', ['Single-Label Classification', 'Multi-Label Classification'])


if classification_type == 'Single-Label Classification':

    class DownstreamModel(nn.Module):
        """Classifier that fuses one multi-layer embedding with two single vectors.

        Slice ``i`` of ``input_l`` (taken along dim 1) is projected from 2048 to
        1024 features by its own layer. The projections are stacked with
        ``input_b`` and ``input_r``, and the pairwise dot products of that stack
        are squashed with ``2 * sigmoid(SIGMA * x) - 1``. The result is
        concatenated with the mean of ``input_l`` over dim 1 and passed through
        a three-layer MLP that ends in a softmax.

        Args:
            class_num: Number of output classes.
            SIGMA: Scale applied to the dot products before the sigmoid.
        """

        def __init__(self, class_num, SIGMA):
            super().__init__()
            self.SIGMA = SIGMA

            # Five independent projections, one per slice of input_l.
            self.compress_layers = nn.ModuleList(
                [
                    nn.Sequential(nn.Linear(2048, 1024), nn.ReLU(), nn.Dropout(0.5))
                    for _ in range(5)
                ]
            )

            # 2097 = 7 * 7 flattened dot products + 2048 mean-embedding features.
            self.fc1 = nn.Linear(2097, 1024)
            self.relu1 = nn.ReLU()
            self.dropout1 = nn.Dropout(0.5)
            self.fc2 = nn.Linear(1024, 256)
            self.relu2 = nn.ReLU()
            self.dropout2 = nn.Dropout(0.5)
            self.fc3 = nn.Linear(256, class_num)
            self.softmax = nn.Softmax(dim=1)

        def forward(self, input_l, input_b, input_r):
            """Return softmax class probabilities, one row per batch element."""
            batch_size = input_l.shape[0]

            # Cut input_l into size-1 slices along dim 1, flatten each slice to
            # (batch, features) and send slice i through projection i.
            vectors = [
                self.compress_layers[idx](piece.reshape(batch_size, -1))
                for idx, piece in enumerate(torch.split(input_l, 1, dim=1))
            ]
            # input_b and input_r join the stack without a projection.
            vectors.extend((input_b, input_r))
            stacked = torch.stack(vectors, dim=1)

            # X @ X^T, flattened per sample.
            gram = torch.matmul(stacked, stacked.transpose(1, 2)).reshape(batch_size, -1)
            # Squash every entry into (-1, 1).
            gram = 2 * F.sigmoid(self.SIGMA * gram) - 1

            features = torch.cat([gram, torch.mean(input_l, dim=1)], dim=1)

            hidden = self.dropout1(self.relu1(self.fc1(features)))
            hidden = self.dropout2(self.relu2(self.fc2(hidden)))

            # Softmax over dim 1 turns the scores into class probabilities.
            return self.softmax(self.fc3(hidden))



    # Load the tokenizers and models for Llama 3.2, BERT, and RoBERTa.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # class_num sizes the classifier's output layer, so it has to agree with the
    # checkpoint loaded below.
    class_num = 5
    SIGMA = 0.1  # SIGMA value for the downstream model
    model_load_path = "/content/drive/MyDrive/LLMEmbed/model_weights_stackexchange_llama3_2.pth"

    @st.cache_resource
    def _load_single_label_models(device_name):
        """Load the three encoders and the downstream classifier once.

        Streamlit re-executes the script on every widget interaction; caching
        this loader keeps the models from being rebuilt on each rerun.

        Args:
            device_name: String form of the torch device to place the models on.

        Returns:
            Tuple ``(llama_tokenizer, llama_model, bert_tokenizer, bert_model,
            roberta_tokenizer, roberta_model, downstream_model)``; every model is
            in eval mode on the requested device.
        """
        target = torch.device(device_name)
        # The access token is taken from the HF_TOKEN environment variable so
        # that no credential lives in the source. When it is unset, None is
        # passed and the library falls back to any stored login.
        hf_token = os.environ.get("HF_TOKEN")
        llama_id = "meta-llama/Llama-3.2-1B"

        llama_tok = AutoTokenizer.from_pretrained(llama_id, token=hf_token, trust_remote_code=True)
        llama_tok.pad_token = llama_tok.eos_token  # reuse EOS as the padding token
        # output_hidden_states=True makes the model return every layer's states.
        llama_cfg = AutoConfig.from_pretrained(llama_id, token=hf_token, output_hidden_states=True)
        llama = AutoModelForCausalLM.from_pretrained(llama_id, token=hf_token, config=llama_cfg)

        bert_tok = BertTokenizer.from_pretrained('google-bert/bert-large-uncased')
        bert = BertModel.from_pretrained('google-bert/bert-large-uncased')

        roberta_tok = RobertaTokenizer.from_pretrained('FacebookAI/roberta-large')
        roberta = RobertaModel.from_pretrained('FacebookAI/roberta-large')

        # Put every encoder in evaluation mode on the target device.
        for encoder in (llama, bert, roberta):
            encoder.eval().to(target)

        classifier = DownstreamModel(class_num, SIGMA).to(target)
        # NOTE: torch.load unpickles the file; only point it at a trusted checkpoint.
        classifier.load_state_dict(torch.load(model_load_path, map_location=target))
        classifier.eval()

        return llama_tok, llama, bert_tok, bert, roberta_tok, roberta, classifier

    (
        llama2_tokenizer,
        llama2_model,
        bert_tokenizer,
        bert_model,
        roberta_tokenizer,
        roberta_model,
        downstream_model,
    ) = _load_single_label_models(str(device))
    llama2_config = llama2_model.config

    def get_llama2_embedding(text, tokenizer, model, device):
        """Return per-layer mean embeddings from the last five hidden-state entries.

        ``text`` is tokenized to a fixed length of 128 (padded and truncated).
        For each of ``hidden_states[-1]`` down to ``hidden_states[-5]`` the mean
        over dim 1 is taken, and the five results are stacked along dim 1,
        last entry first.
        """
        encoded = tokenizer(
            text,
            return_tensors='pt',
            max_length=128,
            padding="max_length",
            truncation=True,
        )
        encoded = encoded.to(device)
        with torch.no_grad():
            layer_states = model(**encoded).hidden_states
            pooled = []
            for back in range(1, 6):
                # back = 1 is the final entry, back = 5 the fifth from the end.
                pooled.append(torch.mean(layer_states[-back], dim=1))
            stacked = torch.stack(pooled, dim=1)
        return stacked

    def get_bert_embedding(text, tokenizer, model, device):
        """Return the model's ``pooler_output`` for ``text``.

        ``text`` is tokenized to a fixed length of 128 (padded and truncated)
        and moved to ``device`` before a gradient-free forward pass.
        """
        encoded = tokenizer(
            text,
            return_tensors='pt',
            max_length=128,
            padding="max_length",
            truncation=True,
        )
        encoded = encoded.to(device)
        with torch.no_grad():
            pooled = model(**encoded).pooler_output
        return pooled

    def get_roberta_embedding(text, tokenizer, model, device):
        """Return the first-position vector of ``last_hidden_state`` for ``text``.

        ``text`` is tokenized to a fixed length of 128 (padded and truncated)
        and moved to ``device`` before a gradient-free forward pass.
        """
        encoded = tokenizer(
            text,
            return_tensors='pt',
            max_length=128,
            padding="max_length",
            truncation=True,
        )
        encoded = encoded.to(device)
        with torch.no_grad():
            final_states = model(**encoded).last_hidden_state
            # Keep position 0 of every sequence.
            first_token = final_states[:, 0, :]
        return first_token

    def infer(text, downstream_model, device):
        """Run ``text`` through all three encoders and the downstream model.

        The encoders and their tokenizers are the ones loaded at script level;
        only the downstream model and the device are supplied by the caller.
        Returns whatever ``downstream_model`` returns for the three embeddings.
        """
        # One embedding per encoder, passed on in Llama, BERT, RoBERTa order.
        embeddings = (
            get_llama2_embedding(text, llama2_tokenizer, llama2_model, device),
            get_bert_embedding(text, bert_tokenizer, bert_model, device),
            get_roberta_embedding(text, roberta_tokenizer, roberta_model, device),
        )

        # Forward pass through the downstream model without gradient tracking.
        with torch.no_grad():
            return downstream_model(*embeddings)


    # Label map: class index -> category name.
    label_map = dict(enumerate(["biology", "cooking", "diy", "travel", "stackoverflow"]))

    # Heading for this mode.
    st.markdown('##### Single-Label Classification - LLMEmbed')

    # Default sample texts, one per input row.
    default_texts = [
        "What are the theoretical and actual (measured) minimum water potentials in plants?",
        "What tastes like marigold? I'm looking to make Georgian Satsivi",
        "How can I intentionally make my toilet make this noise? For Halloween I want to haunt my toilets and have them make the noise heard here:",
        "What are the hours of operation of the Ankara Metro? What are the times of the first and last trains on the Ankara Metro?",
        "How to use \"HTML form target self\" ? I am new to HTML and need to complete a simple task.",
    ]

    # Current input texts and the placeholders that will show each row's result.
    text_containers = []
    result_label_containers = []
    result_score_containers = []

    # Build one row per sample: editable text, predicted label, score.
    for row, sample in enumerate(default_texts):
        with st.container():
            text_col, label_col, score_col = st.columns([3, 1, 1])
            text_containers.append(
                text_col.text_area("Text", value=sample, height=100, key=f"text_{row}")
            )

            # Empty placeholders so the result boxes can be replaced later.
            label_slot = label_col.empty()
            label_slot.text_area("Predicted label", "", height=100, key=f"label_{row}", disabled=True)
            score_slot = score_col.empty()
            score_slot.text_area("Score", "", height=100, key=f"score_{row}", disabled=True)

            result_label_containers.append(label_slot)
            result_score_containers.append(score_slot)

    # Classify every row when the button is pressed.
    if st.button('Classify All Texts'):
        rows = zip(text_containers, result_label_containers, result_score_containers)
        for row, (sample, label_slot, score_slot) in enumerate(rows):
            probabilities = infer(sample, downstream_model, device)
            best_class = torch.argmax(probabilities, dim=1).item()
            best_score = torch.max(probabilities).item()
            category = label_map[best_class]

            # Overwrite the placeholders with the prediction for this row.
            label_slot.text_area("Category", value=category, height=100, key=f"updated_label_{row}", disabled=True)
            score_slot.text_area("Score", value=f"{best_score:.4f}", height=100, key=f"updated_score_{row}", disabled=True)

elif classification_type == 'Multi-Label Classification':


    @dataclass
    class MambaConfig:
        """Configuration values passed to the Mamba model constructor.

        ``ssm_cfg``, ``attn_layer_idx`` and ``attn_cfg`` default to ``None`` and
        are replaced after construction with ``{"layer": "Mamba2"}``, ``[]`` and
        ``{}`` respectively, so each instance gets its own container.
        """

        d_model: int = 768
        d_intermediate: int = 0
        n_layer: int = 24
        vocab_size: int = 50277
        ssm_cfg: dict = None
        attn_layer_idx: list = None
        attn_cfg: dict = None
        rms_norm: bool = True
        residual_in_fp32: bool = True
        fused_add_norm: bool = True
        pad_vocab_size_multiple: int = 16
        tie_embeddings: bool = True

        def __post_init__(self):
            # A None value means "not supplied": build a fresh default for it.
            fallbacks = (
                ("ssm_cfg", lambda: {"layer": "Mamba2"}),
                ("attn_layer_idx", list),
                ("attn_cfg", dict),
            )
            for attr, make_default in fallbacks:
                if getattr(self, attr) is None:
                    setattr(self, attr, make_default())

        def to_dict(self):
            """Return the configuration as a plain dictionary."""
            return asdict(self)

        def to_json_string(self):
            """Return the configuration serialized as a JSON string."""
            as_mapping = asdict(self)
            return json.dumps(as_mapping)


    # Tag vocabulary; a tag's position in this list is its class id.
    classes = [
        'Algorithms',
        'Backend',
        'Data Science',
        'Databases',
        'Dev Tools',
        'Frontend',
        'Mobile',
        'Systems',
        'iOS/macOS',
    ]
    # Forward and reverse lookups derived from the list order.
    class2id = {tag: idx for idx, tag in enumerate(classes)}
    id2class = dict(enumerate(classes))


    class MambaClassificationHead(nn.Module):
        """Single linear layer mapping ``d_model`` features to ``num_classes`` outputs.

        Any extra keyword arguments are forwarded to ``nn.Linear`` unchanged.
        """

        def __init__(self, d_model, num_classes, **kwargs):
            super().__init__()
            projection = nn.Linear(d_model, num_classes, **kwargs)
            # The attribute name fixes the parameter keys in the state dict.
            self.classification_head = projection

        def forward(self, hidden_states):
            """Apply the linear layer to ``hidden_states`` and return the result."""
            scores = self.classification_head(hidden_states)
            return scores

    class MambaTextClassification(MambaLMHeadModel):
        """Mamba backbone with a mean-pooled linear classification head.

        The language-model head of the parent class is removed and replaced by
        a ``MambaClassificationHead`` with one output per entry in ``classes``.
        """

        # Output types are built once here instead of on every forward call.
        # Both keep the original type name "ClassificationOutput".
        _LogitsOutput = namedtuple("ClassificationOutput", ["logits"])
        _LossOutput = namedtuple("ClassificationOutput", ["loss", "logits"])

        def __init__(
            self,
            config: MambaConfig,
            initializer_cfg=None,
            device=None,
            dtype=None,
        ) -> None:
            super().__init__(config, initializer_cfg, device, dtype)

            # Build the head with the same device/dtype as the backbone; with
            # the default None values this matches nn.Linear's own defaults.
            self.classification_head = MambaClassificationHead(
                d_model=config.d_model,
                num_classes=len(classes),
                device=device,
                dtype=dtype,
            )
            del self.lm_head
            self.multi_label = True
            self.id2label = id2class
            self.class2id = class2id

        @classmethod
        def addMambaClassificationHead(cls, num_classes, id2label, class2id, multi_label):
            """Unsupported hook kept for interface compatibility; always raises.

            A classmethod has no ``self``, so the earlier body could only fail
            with ``NameError``. Before failing it also replaced the class-level
            ``classification_head`` attribute with the head class itself, which
            would shadow the per-instance head. The head is created in
            ``__init__`` instead.

            Raises:
                NotImplementedError: Always.
            """
            raise NotImplementedError(
                "addMambaClassificationHead is not supported; the classification "
                "head is created in MambaTextClassification.__init__."
            )

        def forward(self, input_ids, attention_mask=None, labels=None):
            """Compute logits, and a loss when ``labels`` is given.

            Args:
                input_ids: Token ids passed to the backbone.
                attention_mask: Accepted for interface compatibility but not
                    used, so every position contributes to the mean.
                labels: Optional targets; when given, a loss is also returned.

            Returns:
                ``ClassificationOutput(logits)`` without labels, otherwise
                ``ClassificationOutput(loss, logits)``.
            """
            hidden_states = self.backbone(input_ids)
            # Pool over dim 1 to get one vector per batch element.
            mean_hidden_states = hidden_states.mean(dim=1)

            logits = self.classification_head(mean_hidden_states)
            if labels is None:
                return self._LogitsOutput(logits=logits)

            if self.multi_label:
                # BCEWithLogitsLoss needs floating-point targets; the cast is a
                # no-op when the labels already have the logits' dtype.
                loss = nn.BCEWithLogitsLoss()(logits, labels.to(logits.dtype))
            else:
                loss = nn.CrossEntropyLoss()(logits, labels)
            return self._LossOutput(loss=loss, logits=logits)

        def predict(self, text, tokenizer):
            """Classify a single text.

            Args:
                text: Input string.
                tokenizer: Callable returning a mapping with ``'input_ids'``.

            Returns:
                In multi-label mode, the list of labels whose sigmoid
                probability exceeds 0.5. Otherwise the single arg-max label.
                Text that yields no tokens gives ``[]`` (multi-label) or
                ``None`` (single-label) without running the model.
            """
            token_ids = tokenizer(text)['input_ids']
            if len(token_ids) == 0:
                # An empty id list cannot be fed to the backbone.
                return [] if self.multi_label else None

            # Run on whatever device the model lives on, not a fixed 'cuda'.
            device = next(self.parameters()).device
            input_ids = torch.tensor(token_ids, device=device)[None]
            with torch.no_grad():
                logits = self.forward(input_ids).logits[0]

            if self.multi_label:
                # .float() keeps the NumPy conversion working for dtypes NumPy
                # does not support.
                probabilities = torch.sigmoid(logits).float().cpu().numpy()
                predictions = (probabilities > 0.5).astype(int)
                return [self.id2label[i] for i, value in enumerate(predictions) if value == 1]

            label = int(np.argmax(logits.float().cpu().numpy()))
            return self.id2label[label]

        @classmethod
        def from_pretrained(cls, pretrained_model_name, device=None, dtype=None, **kwargs):
            """Build the model from a Hugging Face checkpoint name.

            The checkpoint's config and state dict are fetched with the
            ``mamba_ssm`` helpers. Weights are loaded with ``strict=False``, and
            the parameter names absent from the checkpoint are printed.
            """
            config_data = load_config_hf(pretrained_model_name)
            config = MambaConfig(**config_data)

            model = cls(config, device=device, dtype=dtype, **kwargs)

            model_state_dict = load_state_dict_hf(pretrained_model_name, device=device, dtype=dtype)
            # strict=False tolerates keys that differ between model and checkpoint.
            model.load_state_dict(model_state_dict, strict=False)

            print("Newly initialized embedding:", set(model.state_dict().keys()) - set(model_state_dict.keys()))
            return model

    # Heading for this mode.
    st.markdown('##### Multi-Label Classification - mamba2')

    # Split the layout into two columns: input on the left, result on the right.
    col1, col2 = st.columns(2)

    # Example text pre-filled in the input box.
    example_text = """OAuth 2.0 Not Working Properly with Kakao Login in React App
    I'm a computer science student

    I'm sorry if there are any typos in my english

    If you need more information my code ask to me

    I’m implementing Kakao and Naver login with OAuth 2.0 in my React app. The login process appears to succeed, and the URL changes to indicate successful login (with the code parameter in the URL), but no user information is being logged to the console, and nothing is stored in local storage. Additionally, I’m not seeing any console output from console.log statements in my callback component.
    """

    with col1:  # input field in the first column
        text = st.text_area("Text", value=example_text, height=300)

    # Streamlit re-executes the script on every interaction; caching the loader
    # keeps the model and tokenizer from being rebuilt on each rerun.
    @st.cache_resource
    def load_model_and_tokenizer():
        """Load the fine-tuned classifier and its tokenizer.

        Returns:
            Tuple ``(model, tokenizer)`` with the model moved to CUDA and the
            tokenizer's pad token id set to its EOS token id.
        """
        model = MambaTextClassification.from_pretrained("ebinna/multi_cls_mamba2-130m")
        model.to("cuda")
        tokenizer = AutoTokenizer.from_pretrained("ebinna/multi_cls_mamba2-130m")
        tokenizer.pad_token_id = tokenizer.eos_token_id
        return model, tokenizer

    model, tokenizer = load_model_and_tokenizer()

    # The result box starts out empty.
    result_text = ""

    # Run the classifier when the button is pressed.
    if st.button('Classify Text'):
        prediction = model.predict(text, tokenizer)
        result_text = str(prediction)

    with col2:  # the result area is always shown in the second column
        st.text_area("Tags", value=result_text, height=300, disabled=True)

Writing app.py


In [None]:
# Install the localtunnel npm package, which the launch cell runs through npx.
!npm install localtunnel

[K[?25h
added 22 packages, and audited 23 packages in 1s

3 packages are looking for funding
  run `npm fund` for details

2 [33m[1mmoderate[22m[39m severity vulnerabilities

To address all issues, run:
  npm audit fix

Run `npm audit` for details.


In [None]:
# Start the Streamlit app in the background with its output redirected to
# logs.txt, open a localtunnel to port 8501 in the background, and print this
# machine's public IPv4 address.
# NOTE(review): the log path assumes Google Drive is mounted at /content/drive — confirm.
# NOTE(review): "&>" redirects both stdout and stderr only under bash — confirm the shell.
!streamlit run /content/app.py &>/content/drive/MyDrive/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com

35.227.33.188
your url is: https://pink-actors-go.loca.lt
