<a href="https://colab.research.google.com/github/Jeremy-su1/ai-algorithm/blob/main/tag_classifier_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
!pip install datasets>=2.18.0 transformers>=4.38.2 sentence-transformers>=2.5.1 setfit>=1.0.3 accelerate>=0.27.2 seqeval>=1.2.2

In [None]:
# Mount Google Drive at /content/drive; the model directory and label file
# used below are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np
# Load the fine-tuned model and its tokenizer

model_path = '/content/drive/My Drive/AiExpertCource/pj/tag/multi-label-bert'  # Directory where the model and tokenizer were saved
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

In [None]:
# Switch the model to evaluation mode (disables dropout for inference)
model.eval()

MegatronBertForSequenceClassification(
  (bert): MegatronBertModel(
    (embeddings): MegatronBertEmbeddings(
      (word_embeddings): Embedding(50048, 768, padding_idx=0)
      (position_embeddings): Embedding(2048, 768)
      (token_type_embeddings): Embedding(2, 768)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): MegatronBertEncoder(
      (layer): ModuleList(
        (0-11): 12 x MegatronBertLayer(
          (attention): MegatronBertAttention(
            (ln): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (self): MegatronBertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): MegatronBertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bi

In [None]:
# Prediction function
def predict(texts, threshold=0.5, max_length=2048, padding='max_length'):
    """Run multi-label tag prediction on one or more texts.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        texts: A string or a list of strings to classify.
        threshold: Probability above which a label counts as predicted.
        max_length: Maximum token length; longer inputs are truncated.
        padding: Padding strategy forwarded to the tokenizer. The default
            pads every input to ``max_length``; pass ``True`` to pad only to
            the longest input in the batch.

    Returns:
        A ``(predictions, probabilities)`` tuple of NumPy arrays with one row
        per input text and one column per label. ``predictions`` holds 0/1
        integers and ``probabilities`` holds the per-label sigmoid scores.
    """
    # Tokenize the texts and convert them to PyTorch tensors.
    inputs = tokenizer(
        texts,
        padding=padding,
        truncation=True,
        max_length=max_length,
        return_tensors='pt',
    )

    # Put the inputs on the same device as the model so the function also
    # works after the model has been moved to a GPU.
    device = next(model.parameters()).device
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Forward pass without gradient tracking (inference only).
    with torch.no_grad():
        logits = model(**inputs).logits

    # Sigmoid, not softmax: each label is scored independently (multi-label).
    # .cpu() is required before .numpy() when the logits live on a GPU.
    probabilities = torch.sigmoid(logits).cpu().numpy()

    # Binarize each label's probability against the threshold.
    predictions = (probabilities > threshold).astype(int)

    return predictions, probabilities

In [None]:
# Define sample input text(s) and run prediction on them
sample_texts = [
  "React Grid2 not respecting row/column definitions and missing responsiveness"
]

# Run the prediction
predictions, probabilities = predict(sample_texts)

In [None]:
# Load the list of the 100 tag names so predicted indices can be converted
# back to tag strings.

import json

# JSON is UTF-8 by specification; state the encoding explicitly instead of
# relying on the platform's locale default.
with open('/content/drive/My Drive/AiExpertCource/pj/tag/top_100_labels.json', 'r', encoding='utf-8') as f:
    classes = json.load(f)

# Samples whose tag was not among the 100 tags were relabelled as 'other',
# so append 'other' to the tag list.
classes.append('other')
class2id = {tag: index for index, tag in enumerate(classes)}
# Build the index -> tag map directly from the list so every index has an
# entry, even if a tag name happens to appear more than once.
id2class = dict(enumerate(classes))

In [None]:
# Display the per-label sigmoid probabilities returned by predict()
probabilities

array([[3.1522599e-01, 1.8041983e-03, 4.5674555e-03, 3.7946519e-03,
        1.1263339e-02, 3.2363955e-03, 6.3383949e-01, 3.5166661e-03,
        6.3759780e-01, 9.3468872e-04, 2.0843460e-03, 5.4257460e-02,
        2.5104363e-03, 3.2017741e-03, 1.5025787e-03, 1.3209330e-03,
        1.9090371e-03, 9.3450129e-04, 4.8495824e-03, 1.6677649e-03,
        2.2678012e-03, 2.3911735e-03, 9.1022305e-04, 1.2132094e-03,
        1.9570664e-03, 1.6971366e-03, 9.5163396e-04, 1.0741921e-03,
        2.9225310e-04, 3.5156761e-04, 1.0066819e-03, 1.2858546e-03,
        1.0301881e-03, 1.4172783e-03, 3.2645228e-04, 6.2668853e-04,
        3.9186599e-04, 1.7356706e-03, 8.3336572e-04, 3.9102431e-04,
        4.0053434e-04, 5.0224288e-04, 4.8807482e-04, 1.9534931e-03,
        6.5219059e-04, 1.2630585e-04, 7.6241628e-04, 1.1845772e-03,
        2.6106951e-03, 2.9852961e-03, 5.2139221e-04, 1.0064644e-03,
        2.7148807e-04, 1.2404710e-03, 1.5921679e-03, 1.1628291e-02,
        5.6831149e-04, 1.5193396e-03, 2.0316530e

In [None]:
# Display the 0/1 prediction vector for the first sample text
predictions[0]

array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# Look up and print the tag name for every label predicted as 1 in the
# first sample.
first_sample = np.asarray(predictions[0])
active_ids = np.flatnonzero(first_sample == 1)

for tag_id in active_ids:
    print(id2class[tag_id])

reactjs
