In [None]:
!pip install streamlit
!pip install pyngrok

# KoBERT를 활용한 문장감정분석 모델

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells change into and load files from
# paths under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## KoBERT 깃허브에서 불러오기
!pip install git+https://git@github.com/SKTBrain/KoBERT.git@master

## 필요 라이브러리 설치
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://****@github.com/SKTBrain/KoBERT.git@master
  Cloning https://****@github.com/SKTBrain/KoBERT.git (to revision master) to /tmp/pip-req-build-qtf3_z4q
  Running command git clone --filter=blob:none --quiet 'https://****@github.com/SKTBrain/KoBERT.git' /tmp/pip-req-build-qtf3_z4q
  Resolved https://****@github.com/SKTBrain/KoBERT.git to commit 47a69af87928fc24e20f571fe10c3cc9dd9af9a3
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting boto3<=1.15.18
  Downloading boto3-1.15.18-py2.py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 KB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gluonnlp<=0.10.0,>=0.6.0
  Downloading gluonnlp-0.10.0.tar.gz (344 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m344.5/344.5 KB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata 

In [None]:
cd "/content/drive/MyDrive/final project/code/streamlit/"

/content/drive/MyDrive/final project/code/streamlit


In [None]:
%%writefile KoBERT.py

import streamlit as st

## KoBERT
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model

from torch.utils.data import Dataset, DataLoader
from torch import nn
import torch
import gluonnlp as nlp
import numpy as np

## Device selection
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# instead of naming a device that does not exist on this runtime.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class BERTClassifier(nn.Module):
    """Linear classification head on top of a BERT encoder's pooled output.

    Args:
        bert: Encoder module. It is called with ``input_ids``, ``token_type_ids``
            and ``attention_mask`` keyword arguments and must return a 2-tuple
            whose second element is the pooled representation.
        hidden_size: Width of the pooled representation fed to the linear layer.
        num_classes: Number of output classes (logits per sample).
        dr_rate: Dropout probability applied to the pooled output. When falsy
            (``None`` or ``0``) no dropout layer is created or applied.
        params: Accepted for signature compatibility; not used by this class.
    """

    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=7,
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Build a float mask with ones on the first ``valid_length[i]`` positions of row ``i``.

        The result has the same shape and device as ``token_ids``
        (indexing assumes ``token_ids`` is at least 2-D: one row per sample).
        """
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Return the classifier logits for a batch.

        Args:
            token_ids: Integer token-id tensor, one row per sample.
            valid_length: Per-sample count of real (non-padding) tokens.
            segment_ids: Token-type ids, same shape as ``token_ids``.
        """
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids=token_ids,
                              token_type_ids=segment_ids.long(),
                              attention_mask=attention_mask.to(token_ids.device))
        # Without dropout the pooled output goes straight to the classifier.
        # Previously `out` was only assigned inside the `if`, so the default
        # dr_rate=None raised UnboundLocalError here.
        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.classifier(out)

## Pretrained KoBERT backbone and its vocabulary (`vocab` is needed to build the tokenizer below)
bertmodel, vocab = get_pytorch_kobert_model()

## Load the fine-tuned classifier checkpoint
# NOTE(security): torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints from a source you trust.
# map_location places the checkpoint's tensors on `device`, whichever device they
# were saved from, so no separate GPU/CPU variant of this line is needed.
model = torch.load('/content/drive/MyDrive/final project/data/models/7emotions_model.pt', map_location=device)

## 4. Data preprocessing (tokenization, integer encoding, padding)
class BERTDataset(Dataset):
    """Dataset that pre-computes BERT input features and labels for every row.

    Each row of ``dataset`` is indexed with ``sent_idx`` to get the sentence and
    with ``label_idx`` to get the label. Sentences are converted once, up front,
    by ``nlp.data.BERTSentenceTransform``; labels are stored as ``np.int32``.
    """

    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        to_features = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        # Two separate passes over `dataset`: features first, then labels.
        encoded = []
        for row in dataset:
            encoded.append(to_features([row[sent_idx]]))
        self.sentences = encoded

        targets = []
        for row in dataset:
            targets.append(np.int32(row[label_idx]))
        self.labels = targets

    def __getitem__(self, i):
        # One sample = the transformed features with the label appended as the last element.
        features = self.sentences[i]
        label = self.labels[i]
        return features + (label, )

    def __len__(self):
        # Sample count is defined by the number of stored labels.
        count = len(self.labels)
        return count

## Settings
# Read by predict(): the sequence length handed to BERTDataset and the DataLoader batch size.
batch_size = 64
max_len = 64

# Training-style hyperparameters. Nothing in the visible part of this script reads
# them; they are kept so the module's names stay unchanged.
learning_rate = 5e-5
num_epochs = 5
warmup_ratio = 0.1
max_grad_norm = 1
log_interval = 200

## 8. Testing the result
## Tokenization
# NOTE(review): get_tokenizer() presumably returns the cached SentencePiece model
# shipped with KoBERT - confirm against the kobert package.
tokenizer = get_tokenizer()
# Build the tokenizer from that model and the KoBERT vocabulary.
# lower=False presumably leaves the input's letter case untouched - confirm in gluonnlp docs.
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

## Emotion prediction
# Korean phrase (emotion noun + subject particle) for each class index the model emits.
_EMOTION_PHRASES = ("공포가", "놀람이", "분노가", "슬픔이", "중립이", "행복이", "혐오가")


def predict(predict_sentence):
    """Classify one sentence and return a Korean sentence naming the detected emotion.

    Args:
        predict_sentence: The text to classify.

    Returns:
        ``"<emotion phrase> 느껴집니다."`` where the phrase is looked up from the
        arg-max class index of the model's logits.

    Raises:
        IndexError: If the arg-max index has no entry in the phrase table.
    """
    # BERTDataset needs a label column; '0' is a placeholder that inference ignores.
    samples = [[predict_sentence, '0']]
    dataset = BERTDataset(samples, 0, 1, tok, max_len, True, False)

    # num_workers=0: loading a single sample in-process avoids spawning worker
    # processes on every prediction.
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=0)

    model.eval()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # The dataset holds exactly one sample, so the first batch is the only batch.
        token_ids, valid_length, segment_ids, _ = next(iter(loader))
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        out = model(token_ids, valid_length, segment_ids)

    logits = out[0].detach().cpu().numpy()
    return _EMOTION_PHRASES[int(np.argmax(logits))] + " 느껴집니다."

## Text input
message = st.text_area("감정을 기록해주세요")

if st.button("기록", key='message'):
    # Keep the text exactly as typed, minus surrounding whitespace. The previous
    # message.title() call re-capitalised Latin text before it was displayed and
    # before it was tokenized, so the model saw altered input.
    result = message.strip()

    if not result:
        # Nothing to classify: ask for input instead of running the model on an empty string.
        st.warning("감정을 먼저 입력해주세요.")
    else:
        st.success(result)
        st.text("기록이 완료됐습니다.")

        ## Show the sentence-level emotion prediction
        st.write(predict(result))

Writing KoBERT.py


In [None]:
## ngrok token setup (the real authtoken has been redacted; never commit it to the notebook)
# !ngrok authtoken $NGROK_AUTHTOKEN
# from pyngrok import ngrok
# !streamlit run test.py&>/dev/null&
# publ_url = ngrok.connect(addr='8501')
# publ_url

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


<NgrokTunnel: "http://8117-34-87-4-99.ngrok.io" -> "http://localhost:8501">

In [None]:
# ## streamlit 종료 후 초기화 해주기!!
# ngrok.kill()  # url 초기화

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Credentials must not be stored in the notebook. Read the authtoken from the
# NGROK_AUTHTOKEN environment variable, or prompt for it without echoing.
# NOTE(security): a token that was ever saved in a shared notebook should be
# treated as leaked and revoked in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_auth_token)



In [None]:
# Start the Streamlit app in the background (`&`) and, in the same shell, open a
# localtunnel to port 8501 so the app is reachable through the printed public URL.
!streamlit run KoBERT.py & npx localtunnel --port 8501

[..................] | fetchMetadata: sill resolveWithNewModule localtunnel@2.0[0m[K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[K[?25hnpx: installed 22 in 5.089s
your url is: https://smooth-ravens-report-34-90-227-214.loca.lt
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.90.227.214:8501[0m
[0m
/content/drive/MyDrive/final project/code/streamlit/.cache/kobert_v1.zip[██████████████████████████████████████████████████]
/content/drive/MyDrive/final project/code/streamlit/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece[██████████████████████████████████████████████████]
using cached model. /content/drive/MyDrive/final project/code/streamlit/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece
using cached model. /content/drive/MyDrive/final project/code/streamlit/.cache/kobert_v1.zip
using cached model. /content/d