In [1]:
# KoBERT and its dependencies on Google Colab.
# NOTE(review): only transformers is version-pinned here; mxnet, gluonnlp and
# KoBERT@master float, so a fresh run may resolve different versions — consider pinning.
!pip install mxnet
!pip install gluonnlp
!pip install transformers==3.0.2
!pip install git+https://git@github.com/SKTBrain/KoBERT.git@master

# Second server: Flask on Google Colab, exposed through ngrok.
# NOTE(review): flask-ngrok is not version-pinned.
!pip install pyngrok==4.1.1
!pip install flask==0.12.2
!pip install flask-ngrok

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mxnet
  Downloading mxnet-1.9.1-py3-none-manylinux2014_x86_64.whl (49.1 MB)
[K     |████████████████████████████████| 49.1 MB 1.5 MB/s 
Collecting graphviz<0.9.0,>=0.8.1
  Downloading graphviz-0.8.4-py2.py3-none-any.whl (16 kB)
Installing collected packages: graphviz, mxnet
  Attempting uninstall: graphviz
    Found existing installation: graphviz 0.10.1
    Uninstalling graphviz-0.10.1:
      Successfully uninstalled graphviz-0.10.1
Successfully installed graphviz-0.8.4 mxnet-1.9.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gluonnlp
  Downloading gluonnlp-0.10.0.tar.gz (344 kB)
[K     |████████████████████████████████| 344 kB 24.2 MB/s 
Building wheels for collected packages: gluonnlp
  Building wheel for gluonnlp (setup.py) ... [?25l[?25hdone
  Created wheel for gluonnlp: filename=gluonnlp-0.10.0-cp37-cp37

In [2]:
import torch # PyTorch core
from torch import nn # PyTorch neural-network library
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp # GluonNLP: BERT sentence transform and SentencePiece tokenizer wrapper
import numpy as np

# KoBERT helpers: tokenizer file and pretrained model/vocabulary loader
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model

In [3]:
# Setting parameters
# NOTE(review): in the visible cells only max_len and batch_size are read (by predict);
# the remaining values look like leftovers from the training notebook — confirm before removing.
max_len = 64
batch_size = 64  # may be a cause of overfitting
warmup_ratio = 0.1
num_epochs = 100
max_grad_norm = 1
log_interval = 200  # interval at which training logs are printed
learning_rate =  5e-5  # may be a cause of overfitting

In [4]:
class BERTDataset(Dataset):
    """Dataset that pairs BERT-transformed sentences with integer labels.

    Args:
        dataset: sequence of rows; each row is indexable by ``sent_idx`` and ``label_idx``.
        sent_idx: position of the sentence text inside a row.
        label_idx: position of the label inside a row (converted with ``np.int32``).
        bert_tokenizer: tokenizer handed to ``nlp.data.BERTSentenceTransform``.
        max_len: forwarded as ``max_seq_length``.
        pad: forwarded as ``pad``.
        pair: forwarded as ``pair``.

    Indexing returns the transform output for that row with the label appended
    as the last tuple element.
    """

    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len, pad, pair):
        to_features = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        # First pass: transform every sentence (wrapped in a one-element list).
        encoded = []
        for row in dataset:
            encoded.append(to_features([row[sent_idx]]))
        self.sentences = encoded

        # Second pass: collect the labels as 32-bit integers.
        targets = []
        for row in dataset:
            targets.append(np.int32(row[label_idx]))
        self.labels = targets

    def __getitem__(self, i):
        features = self.sentences[i]
        target = self.labels[i]
        return features + (target,)

    def __len__(self):
        return len(self.labels)

In [5]:
class BERTClassifier(nn.Module):
    """Linear classification head on top of a BERT encoder's pooled output.

    Args:
        bert: callable encoder; called with ``input_ids``, ``token_type_ids`` and
            ``attention_mask`` and expected to return a pair whose second element
            is the pooled representation.
        hidden_size: width of the pooled representation fed to the linear layer.
        num_classes: number of output classes.
        dr_rate: dropout probability applied to the pooled output; when falsy
            (``None`` or ``0``) no dropout layer is created or applied.
        params: unused; kept so existing call sites keep working.

    The attribute layout (``bert``, ``dr_rate``, ``classifier``, optional
    ``dropout``) is intentionally unchanged so whole-model checkpoints pickled
    from the previous definition still load.
    """

    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=7,   # adjust to the number of classes
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Return a float mask shaped like ``token_ids``: 1 for the first
        ``valid_length[i]`` positions of row ``i``, 0 elsewhere."""
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Run the encoder and return the classifier logits for each row."""
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        # Previously `out` was only assigned when dropout was enabled, so the
        # default dr_rate=None raised UnboundLocalError on the return line.
        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.classifier(out)

In [6]:
# Mount Google Drive at /content/drive (Colab only); the model checkpoint is read from there later.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Use the GPU when the runtime has one, otherwise the CPU. This replaces the
# former "comment one block in, the other out" CPU/GPU switch.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Location of the fine-tuned checkpoint on the mounted Google Drive.
MODEL_PATH = '/content/drive/MyDrive/Colab Notebooks/(AI_Project) unemployedStation/model/bestModel_state548_epoch6_acc0.786_221101.h5'

# NOTE(security): torch.load unpickles the file and can execute arbitrary code —
# only load checkpoints you created or trust.
# NOTE(review): the file is presumably a whole pickled BERTClassifier, so the
# BERTClassifier cell must have been run before this one — confirm.
model = torch.load(MODEL_PATH, map_location=device)

# Load the KoBERT vocabulary; the pretrained encoder returned alongside it is not used here.
_, vocab = get_pytorch_kobert_model()

# Tokenizer: SentencePiece model wrapped for use with BERTSentenceTransform.
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

/content/.cache/kobert_v1.zip[██████████████████████████████████████████████████]
/content/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece[██████████████████████████████████████████████████]
using cached model. /content/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [8]:
def predict(predict_sentence):
    """Classify one sentence and return the index of the highest-scoring class.

    Args:
        predict_sentence: the text to classify.

    Returns:
        int: class index. Per the label order used when this notebook was
        written: 0 fear, 1 surprise, 2 anger, 3 sadness, 4 neutral,
        5 happiness, 6 disgust. ``None`` if the loader yields no batch.
    """
    # BERTDataset expects a label column; '0' is a placeholder that is never used.
    samples = [[predict_sentence, '0']]
    dataset = BERTDataset(samples, 0, 1, tok, max_len, True, False)

    # num_workers=0: a single sample does not justify forking worker processes
    # on every request (the old value of 5 also triggered a DataLoader warning
    # about exceeding the suggested worker count).
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=0)

    model.eval()

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for token_ids, valid_length, segment_ids, _label in loader:
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)

            out = model(token_ids, valid_length, segment_ids)

            # Only one sentence was submitted, so the first row is the answer.
            logits = out[0].detach().cpu().numpy()
            return int(np.argmax(logits))

    return None

In [9]:
# ngrok.io 가입하면 => 토큰 발급됨
!ngrok authtoken "2H4GL4qr8oM2t1Pf2dGfPc6o30n_2JruEgpiY2sVH2KhYmHt7"

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [10]:
# flask_ngrok_example.py
from flask import Flask, render_template, jsonify, request
from flask_ngrok import run_with_ngrok

# step 2: create the Flask application and register it with flask-ngrok
app = Flask(__name__)
run_with_ngrok(app)  # Start ngrok when app is run
# Flask setting: do not pretty-print jsonify() responses.
app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False
# Shows the module name; the captured output of this cell prints "__main__".
print( __name__ )

# step 3: routing — declare which function answers requests matching a URL pattern.
# '@' applies a decorator (a function wrapping another function via a closure).

# Class index returned by predict() -> emotion name sent to the client.
EMOTION_LABELS = {
    0: "SCARED",
    1: "AMAZE",
    2: "ANGRY",
    3: "SAD",
    4: "NEUTRALITY",
    5: "HAPPY",
    6: "DISGUST",
}

@app.route('/predictKOBERT', methods=['POST'])
def home():
    """Classify the posted ``userText`` form field and reply with its emotion.

    Returns:
        JSON ``{"emotion": <label>}`` with status 200, where the label is one of
        the EMOTION_LABELS values or ``"NONE"`` for an unknown class index.
        When ``userText`` is missing or blank, replies with status 400 and
        ``{"emotion": "NONE", "error": ...}`` instead of failing inside predict().
    """
    req = request.form.get('userText')
    print('(Local Server) Client 입력:', req)

    # Guard: request.form.get returns None when the field is absent, which
    # previously crashed the tokenizer and surfaced as a 500 response.
    if req is None or not req.strip():
        res = "NONE"
        print('--> ',res,'\n')
        return jsonify({'emotion': res, 'error': "form field 'userText' is required"}), 400

    result = predict(req)
    # int() normalises NumPy integer scalars before the table lookup.
    res = "NONE" if result is None else EMOTION_LABELS.get(int(result), "NONE")
    print('--> ',res,'\n')
    return jsonify({'emotion': res})
# step 4: entry point — where the program starts.
# In a notebook __name__ is '__main__', so running this cell starts the server
# and blocks until it is interrupted.
if __name__ == '__main__':
    app.run()  # If address is in use, may need to terminate other sessions:
               # Runtime > Manage Sessions > Terminate Other Sessions

INFO:werkzeug: * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


__main__
 * Running on http://9c33-35-236-224-165.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040
(Local Server) Client 입력: 내 여자친구는 어디에 있을까


  cpuset_checked))
INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:21:45] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  SAD 

(Local Server) Client 입력: 내일 출근이다ㅎㅎ


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:22:37] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  HAPPY 

(Local Server) Client 입력: 배부르다


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:23:05] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  AMAZE 

(Local Server) Client 입력: 정도야 우리 행복하자


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:23:47] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  NEUTRALITY 

(Local Server) Client 입력: 수고했다 정도르


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:24:18] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  NEUTRALITY 

(Local Server) Client 입력: 어디 소주없나


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:24:50] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  DISGUST 

(Local Server) Client 입력: 시간이 너무빠르네


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:26:10] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  AMAZE 

(Local Server) Client 입력: 경비원 아저씨 눈치 좀 많이 보이네


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:27:15] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  SCARED 

(Local Server) Client 입력: 안진뉴


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:28:23] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  NEUTRALITY 

(Local Server) Client 입력: 당신에게 추천해드리고 싶은 곡은


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:28:52] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  HAPPY 

(Local Server) Client 입력: 차 사고 싶다


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:29:54] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  NEUTRALITY 

(Local Server) Client 입력: 어우 열받아


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:30:17] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  ANGRY 

(Local Server) Client 입력: 아무것도 안하고 놀고싶다


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:32:01] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  SAD 

(Local Server) Client 입력: 스파크타고 여행이나 가고싶다


INFO:werkzeug:127.0.0.1 - - [20/Nov/2022 11:34:10] "[37mPOST /predictKOBERT HTTP/1.1[0m" 200 -


-->  NEUTRALITY 

