In [1]:
import torch

In [2]:
from transformers import ElectraTokenizer, ElectraForSequenceClassification

model_name = "monologg/koelectra-v3-small-nsmc"
tokenizer = ElectraTokenizer.from_pretrained(model_name)
model = ElectraForSequenceClassification.from_pretrained(model_name)

In [3]:
device = torch.device("cuda")
model.to(device)

ElectraForSequenceClassification(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(35000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (embeddings_project): Linear(in_features=128, out_features=256, bias=True)
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=256, out_features=256, bias=True)
              (key): Linear(in_features=256, out_features=256, bias=True)
              (value): Linear(in_features=256, out_features=256, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_

In [4]:
import pandas as pd
test_data = pd.read_csv('ratings_test.txt', delimiter='\t')

In [5]:
test_data = pd.DataFrame(test_data)

In [14]:
data_list = test_data.head(2)['document'].tolist()
label_list = test_data.head(2)['label'].values.tolist()

In [15]:
texts = []
textsLabel = []
for tmp_label, tmp_text in zip(label_list, data_list):
    if type(tmp_text) != type('str') :
        continue
    texts.append(tmp_text)
    textsLabel.append(tmp_label)
print(texts)

['굳 ㅋ', 'GDNTOPCLASSINTHECLUB']


In [16]:
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
inputs.to(device)

{'input_ids': tensor([[    2,  2104,   287,     3,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0],
        [    2,    43,  4090,  4253, 13927,  4105,  4091, 19802,  4130, 32150,
          4253,  4169,  4013, 13352,  4053,  4101,  4015,     3]],
       device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')}

In [19]:
import time
def sentimentalAnalysis(text):
    with torch.no_grad():
        encodedInput = tokenizer.encode_plus(text, return_tensors="pt").to(device)
        output = model(encodedInput['input_ids'], attention_mask = encodedInput['attention_mask'])
        logits = output.logits
        probabilities = torch.softmax(logits, dim=1)
        predictedLabel = torch.argmax(probabilities, dim=1)
        return predictedLabel.item()

In [4]:
import time
def sentimentalAnalysis(text):
    with torch.no_grad():
        encodedInput = tokenizer.encode_plus(text, return_tensors="pt").to(device)
        output = model(encodedInput['input_ids'], attention_mask = encodedInput['attention_mask'])
        logits = output.logits
        probabilities = torch.softmax(logits, dim=1)
        #predictedLabel = torch.argmax(probabilities, dim=1)
        return probabilities[0][1].item()

In [20]:
import time
result = 0

start = time.time()
for text, label in zip(texts, textsLabel) :
    if sentimentalAnalysis(text)==label :
        result+=1
        
processTime = time.time() - start

In [21]:
print(str(float(result)/float(len(texts))*100.0) + "%의 확률로 감정분석에 성공합니다.")

0.0%의 확률로 감정분석에 성공합니다.


In [12]:
print(str(len(texts)) + "개의 데이터를 감정분석하는데 " + str(processTime) + "초가 소요됩니다.")

500개의 데이터를 감정분석하는데 4.079816579818726초가 소요됩니다.


In [47]:
print(sentimentalAnalysis("너무 애매해요"))

tensor(0.0018, device='cuda:0')
0.0017901455285027623


In [None]:
from flask import Flask, request, jsonify
import threading 

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json(force=True)
    print(data)
    result = sentimentalAnalysis(data)
    return jsonify(result)

def runFlaskApp():
    app.run(host='0.0.0.0', port=5000)
    
flaskThread = threading.Thread(target=runFlaskApp)
flaskThread.start()    

input("Press Enter to stop the Flask app...")

flask_thread.raise_keyboard_interrupt()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://163.239.26.24:5000
Press CTRL+C to quit


{"inputData": "안녕하세요"}


127.0.0.1 - - [05/May/2023 22:27:03] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/May/2023 22:27:39] "POST /predict HTTP/1.1" 200 -


{"inputData": "기분이 나쁘네요"}


127.0.0.1 - - [05/May/2023 22:28:56] "POST /predict HTTP/1.1" 200 -


{"inputData": "날씨가 맑아서 기분이 좋네요"}


127.0.0.1 - - [05/May/2023 22:29:05] "POST /predict HTTP/1.1" 200 -


{"inputData": "유용한 정보 감사합니다"}


127.0.0.1 - - [05/May/2023 22:29:26] "POST /predict HTTP/1.1" 200 -


{"inputData": "시연 중인데 좋은 점수 주시면 감사하겠습니다"}


127.0.0.1 - - [05/May/2023 22:29:48] "POST /predict HTTP/1.1" 200 -


{"inputData": "안녕하세요"}


127.0.0.1 - - [05/May/2023 22:30:01] "POST /predict HTTP/1.1" 200 -


{"inputData": "힘들게 서버랑 모델 구현했습니다"}


127.0.0.1 - - [05/May/2023 22:30:11] "POST /predict HTTP/1.1" 200 -


{"inputData": "좋게 봐주세요~"}


127.0.0.1 - - [05/May/2023 22:30:28] "POST /predict HTTP/1.1" 200 -


{"inputData": "그래도 중간평가 때 시연할 수 있어서 기쁘네요!"}


127.0.0.1 - - [05/May/2023 22:30:43] "POST /predict HTTP/1.1" 200 -


{"inputData": "오늘 하루 잘 보내세요"}
