# 0. 들어가기 앞서

* 고객 질문: 1, 상담원 질문: 2, 고객 및 상담원 대답: 0
* 'AS':0, '주문':1, '배송':2, '업무처리':3, '교환':4, '반품':5, '결제':6
* 참고: https://velog.io/@seolini43/KOBERT%EB%A1%9C-%EB%8B%A4%EC%A4%91-%EB%B6%84%EB%A5%98-%EB%AA%A8%EB%8D%B8-%EB%A7%8C%EB%93%A4%EA%B8%B0-%ED%8C%8C%EC%9D%B4%EC%8D%ACColab

* 한국어언어모델 다양하게 사용해보기 : https://littlefoxdiary.tistory.com/81

# 1. 라이브러리 로드

In [1]:
# !pip install gluonnlp
# !pip install mxnet
# !pip install git+https://git@github.com/SKTBrain/KoBERT.git@master

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm import tqdm, tqdm_notebook

from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model

from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

Using TensorFlow backend.


In [3]:
##GPU 사용 시
device = torch.device("cuda:0")

# 2. 모델, 사전, 데이터셋 불러오기

In [4]:
bertmodel, vocab = get_pytorch_kobert_model()

using cached model. /home/adminuser/notebooks/modeling/question/.cache/kobert_v1.zip
using cached model. /home/adminuser/notebooks/modeling/question/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [5]:
import os
os.listdir("../raw_data")

['.ipynb_checkpoints',
 '5movies.csv',
 '통합_민원(콜센터)_질의응답_K쇼핑.csv',
 'crawling',
 '한국어_단발성_대화_데이터셋.csv',
 'naver_shopping_all_220427.csv',
 '한국어_단발성_대화_데이터셋.xlsx',
 'test.csv',
 '크롤링데이터 합치기.ipynb',
 'bad_words.txt',
 '일간베스트_댓글.txt',
 'youtube_220504.csv',
 'test.csv.lock']

In [6]:
df= pd.read_csv("../raw_data/통합_민원(콜센터)_질의응답_K쇼핑.csv", index_col=0)

**라벨링 처리된 csv였기에 라벨링 과정 skip**

In [7]:
# 질분 다중분류
df1= df[["대화내용","분류"]]
df1.columns=["comment","QA"]

In [8]:
df1.head()

Unnamed: 0,comment,QA
0,저는 ㅇㅇㅇ입니다.,0
1,네. 아쿠아 청소기를 샀었는데요.,1
2,아. 그러십니까?,2
3,네. 얼마 전에도 전화 한 번 드렸던 적이 있어요.,1
4,아. 네.,0


In [9]:
# 질문 카테고리 다중분류
df2= df[(df.분류==1)][["대화내용","카테고리"]]
df2.columns=["comment","category"]

In [10]:
df2.head()

Unnamed: 0,comment,category
1,네. 아쿠아 청소기를 샀었는데요.,0
3,네. 얼마 전에도 전화 한 번 드렸던 적이 있어요.,0
5,그런데 지금 너무 화가 나네요.,0
7,아쿠아 청소기에서 걸레 꽂는 부분이요.,0
9,청소가 제대로 안되는 거 같아서 청소기 아래를 다 분해해 봤거든요.,0


# 3. 질문분류(df1)부터 시작

* 고객 질문: 1, 상담원 질문: 2, 고객 및 상담원 대답: 0

In [11]:
# 2중 리스트로 변환됨

data_list = []
for q, label in zip(df1["comment"],df1["QA"])  :
    data = []
    data.append(q)
    data.append(str(label))

    data_list.append(data)

## 3-1. Train / Test set 분리

* 라벨링은 이미 진행했으므로, 바로 train/ test 분리 진행

In [12]:
dataset_train, dataset_test = train_test_split(data_list, test_size=0.25, random_state=0)

In [13]:
print(len(dataset_train))
print(len(dataset_test))

753924
251309


## 3-2. KoBERT 입력 데이터로 만들기

* 데이터를 train data와 test data로 나누었다면 각 데이터가 KoBERT 모델의 입력으로 들어갈 수 있는 형태가 되도록 토큰화, 정수 인코딩, 패딩 등을 해주어야 한다

In [14]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))

    def __len__(self):
        return (len(self.labels))

In [15]:
# Setting parameters

max_len = 64 # 해당 길이를 초과하는 단어에 대해선 bert가 학습하지 않음
batch_size = 64
warmup_ratio = 0.1
num_epochs = 5
max_grad_norm = 1
log_interval = 200
learning_rate = 5e-5

In [16]:
# 토큰화
tokenizer= get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

data_train = BERTDataset(dataset_train, 0, 1, tok, max_len, True, False)
data_test = BERTDataset(dataset_test, 0, 1, tok, max_len, True, False)

using cached model. /home/adminuser/notebooks/modeling/question/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [17]:
# 첫 번째는 패딩된 시퀀스
# 두 번째는 길이와 타입에 대한 내용
# 세 번재는 어텐션 마스크 시퀀스

data_train[0]

(array([   2, 2847, 4103, 5130,  793, 5925,  517,   54, 2926, 6141, 6050,
        2822, 5330, 7287,  517, 7707, 7494,  517, 7710, 7753, 6664,  517,
        6539, 5931, 3647, 6314, 2650, 6749, 6964, 4227, 1767, 3861,  994,
        5778, 2207, 7748, 4089, 6116, 4924, 6733,  889, 6079, 5130, 5906,
         517,   54,    3,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1], dtype=int32),
 array(47, dtype=int32),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       dtype=int32),
 2)

In [None]:
from transformers import AutoModel, AutoTokenizer


In [18]:
# pytorch용 DataLoader 사용(torch 형식의 dataset을 만들어주기)
train_dataloader = torch.utils.data.DataLoader(data_train, batch_size=batch_size, num_workers=5)
test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=batch_size, num_workers=5)

## 3-3. KoBERT 학습모델 만들기

* 고객 질문: 1, 상담원 질문: 2, 고객 및 상담원 대답: 0 
* 3가지의 class를 분류하기 때문에 num_classes는 3으로 입력

In [19]:
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=3,   ##클래스 수 조정##
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

In [20]:
#BERT 모델 불러오기
model = BERTClassifier(bertmodel,  dr_rate=0.5).to(device)

#optimizer와 schedule 설정
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)

scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

#정확도 측정을 위한 함수 정의
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc



## 3-4. KoBERT 모델 학습시키기

In [21]:
for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
    
    model.eval()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(test_dataloader)):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(train_dataloader)):


  0%|          | 0/11781 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.1340909004211426 train acc 0.359375
epoch 1 batch id 201 loss 0.8274189829826355 train acc 0.486318407960199
epoch 1 batch id 401 loss 0.41078007221221924 train acc 0.6704332917705735
epoch 1 batch id 601 loss 0.15138986706733704 train acc 0.7529378119800333
epoch 1 batch id 801 loss 0.2737046778202057 train acc 0.7971676029962547
epoch 1 batch id 1001 loss 0.292715847492218 train acc 0.8248001998001998
epoch 1 batch id 1201 loss 0.20305629074573517 train acc 0.8426571606994172
epoch 1 batch id 1401 loss 0.14970576763153076 train acc 0.8558730371163454
epoch 1 batch id 1601 loss 0.30628061294555664 train acc 0.8652795128044972
epoch 1 batch id 1801 loss 0.14616483449935913 train acc 0.8732301499167129
epoch 1 batch id 2001 loss 0.37702062726020813 train acc 0.8797476261869065
epoch 1 batch id 2201 loss 0.06964673101902008 train acc 0.8853362108132667
epoch 1 batch id 2401 loss 0.1443168818950653 train acc 0.8896683673469388
epoch 1 batch id 2601 loss 0.1600094

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



epoch 1 batch id 11601 loss 0.17070648074150085 train acc 0.9315751336091717
epoch 1 train acc 0.9318592967490026


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(test_dataloader)):


  0%|          | 0/3927 [00:00<?, ?it/s]

epoch 1 test acc 0.9473210748946043


  0%|          | 0/11781 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.09879152476787567 train acc 0.96875
epoch 2 batch id 601 loss 0.05269382521510124 train acc 0.9432716306156406
epoch 2 batch id 801 loss 0.1758647859096527 train acc 0.9437421972534332
epoch 2 batch id 1001 loss 0.21402721107006073 train acc 0.9447895854145855
epoch 2 batch id 1201 loss 0.14432626962661743 train acc 0.944291215653622
epoch 2 batch id 1401 loss 0.08141428232192993 train acc 0.9447269807280514
epoch 2 batch id 1601 loss 0.17691278457641602 train acc 0.9444390224859462
epoch 2 batch id 1801 loss 0.14520104229450226 train acc 0.944596751804553


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



epoch 2 batch id 5401 loss 0.11089872568845749 train acc 0.9462773097574523
epoch 2 batch id 5601 loss 0.18080779910087585 train acc 0.9462038028923406
epoch 2 batch id 5801 loss 0.21295979619026184 train acc 0.9461434450956732
epoch 2 batch id 6001 loss 0.06724787503480911 train acc 0.9461574112647892
epoch 2 batch id 6201 loss 0.14642691612243652 train acc 0.9462914247701983
epoch 2 batch id 6401 loss 0.1124415397644043 train acc 0.9462925714732073
epoch 2 batch id 6601 loss 0.059106580913066864 train acc 0.9464072678382063


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



epoch 2 batch id 8201 loss 0.10173861682415009 train acc 0.9469329197658822
epoch 2 batch id 8401 loss 0.06043674796819687 train acc 0.9469724586358766
epoch 2 batch id 8601 loss 0.16371043026447296 train acc 0.9470065254040227
epoch 2 batch id 8801 loss 0.201140895485878 train acc 0.9471011816838996
epoch 2 batch id 9001 loss 0.09816337376832962 train acc 0.9471621208754583
epoch 2 batch id 9201 loss 0.15074287354946136 train acc 0.9471677670905336
epoch 2 batch id 9401 loss 0.1249326691031456 train acc 0.9472679103286884
epoch 2 batch id 9601 loss 0.11082640290260315 train acc 0.9473606264972398
epoch 2 batch id 9801 loss 0.15082241594791412 train acc 0.9474846316702378
epoch 2 batch id 10001 loss 0.08238112181425095 train acc 0.9474911883811619
epoch 2 batch id 10201 loss 0.09957218170166016 train acc 0.9476108347220861
epoch 2 batch id 10401 loss 0.08832509070634842 train acc 0.9476778074223632
epoch 2 batch id 10601 loss 0.06968457251787186 train acc 0.9476803485520234
epoch 2 bat

  0%|          | 0/3927 [00:00<?, ?it/s]

epoch 2 test acc 0.9483754739269445


  0%|          | 0/11781 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.0965975821018219 train acc 0.96875
epoch 3 batch id 201 loss 0.10601777583360672 train acc 0.949160447761194
epoch 3 batch id 401 loss 0.08694331347942352 train acc 0.9486440149625935
epoch 3 batch id 601 loss 0.0479179322719574 train acc 0.9485232945091514
epoch 3 batch id 801 loss 0.17610467970371246 train acc 0.9492821473158551
epoch 3 batch id 1001 loss 0.18764249980449677 train acc 0.9499562937062938
epoch 3 batch id 1201 loss 0.1428525149822235 train acc 0.9495862822647794
epoch 3 batch id 1401 loss 0.08443325757980347 train acc 0.9500356887937188
epoch 3 batch id 1601 loss 0.1548403650522232 train acc 0.9499921923797626
epoch 3 batch id 1801 loss 0.15062370896339417 train acc 0.9502967101610217
epoch 3 batch id 2001 loss 0.22977964580059052 train acc 0.9502123938030984
epoch 3 batch id 2201 loss 0.055925920605659485 train acc 0.9504202635165834
epoch 3 batch id 2401 loss 0.15633553266525269 train acc 0.9505674718867139
epoch 3 batch id 2601 loss 0.13540

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



epoch 3 batch id 7201 loss 0.17792055010795593 train acc 0.9521963095403416
epoch 3 batch id 7401 loss 0.126146137714386 train acc 0.9523438555600594
epoch 3 batch id 7601 loss 0.10990075767040253 train acc 0.9524055222996974
epoch 3 batch id 7801 loss 0.16812662780284882 train acc 0.9524439975644148
epoch 3 batch id 8001 loss 0.15038210153579712 train acc 0.9525039838770154
epoch 3 batch id 8201 loss 0.0708112046122551 train acc 0.9526067705157908
epoch 3 batch id 8401 loss 0.03416600450873375 train acc 0.9526842042613974
epoch 3 batch id 8601 loss 0.16428349912166595 train acc 0.9527235205208696
epoch 3 batch id 8801 loss 0.14700011909008026 train acc 0.9527947818429724
epoch 3 batch id 9001 loss 0.15346020460128784 train acc 0.9528576685923786
epoch 3 batch id 9201 loss 0.13523462414741516 train acc 0.9528940468427345
epoch 3 batch id 9401 loss 0.11257615685462952 train acc 0.9529704286778002
epoch 3 batch id 9601 loss 0.11329488456249237 train acc 0.9530712946568066
epoch 3 batch i

  0%|          | 0/3927 [00:00<?, ?it/s]

epoch 3 test acc 0.9490677963104434


  0%|          | 0/11781 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.07779673486948013 train acc 0.96875
epoch 4 batch id 201 loss 0.07237216085195541 train acc 0.9565453980099502
epoch 4 batch id 401 loss 0.09005780518054962 train acc 0.9552291147132169
epoch 4 batch id 601 loss 0.042296119034290314 train acc 0.955464850249584
epoch 4 batch id 801 loss 0.14479804039001465 train acc 0.9564021535580525
epoch 4 batch id 1001 loss 0.17304658889770508 train acc 0.9567932067932068
epoch 4 batch id 1201 loss 0.12064414471387863 train acc 0.9565336178184846
epoch 4 batch id 1401 loss 0.0747576504945755 train acc 0.9570061563169164
epoch 4 batch id 1601 loss 0.1328076869249344 train acc 0.956521314803248
epoch 4 batch id 1801 loss 0.13245196640491486 train acc 0.9566039700166574
epoch 4 batch id 2001 loss 0.2002410888671875 train acc 0.9567247626186907
epoch 4 batch id 2201 loss 0.06313017755746841 train acc 0.9569016924125398
epoch 4 batch id 2401 loss 0.09970775246620178 train acc 0.9568734381507705
epoch 4 batch id 2601 loss 0.09140

  0%|          | 0/3927 [00:00<?, ?it/s]

epoch 4 test acc 0.9498357171151288


  0%|          | 0/11781 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.07168957591056824 train acc 0.96875
epoch 5 batch id 201 loss 0.039368774741888046 train acc 0.9624533582089553
epoch 5 batch id 401 loss 0.05005868151783943 train acc 0.9613466334164589
epoch 5 batch id 601 loss 0.03857196867465973 train acc 0.9610024958402662
epoch 5 batch id 801 loss 0.11838959902524948 train acc 0.9626638576779026
epoch 5 batch id 1001 loss 0.2145552635192871 train acc 0.9631306193806194
epoch 5 batch id 1201 loss 0.053156740963459015 train acc 0.9632467735220649
epoch 5 batch id 1401 loss 0.06967693567276001 train acc 0.9636866523911491
epoch 5 batch id 1601 loss 0.12547457218170166 train acc 0.9633041848844472
epoch 5 batch id 1801 loss 0.12356951087713242 train acc 0.9636660188784009
epoch 5 batch id 2001 loss 0.190815269947052 train acc 0.9637837331334332
epoch 5 batch id 2201 loss 0.05238257348537445 train acc 0.963865856428896
epoch 5 batch id 2401 loss 0.09244471788406372 train acc 0.9636870054144107
epoch 5 batch id 2601 loss 0.067

  0%|          | 0/3927 [00:00<?, ?it/s]

epoch 5 test acc 0.9497243089154853


## 3-5.새로운 문장 테스트

In [22]:
#토큰화
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

def predict(predict_sentence):

    data = [predict_sentence, '0']
    dataset_another = [data]

    another_test = BERTDataset(dataset_another, 0, 1, tok, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(another_test, batch_size=batch_size, num_workers=5)
    
    model.eval()

    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)

        valid_length= valid_length
        label = label.long().to(device)

        out = model(token_ids, valid_length, segment_ids)


        test_eval=[]
        for i in out:
            logits=i
            logits = logits.detach().cpu().numpy()
            # 고객 질문: 1, 상담원 질문: 2, 고객 및 상담원 대답: 0

            if np.argmax(logits) == 0:
                test_eval.append("대답")
            elif np.argmax(logits) == 1:
                test_eval.append("고객 질문")
            elif np.argmax(logits) == 2:
                test_eval.append("상담원 질문")

        print(">> 입력하신 내용은 " + test_eval[0] + " 라고 판단됩니다.")

using cached model. /home/adminuser/notebooks/modeling/question/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [23]:
end = 1
while end == 1 :
    sentence = input("하고싶은 말을 입력해주세요 : ")
    if sentence == 0 :
        break
    predict(sentence)
    print("\n")

하고싶은 말을 입력해주세요 : 66사이즈 있나요?
>> 입력하신 내용은 고객 질문 라고 판단됩니다.


하고싶은 말을 입력해주세요 : 핑크색입니다
>> 입력하신 내용은 대답 라고 판단됩니다.


하고싶은 말을 입력해주세요 : 물어보고싶어요
>> 입력하신 내용은 고객 질문 라고 판단됩니다.


하고싶은 말을 입력해주세요 : 고객님 지금 없습니다
>> 입력하신 내용은 대답 라고 판단됩니다.


하고싶은 말을 입력해주세요 : 다시한번 알려주세요
>> 입력하신 내용은 고객 질문 라고 판단됩니다.


하고싶은 말을 입력해주세요 : 리모컨을 놓을 수가 없네요
>> 입력하신 내용은 고객 질문 라고 판단됩니다.




KeyboardInterrupt: Interrupted by user

In [25]:
import torch
import torchvision.models as models


#모델의 형태를 포함하여 저장하
torch.save(model, 'KoBERT_v.0.0.1_sona.pth')

#불러오기
# model = torch.load('model.pth')