In [1]:
from IPython.display import clear_output
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from typing import List

## Progress-bar library used to monitor training / evaluation loops
from tqdm.notebook import tqdm

## Install the Hugging Face libraries and download the KoBERT tokenizer module
!pip install transformers SentencePiece
!wget https://raw.githubusercontent.com/monologg/KoBERT-Transformers/master/kobert_transformers/tokenization_kobert.py
from transformers import DistilBertModel
from tokenization_kobert import KoBertTokenizer

## Other machine-learning utilities used in this notebook
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hide the pip / wget logs from the cell output.
clear_output()

In [2]:
# Mount Google Drive at /content/drive so the corpus archives can be copied from it (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Drive locations of the training / validation corpus archives.
train_path = '/content/drive/MyDrive/의현/018_감성대화/Training_221115_add/원천데이터/감성대화말뭉치(최종데이터)_Training.zip'
val_path = '/content/drive/MyDrive/의현/018_감성대화/Validation_221115_add/원천데이터/감성대화말뭉치(최종데이터)_Validation.zip'
# Copy both archives into the current working directory.
!cp -r "$train_path" ./
!cp -r "$val_path" ./
# Flush pending writes and unmount Drive; the following cells only read the local copies.
drive.flush_and_unmount()

In [4]:
# Extract both archives into the working directory, then hide the unzip log.
!unzip './감성대화말뭉치(최종데이터)_Training.zip'
!unzip './감성대화말뭉치(최종데이터)_Validation.zip'
clear_output()

In [5]:
import os
import shutil
from sys import platform
from glob import glob

# Workbook locations relative to the working directory (default layout).
train_data_path = './감성대화말뭉치(최종데이터)_Training.xlsx'
val_data_path = './감성대화말뭉치(최종데이터)_Validation.xlsx'

# On macOS the workbooks are looked up under a local "dset" directory instead;
# every other platform keeps the paths above untouched.
if platform == "darwin":
    train_data_path, val_data_path = (
        os.path.join("dset", workbook)
        for workbook in (train_data_path, val_data_path)
    )

In [6]:
import numpy as np
import pandas as pd

# Load both workbooks; the 'Unnamed: 0' column becomes the row index.
train_dataset = pd.read_excel(train_data_path, index_col = 'Unnamed: 0')
val_dataset = pd.read_excel(val_data_path, index_col = 'Unnamed: 0')

In [7]:
# Normalise the three human-utterance columns to plain strings (missing -> '').
train_dataset[['사람문장1', '사람문장2', '사람문장3']] = (
    train_dataset[['사람문장1', '사람문장2', '사람문장3']].fillna('').astype(str)
)

# Model input: the three human utterances joined with single spaces.
train_dataset['sentence'] = train_dataset[['사람문장1', '사람문장2', '사람문장3']].apply(' '.join, axis=1)

# Target label: "<major emotion> <minor emotion>" as one string.
train_dataset['sentiment'] = train_dataset[['감정_대분류', '감정_소분류']].apply(' '.join, axis=1)

# Drop the raw utterance / system-reply / emotion columns now that the derived
# columns exist; 'sentence' and 'sentiment' stay as the last two columns.
train_dataset = train_dataset.drop(
    columns=[
        '사람문장1', '사람문장2', '사람문장3',
        '시스템문장1', '시스템문장2', '시스템문장3',
        '감정_대분류', '감정_소분류',
    ]
)

In [8]:
# Normalise the three human-utterance columns to plain strings (missing -> '').
val_dataset[['사람문장1', '사람문장2', '사람문장3']] = (
    val_dataset[['사람문장1', '사람문장2', '사람문장3']].fillna('').astype(str)
)

# Model input: the three human utterances joined with single spaces.
val_dataset['sentence'] = val_dataset[['사람문장1', '사람문장2', '사람문장3']].apply(' '.join, axis=1)

# Target label: "<major emotion> <minor emotion>" as one string.
val_dataset['sentiment'] = val_dataset[['감정_대분류', '감정_소분류']].apply(' '.join, axis=1)

# Drop the raw utterance / system-reply / emotion columns now that the derived
# columns exist; 'sentence' and 'sentiment' stay as the last two columns.
val_dataset = val_dataset.drop(
    columns=[
        '사람문장1', '사람문장2', '사람문장3',
        '시스템문장1', '시스템문장2', '시스템문장3',
        '감정_대분류', '감정_소분류',
    ]
)

In [9]:
# Features are every column except the joined emotion label; the label column is the target.
X_train, y_train = train_dataset.drop(columns='sentiment'), train_dataset['sentiment']
X_val, y_val = val_dataset.drop(columns='sentiment'), val_dataset['sentiment']

In [10]:
# Encode the joined emotion strings as integer class ids.
# The encoder is fitted on the training labels only.
le = preprocessing.LabelEncoder()

train_labels = le.fit_transform(y_train.values)

In [11]:
# Number of distinct classes seen by the encoder; used as the classifier's output size.
num_unique_labels = len(le.classes_)
print("독립적인 레이블의 개수:", num_unique_labels)

독립적인 레이블의 개수: 60


In [12]:
# Every sample is padded / truncated to this many tokens by Dataset_Generation.
MAX_LEN = 512
# Batch sizes for the training and evaluation DataLoaders.
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 32
# Number of passes over the training set.
EPOCHS = 50
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 1e-04
# KoBERT SentencePiece tokenizer (class downloaded by the setup cell).
tokenizer = KoBertTokenizer.from_pretrained('monologg/kobert')
# Hide the tokenizer download log.
clear_output()

In [67]:
class Dataset_Generation(Dataset):
    """Map-style dataset that tokenizes one DataFrame row per sample.

    Each row's values are converted to strings and joined with ", " into a
    single text, which is tokenized, padded and truncated to ``max_len``.

    Args:
        df: DataFrame whose rows are the samples (any index is accepted).
        labels: Sequence of integer class ids, positionally aligned with
            the rows of ``df``.
        tokenizer: Object exposing a Hugging Face style ``encode_plus``.
        max_len: Fixed token length of every returned sample.
    """

    def __init__(self, df, labels, tokenizer, max_len):
        self.len = len(df)
        self.data = df
        self.tokenizer = tokenizer
        self.labels = labels
        self.max_len = max_len

    def __getitem__(self, idx):
        """Return the ``ids`` / ``mask`` / ``targets`` tensors of sample ``idx``."""
        # Positional lookup: ``labels`` is indexed by position, so the row must
        # be too. Label-based ``.loc`` only worked when the frame happened to
        # carry a 0..n-1 index and would otherwise raise or misalign rows.
        row = self.data.iloc[idx]
        title = ", ".join(str(value) for value in row.values)
        inputs = self.tokenizer.encode_plus(
            title,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'targets': torch.tensor(self.labels[idx], dtype=torch.long)
        }

    def __len__(self):
        """Number of samples (rows of the wrapped DataFrame)."""
        return self.len

In [68]:
# Work on copies of the feature frames with a fresh 0..n-1 index.
train_dataset = X_train.reset_index(drop=True)
test_dataset = X_val.reset_index(drop=True)

print(f"TRAIN Dataset: {train_dataset.shape}")
print(f"TEST Dataset: {test_dataset.shape}")

# Validation labels are encoded with the encoder fitted on the training labels.
test_labels = le.transform(y_val.values)

# Wrap features + labels so each item is tokenized on access.
training_set = Dataset_Generation(
    df=train_dataset, labels=train_labels, tokenizer=tokenizer, max_len=MAX_LEN
)
testing_set = Dataset_Generation(
    df=test_dataset, labels=test_labels, tokenizer=tokenizer, max_len=MAX_LEN
)

TRAIN Dataset: (51630, 5)
TEST Dataset: (6641, 5)


In [15]:
# Sanity check: fetch the first encoded sample and print each of its tensors.
first_item = training_set[0]

for key, value in first_item.items():
    print(f"{key}: {value}")

ids: tensor([   2, 4485,   46, 3312,   46, 4360, 6079,   46, 7476, 6873,   46, 7342,
        7178,   46, 5000, 6881, 7089,   46, 3803, 7086, 3466, 5002, 5002, 1363,
        7096, 3282, 5591,  258, 5112, 5330, 1406, 5782,   54, 1189, 1435, 4999,
        7794,  921, 1370, 6797,   54, 1409, 5931, 7835, 2426, 4217,  517, 6751,
        7328, 3149,   54,    3,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    

In [16]:
# Show the raw ", "-joined text built from row 0, i.e. what gets tokenized for that sample.
", ".join(str(_) for _ in train_dataset.loc[0, :].values)

'청년, 여성, 진로,취업,직장, 해당없음, 일은 왜 해도 해도 끝이 없을까? 화가 난다. 그냥 내가 해결하는 게 나아. 남들한테 부담 주고 싶지도 않고. '

In [17]:
# Round-trip check: decode the token ids of the first sample back into text.
decoded_text = tokenizer.decode(first_item['ids'])
print(decoded_text)

[CLS] 청년, 여성, 진로,취업,직장, 해당없음, 일은 왜 해도 해도 끝이 없을까? 화가 난다. 그냥 내가 해결하는 게 나아. 남들한테 부담 주고 싶지도 않고.[SEP][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD

In [69]:
# Training batches are reshuffled every epoch.
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 4
                }

# Evaluation keeps a fixed order. The validation loop averages per-batch
# losses, so with shuffling the contents of the final (partial) batch change
# from run to run and the reported loss is not reproducible.
test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False,
                'num_workers': 4
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

In [19]:
class classification_with_BERTmodel(torch.nn.Module):
    """DistilKoBERT encoder followed by a small classification head.

    The head is Linear -> ReLU -> Dropout(0.3) -> Linear, applied to the
    hidden state of the first token of each sequence.

    Args:
        num_labels: Number of output classes. When omitted, the notebook-level
            ``num_unique_labels`` is used, which keeps the original no-argument
            construction working.
    """

    def __init__(self, num_labels=None):
        super().__init__()
        if num_labels is None:
            # Backward-compatible default: fall back to the global label count.
            num_labels = num_unique_labels
        self.l1 = DistilBertModel.from_pretrained('monologg/distilkobert')
        # Size the head from the encoder's configuration instead of hard-coding 768.
        hidden_dim = self.l1.config.dim
        self.pre_classifier = torch.nn.Linear(hidden_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(hidden_dim, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return class logits of shape (batch, num_labels).

        Args:
            input_ids: Token ids, one row per sequence.
            attention_mask: Mask with the same shape as ``input_ids``.
        """
        encoder_output = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = encoder_output[0]
        # Use the first token's hidden state as the sequence representation.
        pooled = hidden_state[:, 0]
        pooled = torch.relu(self.pre_classifier(pooled))
        pooled = self.dropout(pooled)
        return self.classifier(pooled)

In [76]:
# Release cached GPU memory held by PyTorch's allocator.
# NOTE(review): the recorded run of this cell ended in a RuntimeError - confirm the cell is still needed.
torch.cuda.empty_cache()

RuntimeError: ignored

In [20]:
# Build the classifier (loads the pretrained encoder) and move it to the selected device.
model = classification_with_BERTmodel()
model.to(device)
# Hide the weight-download log.
clear_output()

In [21]:
# Multi-class cross-entropy over the raw logits returned by the model.
loss_function = torch.nn.CrossEntropyLoss()
# Adam over every model parameter (encoder and classification head alike).
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

In [22]:
def calcuate_accu(big_idx, targets):
    """Count the exact matches between predicted and target class ids.

    Args:
        big_idx: Tensor of predicted class ids.
        targets: Tensor of ground-truth class ids, same shape as ``big_idx``.

    Returns:
        The number of positions where prediction and target agree, as a Python int.
    """
    hits = big_idx == targets
    return hits.sum().item()

In [23]:
def train(epoch, log_every=1000):
    """Run one training epoch over ``training_loader`` and print its metrics.

    Uses the notebook-level ``model``, ``training_loader``, ``loss_function``,
    ``optimizer`` and ``device``.

    Args:
        epoch: Epoch number; only used in the printed summary.
        log_every: Print the running loss / accuracy every this many steps
            (step 0 included). The default of 1000 keeps the original cadence;
            the log text now reports the real interval instead of a
            hard-coded "100".

    Returns:
        None. Metrics are reported through ``print`` only.
    """
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()
    for step, data in tqdm(enumerate(training_loader, 0), total=len(training_loader)):
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.long)

        outputs = model(ids, mask)
        loss = loss_function(outputs, targets)
        tr_loss += loss.item()
        # detach(): the predictions are only used for bookkeeping, not for gradients.
        _, big_idx = torch.max(outputs.detach(), dim=1)
        n_correct += calcuate_accu(big_idx, targets)

        nb_tr_steps += 1
        nb_tr_examples += targets.size(0)

        if step % log_every == 0:
            # Running averages over the steps seen so far in this epoch.
            loss_step = tr_loss / nb_tr_steps
            accu_step = (n_correct * 100) / nb_tr_examples
            print(f"Training Loss per {log_every} steps: {loss_step}")
            print(f"Training Accuracy per {log_every} steps: {accu_step}")

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f'The Total Accuracy for Epoch {epoch}: {epoch_accu}')
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}%")

    return

In [24]:
# Train for EPOCHS full passes over the training loader; metrics are printed by train().
for epoch in range(EPOCHS):
    train(epoch)

  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 4.0947394371032715
Training Accuracy per 100 steps: 0.0
Training Loss per 100 steps: 3.2634146882818413
Training Accuracy per 100 steps: 20.526348651348652
The Total Accuracy for Epoch 0: 26.064303699399574
Training Loss Epoch: 3.027980426873416
Training Accuracy Epoch: 26.064303699399574%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 1.9820060729980469
Training Accuracy per 100 steps: 43.75
Training Loss per 100 steps: 2.417321338639274
Training Accuracy per 100 steps: 39.44805194805195
The Total Accuracy for Epoch 1: 39.754018981212475
Training Loss Epoch: 2.402009084026757
Training Accuracy Epoch: 39.754018981212475%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 1.7928858995437622
Training Accuracy per 100 steps: 50.0
Training Loss per 100 steps: 2.1866824020753493
Training Accuracy per 100 steps: 43.821803196803195
The Total Accuracy for Epoch 2: 44.07127638969591
Training Loss Epoch: 2.1798816177038454
Training Accuracy Epoch: 44.07127638969591%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 2.4726812839508057
Training Accuracy per 100 steps: 34.375
Training Loss per 100 steps: 1.977641201519466
Training Accuracy per 100 steps: 48.26111388611388
The Total Accuracy for Epoch 3: 48.18903738136742
Training Loss Epoch: 1.9857563411140677
Training Accuracy Epoch: 48.18903738136742%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 1.9892939329147339
Training Accuracy per 100 steps: 53.125
Training Loss per 100 steps: 1.774091621498009
Training Accuracy per 100 steps: 52.89085914085914
The Total Accuracy for Epoch 4: 52.58376912647685
Training Loss Epoch: 1.7858099925252142
Training Accuracy Epoch: 52.58376912647685%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 1.554400086402893
Training Accuracy per 100 steps: 62.5
Training Loss per 100 steps: 1.562447241017154
Training Accuracy per 100 steps: 57.75474525474525
The Total Accuracy for Epoch 5: 57.23222932403641
Training Loss Epoch: 1.5841757532167613
Training Accuracy Epoch: 57.23222932403641%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 1.3617078065872192
Training Accuracy per 100 steps: 59.375
Training Loss per 100 steps: 1.3729461800682914
Training Accuracy per 100 steps: 62.375124875124875
The Total Accuracy for Epoch 6: 61.731551423590936
Training Loss Epoch: 1.391179388820608
Training Accuracy Epoch: 61.731551423590936%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 1.0308114290237427
Training Accuracy per 100 steps: 75.0
Training Loss per 100 steps: 1.1843237254348074
Training Accuracy per 100 steps: 66.57405094905094
The Total Accuracy for Epoch 7: 66.14371489444122
Training Loss Epoch: 1.2078988903480243
Training Accuracy Epoch: 66.14371489444122%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.8263868689537048
Training Accuracy per 100 steps: 78.125
Training Loss per 100 steps: 1.023320772133388
Training Accuracy per 100 steps: 70.8198051948052
The Total Accuracy for Epoch 8: 70.19949641681193
Training Loss Epoch: 1.0451163891818296
Training Accuracy Epoch: 70.19949641681193%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.6995623707771301
Training Accuracy per 100 steps: 65.625
Training Loss per 100 steps: 0.8729216028135139
Training Accuracy per 100 steps: 74.43806193806194
The Total Accuracy for Epoch 9: 73.6742204144877
Training Loss Epoch: 0.9015391913359198
Training Accuracy Epoch: 73.6742204144877%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.6184077262878418
Training Accuracy per 100 steps: 78.125
Training Loss per 100 steps: 0.7504329777115232
Training Accuracy per 100 steps: 77.96266233766234
The Total Accuracy for Epoch 10: 77.25934534185551
Training Loss Epoch: 0.7739710646601857
Training Accuracy Epoch: 77.25934534185551%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.8340936303138733
Training Accuracy per 100 steps: 81.25
Training Loss per 100 steps: 0.6376467214657234
Training Accuracy per 100 steps: 81.08766233766234
The Total Accuracy for Epoch 11: 80.26147588611272
Training Loss Epoch: 0.6620668769390535
Training Accuracy Epoch: 80.26147588611272%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.4104311466217041
Training Accuracy per 100 steps: 90.625
Training Loss per 100 steps: 0.5465470878453879
Training Accuracy per 100 steps: 83.22302697302698
The Total Accuracy for Epoch 12: 82.6786751888437
Training Loss Epoch: 0.5686423229111319
Training Accuracy Epoch: 82.6786751888437%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.39591655135154724
Training Accuracy per 100 steps: 90.625
Training Loss per 100 steps: 0.4698647833564184
Training Accuracy per 100 steps: 85.49887612387613
The Total Accuracy for Epoch 13: 84.75305055200465
Training Loss Epoch: 0.494598325009572
Training Accuracy Epoch: 84.75305055200465%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.4548971354961395
Training Accuracy per 100 steps: 81.25
Training Loss per 100 steps: 0.41123402432038947
Training Accuracy per 100 steps: 87.22215284715284
The Total Accuracy for Epoch 14: 86.65891923300407
Training Loss Epoch: 0.4292459259319778
Training Accuracy Epoch: 86.65891923300407%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.42102134227752686
Training Accuracy per 100 steps: 81.25
Training Loss per 100 steps: 0.3578476996554599
Training Accuracy per 100 steps: 88.76748251748252
The Total Accuracy for Epoch 15: 88.11737361998838
Training Loss Epoch: 0.3773686690198297
Training Accuracy Epoch: 88.11737361998838%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.1273621916770935
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.3161033692338935
Training Accuracy per 100 steps: 89.94443056943057
The Total Accuracy for Epoch 16: 89.22525663374007
Training Loss Epoch: 0.3392095177975637
Training Accuracy Epoch: 89.22525663374007%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.3240804076194763
Training Accuracy per 100 steps: 90.625
Training Loss per 100 steps: 0.2835853568154511
Training Accuracy per 100 steps: 91.15259740259741
The Total Accuracy for Epoch 17: 90.5597520821228
Training Loss Epoch: 0.2994597607206835
Training Accuracy Epoch: 90.5597520821228%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.06659956276416779
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.2567790737273274
Training Accuracy per 100 steps: 91.72077922077922
The Total Accuracy for Epoch 18: 91.24539996126283
Training Loss Epoch: 0.27459818040294276
Training Accuracy Epoch: 91.24539996126283%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.38299453258514404
Training Accuracy per 100 steps: 84.375
Training Loss per 100 steps: 0.22536216346969407
Training Accuracy per 100 steps: 92.78846153846153
The Total Accuracy for Epoch 19: 92.0414487700949
Training Loss Epoch: 0.2494379285557941
Training Accuracy Epoch: 92.0414487700949%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.3073410391807556
Training Accuracy per 100 steps: 90.625
Training Loss per 100 steps: 0.23171638587413432
Training Accuracy per 100 steps: 92.54183316683317
The Total Accuracy for Epoch 20: 92.22545031958164
Training Loss Epoch: 0.24229417820755378
Training Accuracy Epoch: 92.22545031958164%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.06998621672391891
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.21247877578412022
Training Accuracy per 100 steps: 93.25674325674326
The Total Accuracy for Epoch 21: 92.78713926012009
Training Loss Epoch: 0.22579918242875566
Training Accuracy Epoch: 92.78713926012009%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.22162453830242157
Training Accuracy per 100 steps: 93.75
Training Loss per 100 steps: 0.19134880847611152
Training Accuracy per 100 steps: 93.93106893106894
The Total Accuracy for Epoch 22: 93.34689134224288
Training Loss Epoch: 0.20733800149364617
Training Accuracy Epoch: 93.34689134224288%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.26991161704063416
Training Accuracy per 100 steps: 90.625
Training Loss per 100 steps: 0.18293022749141663
Training Accuracy per 100 steps: 94.24013486513486
The Total Accuracy for Epoch 23: 93.80980050358319
Training Loss Epoch: 0.19540513015601466
Training Accuracy Epoch: 93.80980050358319%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.4229254722595215
Training Accuracy per 100 steps: 87.5
Training Loss per 100 steps: 0.16722252952366637
Training Accuracy per 100 steps: 94.73026973026973
The Total Accuracy for Epoch 24: 94.1952353282975
Training Loss Epoch: 0.18325414564870204
Training Accuracy Epoch: 94.1952353282975%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.4010557532310486
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.16468936282225868
Training Accuracy per 100 steps: 94.75836663336663
The Total Accuracy for Epoch 25: 94.48382723222933
Training Loss Epoch: 0.17639670429562013
Training Accuracy Epoch: 94.48382723222933%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.08193992078304291
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.1505168658261607
Training Accuracy per 100 steps: 95.06743256743256
The Total Accuracy for Epoch 26: 94.6503970559752
Training Loss Epoch: 0.16587015260772633
Training Accuracy Epoch: 94.6503970559752%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.10723085701465607
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.15937528614332522
Training Accuracy per 100 steps: 94.95816683316683
The Total Accuracy for Epoch 27: 94.789850861902
Training Loss Epoch: 0.1637571726125334
Training Accuracy Epoch: 94.789850861902%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.021681800484657288
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.1359477599949709
Training Accuracy per 100 steps: 95.65122377622377
The Total Accuracy for Epoch 28: 95.32442378462135
Training Loss Epoch: 0.14687646029354332
Training Accuracy Epoch: 95.32442378462135%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.14247561991214752
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.14259696596503987
Training Accuracy per 100 steps: 95.5201048951049
The Total Accuracy for Epoch 29: 95.19078055394151
Training Loss Epoch: 0.15255001155212938
Training Accuracy Epoch: 95.19078055394151%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.039577558636665344
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.13344140941827007
Training Accuracy per 100 steps: 95.76673326673327
The Total Accuracy for Epoch 30: 95.50261475886113
Training Loss Epoch: 0.14294077580217088
Training Accuracy Epoch: 95.50261475886113%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.0842544361948967
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.12786440098924295
Training Accuracy per 100 steps: 95.95092407592408
The Total Accuracy for Epoch 31: 95.60139453805927
Training Loss Epoch: 0.13695401052757983
Training Accuracy Epoch: 95.60139453805927%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.11806952208280563
Training Accuracy per 100 steps: 93.75
Training Loss per 100 steps: 0.12361284887155781
Training Accuracy per 100 steps: 95.96965534465535
The Total Accuracy for Epoch 32: 95.7505326360643
Training Loss Epoch: 0.13343848088725044
Training Accuracy Epoch: 95.7505326360643%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.1094839945435524
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.11960102520049452
Training Accuracy per 100 steps: 96.29120879120879
The Total Accuracy for Epoch 33: 95.89579701723804
Training Loss Epoch: 0.130223444920631
Training Accuracy Epoch: 95.89579701723804%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.06686954200267792
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.11401367740466964
Training Accuracy per 100 steps: 96.25686813186813
The Total Accuracy for Epoch 34: 96.01781909742398
Training Loss Epoch: 0.12326528576222598
Training Accuracy Epoch: 96.01781909742398%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.04700431227684021
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.11885750900847478
Training Accuracy per 100 steps: 96.27247752247752
The Total Accuracy for Epoch 35: 96.08754600038738
Training Loss Epoch: 0.12412391629759881
Training Accuracy Epoch: 96.08754600038738%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.029336825013160706
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.11141983672944396
Training Accuracy per 100 steps: 96.50661838161838
The Total Accuracy for Epoch 36: 96.22312608948286
Training Loss Epoch: 0.11904753227203177
Training Accuracy Epoch: 96.22312608948286%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.15584416687488556
Training Accuracy per 100 steps: 93.75
Training Loss per 100 steps: 0.10920999597732003
Training Accuracy per 100 steps: 96.55032467532467
The Total Accuracy for Epoch 37: 96.3916327716444
Training Loss Epoch: 0.11461260082295956
Training Accuracy Epoch: 96.3916327716444%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.04312179610133171
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.1034175797252098
Training Accuracy per 100 steps: 96.62837162837162
The Total Accuracy for Epoch 38: 96.48266511717993
Training Loss Epoch: 0.10932819088219863
Training Accuracy Epoch: 96.48266511717993%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.016540097072720528
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.09946243665609081
Training Accuracy per 100 steps: 96.89685314685315
The Total Accuracy for Epoch 39: 96.68022467557621
Training Loss Epoch: 0.1060011640082597
Training Accuracy Epoch: 96.68022467557621%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.04047878086566925
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.10633743997080834
Training Accuracy per 100 steps: 96.69393106893106
The Total Accuracy for Epoch 40: 96.60081348053457
Training Loss Epoch: 0.10795602122432225
Training Accuracy Epoch: 96.60081348053457%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.057053208351135254
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.1022827735743611
Training Accuracy per 100 steps: 96.73139360639361
The Total Accuracy for Epoch 41: 96.66472980825101
Training Loss Epoch: 0.10463503737315936
Training Accuracy Epoch: 96.66472980825101%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.07369396835565567
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.08777879087264175
Training Accuracy per 100 steps: 97.29020979020979
The Total Accuracy for Epoch 42: 97.00949060623668
Training Loss Epoch: 0.09475239194519045
Training Accuracy Epoch: 97.00949060623668%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.04721725732088089
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.09321293593816958
Training Accuracy per 100 steps: 97.12162837162838
The Total Accuracy for Epoch 43: 96.82548905674996
Training Loss Epoch: 0.10037952305776757
Training Accuracy Epoch: 96.82548905674996%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.1105528324842453
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.09152781893057259
Training Accuracy per 100 steps: 96.96553446553446
The Total Accuracy for Epoch 44: 96.819678481503
Training Loss Epoch: 0.09773812819124561
Training Accuracy Epoch: 96.819678481503%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.058501552790403366
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.08954250676414141
Training Accuracy per 100 steps: 97.14035964035963
The Total Accuracy for Epoch 45: 97.08696494286268
Training Loss Epoch: 0.0913152639664533
Training Accuracy Epoch: 97.08696494286268%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.03435545414686203
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.08807731974274748
Training Accuracy per 100 steps: 97.23713786213786
The Total Accuracy for Epoch 46: 97.08890180127833
Training Loss Epoch: 0.09254007326639396
Training Accuracy Epoch: 97.08890180127833%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.22849223017692566
Training Accuracy per 100 steps: 96.875
Training Loss per 100 steps: 0.07878279850037152
Training Accuracy per 100 steps: 97.56493506493507
The Total Accuracy for Epoch 47: 97.10052295177222
Training Loss Epoch: 0.09289981415150503
Training Accuracy Epoch: 97.10052295177222%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.02639983780682087
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.0852597511609431
Training Accuracy per 100 steps: 97.36201298701299
The Total Accuracy for Epoch 48: 97.19349215572342
Training Loss Epoch: 0.09097666590843988
Training Accuracy Epoch: 97.19349215572342%


  0%|          | 0/1614 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.025711486116051674
Training Accuracy per 100 steps: 100.0
Training Loss per 100 steps: 0.08740148057423339
Training Accuracy per 100 steps: 97.23089410589411
The Total Accuracy for Epoch 49: 97.11214410226613
Training Loss Epoch: 0.09169479128699796
Training Accuracy Epoch: 97.11214410226613%


In [35]:
# Display the module tree of the model.
model

classification_with_BERTmodel(
  (l1): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(8002, 768, padding_idx=1)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-2): 3 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin

In [72]:
def valid(model, testing_loader, accuracy_fn=None):
    """Evaluate ``model`` on ``testing_loader`` and print loss / accuracy.

    Uses the notebook-level ``loss_function`` and ``device``.

    Args:
        model: Module called as ``model(ids, mask)`` that returns class logits.
        testing_loader: Iterable of batches with ``ids``, ``mask`` and
            ``targets`` tensors; must support ``len()``.
        accuracy_fn: Callable ``(predicted_ids, targets) -> number correct``.
            Defaults to ``calcuate_accu`` (exact label match).

    Returns:
        Accuracy over all evaluated samples, as a percentage.
    """
    if accuracy_fn is None:
        accuracy_fn = calcuate_accu
    model.eval()
    n_correct = 0
    tr_loss = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    with torch.no_grad():
        for data in tqdm(testing_loader, total=len(testing_loader)):
            ids = data['ids'].to(device, dtype=torch.long)
            mask = data['mask'].to(device, dtype=torch.long)
            targets = data['targets'].to(device, dtype=torch.long)
            # No squeeze(): the logits must keep their batch dimension, otherwise
            # a final batch holding a single sample collapses to 1-D and both the
            # loss and the dim=1 reduction below fail.
            outputs = model(ids, mask)
            loss = loss_function(outputs, targets)
            tr_loss += loss.item()
            _, big_idx = torch.max(outputs, dim=1)
            n_correct += accuracy_fn(big_idx, targets)

            nb_tr_steps += 1
            nb_tr_examples += targets.size(0)

    # The loss is the mean of per-batch means; accuracy is over all samples.
    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: {epoch_accu}")

    return epoch_accu

In [73]:
# Evaluate the model on the validation loader and report the accuracy returned by valid().
acc = valid(model, testing_loader)
print("Validation Data Accuracy = %0.2f%%" % acc)

  0%|          | 0/208 [00:00<?, ?it/s]

Validation Loss Epoch: 3.5779860724623385
Validation Accuracy Epoch: 58.0033127541033
Validation Data Accuracy = 58.00%


In [77]:
# Destination file, relative to the current working directory.
output_model_file = 'text_BERTmodel.bin'

# NOTE(review): this pickles the whole module object rather than its state_dict, so
# loading the file requires the classification_with_BERTmodel class to be importable.
# Consider saving model.state_dict() if the file must be portable.
model_to_save = model
torch.save(model_to_save, output_model_file)

print('All files saved')

All files saved


In [102]:
age = '청년'
성별 = '남성'
상황 = '일상 대화'
txt = '무려 1003일만에 한화이글스 4연승!'

# Build the ", "-joined prompt in the same style as the training rows.
# NOTE(review): the training text shown earlier in this notebook has five
# ", "-joined fields (e.g. '..., 해당없음, <sentence>') while this prompt has four -
# confirm whether the missing field should be supplied here.
text = ", ".join([age, 성별, 상황, txt])
text = tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=MAX_LEN,
            padding = 'max_length',
            return_token_type_ids=True,
            truncation=True
        )

# Inference mode: disable dropout and gradient tracking, and give the model an
# explicit batch dimension (1, seq_len) instead of relying on 1-D inputs.
model.eval()
with torch.no_grad():
    outputs = model(
        torch.tensor(text['input_ids']).long().unsqueeze(0).to(device),
        torch.tensor(text['attention_mask']).long().unsqueeze(0).to(device),
    ).squeeze()
big_val, big_idx = torch.max(outputs, dim=-1)
# Map the predicted class id back to its "<major> <minor>" emotion string.
print(le.inverse_transform(big_idx.cpu().numpy().reshape(-1)))

['기쁨 기쁨']


In [111]:
age = '청년'
성별 = '남성'
# NOTE(review): the training text shown earlier in this notebook carries a
# situation keyword in this position (e.g. '진로,취업,직장'); '슬픔' is the name of
# an emotion - confirm this is the intended input.
상황 = '슬픔'
txt = '요즘 너무 힘들어. 내가 할 수 있는 건 아무것도 없어.'

# Build the ", "-joined prompt in the same style as the training rows.
# NOTE(review): the training text has five ", "-joined fields while this prompt
# has four - confirm whether the missing field should be supplied here.
text = ", ".join([age, 성별, 상황, txt])
text = tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=MAX_LEN,
            padding = 'max_length',
            return_token_type_ids=True,
            truncation=True
        )

# Inference mode: disable dropout and gradient tracking, and give the model an
# explicit batch dimension (1, seq_len) instead of relying on 1-D inputs.
model.eval()
with torch.no_grad():
    outputs = model(
        torch.tensor(text['input_ids']).long().unsqueeze(0).to(device),
        torch.tensor(text['attention_mask']).long().unsqueeze(0).to(device),
    ).squeeze()
big_val, big_idx = torch.max(outputs, dim=-1)
# Map the predicted class id back to its "<major> <minor>" emotion string.
print(le.inverse_transform(big_idx.cpu().numpy().reshape(-1)))

['상처 버려진']


In [105]:
def calculate_accu_sentiment(big_idx, targets):
    """Count predictions that are "close enough" under a bucketed comparison.

    A pair counts as correct when either
      * both ids are below 10 and they are exactly equal, or
      * both ids are 10 or above and they share the same ``id // 10`` bucket.
    Pairs with one id below 10 and the other at or above 10 never count.

    NOTE(review): ids below 10 require an exact match while higher ids only
    need the same bucket of ten - presumably buckets stand for major emotion
    categories; confirm the stricter rule for the first bucket is intended.

    Args:
        big_idx: Tensor of predicted class ids.
        targets: Tensor of ground-truth class ids, same shape as ``big_idx``.

    Returns:
        Number of pairs counted as correct, as a Python int.
    """
    both_low = (big_idx < 10) & (targets < 10)
    both_high = (big_idx >= 10) & (targets >= 10)

    exact_match = big_idx == targets
    same_bucket = (big_idx // 10) == (targets // 10)

    low_hits = (both_low & exact_match).sum().item()
    high_hits = (both_high & same_bucket).sum().item()
    return low_hits + high_hits

In [108]:
def valid(model, testing_loader, accuracy_fn=None):
    """Evaluate ``model`` on ``testing_loader`` using the bucketed accuracy.

    This definition replaces any ``valid`` defined earlier in the notebook; it
    differs only in its default metric. Uses the notebook-level
    ``loss_function`` and ``device``.

    Args:
        model: Module called as ``model(ids, mask)`` that returns class logits.
        testing_loader: Iterable of batches with ``ids``, ``mask`` and
            ``targets`` tensors; must support ``len()``.
        accuracy_fn: Callable ``(predicted_ids, targets) -> number correct``.
            Defaults to ``calculate_accu_sentiment``.

    Returns:
        Accuracy over all evaluated samples, as a percentage.
    """
    if accuracy_fn is None:
        accuracy_fn = calculate_accu_sentiment
    model.eval()
    n_correct = 0
    tr_loss = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    with torch.no_grad():
        for data in tqdm(testing_loader, total=len(testing_loader)):
            ids = data['ids'].to(device, dtype=torch.long)
            mask = data['mask'].to(device, dtype=torch.long)
            targets = data['targets'].to(device, dtype=torch.long)
            # No squeeze(): the logits must keep their batch dimension, otherwise
            # a final batch holding a single sample collapses to 1-D and both the
            # loss and the dim=1 reduction below fail.
            outputs = model(ids, mask)
            loss = loss_function(outputs, targets)
            tr_loss += loss.item()
            _, big_idx = torch.max(outputs, dim=1)
            n_correct += accuracy_fn(big_idx, targets)

            nb_tr_steps += 1
            nb_tr_examples += targets.size(0)

    # The loss is the mean of per-batch means; accuracy is over all samples.
    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: {epoch_accu}")

    return epoch_accu

In [109]:
# Re-run the evaluation with the most recently defined valid() and report its accuracy.
acc = valid(model, testing_loader)
print("Validation Data Accuracy = %0.2f%%" % acc)

  0%|          | 0/208 [00:00<?, ?it/s]

Validation Loss Epoch: 3.5818510622932362
Validation Accuracy Epoch: 71.32961903327812
Validation Data Accuracy = 71.33%
