In [3]:
# 런타임 애러 방지
import gc
import torch

gc.collect()
torch.cuda.empty_cache()

In [2]:
!nvidia-smi

Tue May 30 02:00:45 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 457.51       Driver Version: 457.51       CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce GTX 166... WDDM  | 00000000:01:00.0  On |                  N/A |
|  0%   48C    P8    11W / 130W |    606MiB /  6144MiB |     14%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
import pandas as pd

train_set = pd.read_csv('data/병합데이터셋-v3.csv', index_col=0) 

# 감정을 정수 라벨로 변경
emotions = {'행복': 0, '불안': 1, '놀람': 2, '슬픔': 3, '분노': 4, '중립': 5 }
train_set['emotion'] = train_set.emotion.map(emotions)

train_set.sample(n=5)

Unnamed: 0,sentence,emotion
11628,이러나 저러나 집에서 지금 하는 일이 짱이예요.,0
115211,근데 연상녀 좋아하고있더라고요?..,1
112483,친구가 무사히 출산을 했다고 알려왔는데 얼마나 다행인지 몰라.,0
5428,왠만하면 걸치고 있는 코트 벗으시지?,5
132196,자식들이 아무도 나를 도와주지 않아서 슬퍼.,2


In [5]:
# 라이브러리 불러오기
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import gluonnlp as nlp

#kobert
from kobert_tokenizer import KoBERTTokenizer

# transformers
from transformers import BertModel
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

# GPU 사용시 필요
device = torch.device("cuda")

---

__learning rate = e-5__

In [4]:
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)
vocab = nlp.vocab.BERTVocab.from_sentencepiece(tokenizer.vocab_file, padding_token='[PAD]')
tok = tokenizer.tokenize

# Setting parameters
max_len = 64    
batch_size = 32 #베이스라인 64
warmup_ratio = 0.1
num_epochs = 8  # 에포크 횟수
max_grad_norm = 1
log_interval = 200
learning_rate = 1e-5

In [5]:
# 모델에 사용되는 데이터셋 클래스 정의
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer,vocab, max_len,
                 pad, pair):
   
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len,vocab=vocab, pad=pad, pair=pair)
        
        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))
         
    def __len__(self):
        return (len(self.labels))

In [6]:
# 감성 분류 모델 정의
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,     
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

In [7]:
# 모델 학습에 사용할 데이터셋을 [data, label] 배열로 피팅
train_set_data = [[i, str(j)] for i, j in zip(train_set['sentence'], train_set['emotion'])]

# sklearn 의 train_test_split 모듈-> 4:1로 학습&검증 데이터를 분류 
train_set_data, test_set_data = train_test_split(train_set_data, test_size = 0.2, random_state=0)

# 데이터셋을 Bert모델에 입력할 수 있게 변환
train_set_data = BERTDataset(train_set_data, 0, 1, tok, vocab, max_len, True, False)
test_set_data = BERTDataset(test_set_data, 0, 1, tok, vocab, max_len, True, False)

# 배치데이터셋 생성
train_dataloader = torch.utils.data.DataLoader(train_set_data, batch_size=batch_size, num_workers=0)    # num_workers: 데이터 로딩할때 쓰는 프로세스 수(로딩속도)
test_dataloader = torch.utils.data.DataLoader(test_set_data, batch_size=batch_size, num_workers=0)

In [8]:
# 모델 선언
model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

In [9]:
# 정확도 계산
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

# 예측 반환
def predict(sentence):
    dataset = [[sentence, '0']]
    test = BERTDataset(dataset, 0, 1, tok, vocab, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=batch_size, num_workers=0)  #로컬에서는 디폴트(0)으로 수정
    model.eval()
    answer = 0
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        for logits in out:
            logits = logits.detach().cpu().numpy()
            answer = np.argmax(logits)
    return answer

In [10]:
for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
    model.eval()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(test_dataloader)):   # 아까 만든 테스트 배치 데이터 - 정확도 측정
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.8971830606460571 train acc 0.15625
epoch 1 batch id 201 loss 1.8041564226150513 train acc 0.14894278606965175
epoch 1 batch id 401 loss 1.785013198852539 train acc 0.17573254364089774
epoch 1 batch id 601 loss 1.5961964130401611 train acc 0.22753743760399334
epoch 1 batch id 801 loss 1.3921833038330078 train acc 0.27071629213483145
epoch 1 batch id 1001 loss 1.238572597503662 train acc 0.3160277222777223
epoch 1 batch id 1201 loss 1.423027753829956 train acc 0.3473667776852623
epoch 1 batch id 1401 loss 1.1432969570159912 train acc 0.37517844396859384
epoch 1 batch id 1601 loss 1.0395294427871704 train acc 0.39789584634603375
epoch 1 batch id 1801 loss 1.2298238277435303 train acc 0.41837867851193783
epoch 1 batch id 2001 loss 1.1632047891616821 train acc 0.4368753123438281
epoch 1 batch id 2201 loss 1.0964641571044922 train acc 0.454253748296229
epoch 1 batch id 2401 loss 1.249979019165039 train acc 0.46889316951270305
epoch 1 batch id 2601 loss 1.21470475196

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 1 test acc 0.6599916141752445


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.9081122875213623 train acc 0.6875
epoch 2 batch id 201 loss 1.1028437614440918 train acc 0.6553171641791045
epoch 2 batch id 401 loss 0.9093326330184937 train acc 0.6530548628428927
epoch 2 batch id 601 loss 0.9103515148162842 train acc 0.6543261231281198
epoch 2 batch id 801 loss 0.9869348406791687 train acc 0.6522705992509363
epoch 2 batch id 1001 loss 0.9001228213310242 train acc 0.6564373126873126
epoch 2 batch id 1201 loss 1.1010137796401978 train acc 0.6572907993338885
epoch 2 batch id 1401 loss 1.019248366355896 train acc 0.6592389364739472
epoch 2 batch id 1601 loss 0.8049092292785645 train acc 0.6603099625234229
epoch 2 batch id 1801 loss 1.0864335298538208 train acc 0.6614554414214325
epoch 2 batch id 2001 loss 0.896577775478363 train acc 0.6612787356321839
epoch 2 batch id 2201 loss 0.9312554597854614 train acc 0.663604611540209
epoch 2 batch id 2401 loss 0.988960862159729 train acc 0.6655299875052062
epoch 2 batch id 2601 loss 1.0572643280029297 tr

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 2 test acc 0.6813994725252215


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.8242672681808472 train acc 0.65625
epoch 3 batch id 201 loss 1.0347223281860352 train acc 0.7027363184079602
epoch 3 batch id 401 loss 0.7011292576789856 train acc 0.6984881546134664
epoch 3 batch id 601 loss 0.8223734498023987 train acc 0.699563227953411
epoch 3 batch id 801 loss 0.8989866971969604 train acc 0.69666822721598
epoch 3 batch id 1001 loss 0.7903527021408081 train acc 0.6996128871128872
epoch 3 batch id 1201 loss 1.05911123752594 train acc 0.6997554121565362
epoch 3 batch id 1401 loss 1.1098003387451172 train acc 0.7002141327623126
epoch 3 batch id 1601 loss 0.7144538164138794 train acc 0.7006168019987508
epoch 3 batch id 1801 loss 0.8862524032592773 train acc 0.7007912270960578
epoch 3 batch id 2001 loss 0.8934391736984253 train acc 0.7013212143928036
epoch 3 batch id 2201 loss 0.8682677149772644 train acc 0.7031179009541118
epoch 3 batch id 2401 loss 0.8805285692214966 train acc 0.7050317576009996
epoch 3 batch id 2601 loss 0.9568201899528503 tr

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 3 test acc 0.6859780688277769


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.723929762840271 train acc 0.6875
epoch 4 batch id 201 loss 1.0655529499053955 train acc 0.7377176616915423
epoch 4 batch id 401 loss 0.5858783721923828 train acc 0.732076059850374
epoch 4 batch id 601 loss 0.7162054777145386 train acc 0.7317491680532446
epoch 4 batch id 801 loss 0.7998660802841187 train acc 0.7286594881398252
epoch 4 batch id 1001 loss 0.6983622908592224 train acc 0.7307067932067932
epoch 4 batch id 1201 loss 1.055549144744873 train acc 0.729990632805995
epoch 4 batch id 1401 loss 1.0035539865493774 train acc 0.7311964668094219
epoch 4 batch id 1601 loss 0.7015185356140137 train acc 0.7317887257963772
epoch 4 batch id 1801 loss 0.8077678680419922 train acc 0.7321626873958912
epoch 4 batch id 2001 loss 0.8292643427848816 train acc 0.7324775112443778
epoch 4 batch id 2201 loss 0.7264562845230103 train acc 0.7349074284416175
epoch 4 batch id 2401 loss 0.783591628074646 train acc 0.7361516034985423
epoch 4 batch id 2601 loss 0.860729455947876 trai

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 4 test acc 0.687928893327188


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.6350523829460144 train acc 0.75
epoch 5 batch id 201 loss 0.9100580811500549 train acc 0.7653917910447762
epoch 5 batch id 401 loss 0.5264415740966797 train acc 0.7609881546134664
epoch 5 batch id 601 loss 0.6460691690444946 train acc 0.7627391846921797
epoch 5 batch id 801 loss 0.7493385672569275 train acc 0.7586610486891385
epoch 5 batch id 1001 loss 0.7259968519210815 train acc 0.7605831668331668
epoch 5 batch id 1201 loss 0.8377357721328735 train acc 0.7605901332223147
epoch 5 batch id 1401 loss 0.8713979721069336 train acc 0.7609966095645967
epoch 5 batch id 1601 loss 0.648465096950531 train acc 0.7618675827607745
epoch 5 batch id 1801 loss 0.7071273922920227 train acc 0.7623715991116047
epoch 5 batch id 2001 loss 0.8379148840904236 train acc 0.7628373313343328
epoch 5 batch id 2201 loss 0.676481306552887 train acc 0.7639425261244889
epoch 5 batch id 2401 loss 0.7456495761871338 train acc 0.7653451686797168
epoch 5 batch id 2601 loss 0.8416330814361572 tr

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 5 test acc 0.6911824012905208


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 0.6014813780784607 train acc 0.78125
epoch 6 batch id 201 loss 0.7500813603401184 train acc 0.7932213930348259
epoch 6 batch id 401 loss 0.39017343521118164 train acc 0.7842113466334164
epoch 6 batch id 601 loss 0.5756874084472656 train acc 0.7841618136439268
epoch 6 batch id 801 loss 0.5853288173675537 train acc 0.7795724094881398
epoch 6 batch id 1001 loss 0.5835893750190735 train acc 0.78252997002997
epoch 6 batch id 1201 loss 0.717678964138031 train acc 0.7836178184845962
epoch 6 batch id 1401 loss 0.6399487257003784 train acc 0.7837928265524625
epoch 6 batch id 1601 loss 0.49559852480888367 train acc 0.7841778575890068
epoch 6 batch id 1801 loss 0.7371392846107483 train acc 0.7843559133814547
epoch 6 batch id 2001 loss 0.7329521775245667 train acc 0.7847482508745627
epoch 6 batch id 2201 loss 0.6145915985107422 train acc 0.7862051340299864
epoch 6 batch id 2401 loss 0.6567460894584656 train acc 0.7869507496876301
epoch 6 batch id 2601 loss 0.866472125053405

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 6 test acc 0.6882825702872944


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 0.5165911316871643 train acc 0.71875
epoch 7 batch id 201 loss 0.9076423048973083 train acc 0.8041044776119403
epoch 7 batch id 401 loss 0.38108810782432556 train acc 0.7995635910224439
epoch 7 batch id 601 loss 0.5669339299201965 train acc 0.8011127287853578
epoch 7 batch id 801 loss 0.5849937796592712 train acc 0.7975187265917603
epoch 7 batch id 1001 loss 0.5727391839027405 train acc 0.8000124875124875
epoch 7 batch id 1201 loss 0.8483688831329346 train acc 0.8003486677768527
epoch 7 batch id 1401 loss 0.663503885269165 train acc 0.7999866167023555
epoch 7 batch id 1601 loss 0.460993230342865 train acc 0.8000078076202374
epoch 7 batch id 1801 loss 0.650044858455658 train acc 0.7990873126041088
epoch 7 batch id 2001 loss 0.8157662749290466 train acc 0.7996939030484758
epoch 7 batch id 2201 loss 0.5382829904556274 train acc 0.8005452067242163
epoch 7 batch id 2401 loss 0.6988325715065002 train acc 0.8015019783423574
epoch 7 batch id 2601 loss 0.5725100636482239

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 7 test acc 0.6868470579198033


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 0.4622437357902527 train acc 0.875
epoch 8 batch id 201 loss 0.7556312680244446 train acc 0.8215174129353234
epoch 8 batch id 401 loss 0.5616306662559509 train acc 0.8144482543640897
epoch 8 batch id 601 loss 0.5034376978874207 train acc 0.8138519134775375
epoch 8 batch id 801 loss 0.6494439840316772 train acc 0.8105493133583022
epoch 8 batch id 1001 loss 0.7208328247070312 train acc 0.8106580919080919
epoch 8 batch id 1201 loss 0.7784857153892517 train acc 0.8110689009159034
epoch 8 batch id 1401 loss 0.5861051678657532 train acc 0.8108940042826552
epoch 8 batch id 1601 loss 0.6852321624755859 train acc 0.8097478138663335
epoch 8 batch id 1801 loss 0.5190677642822266 train acc 0.8090123542476402
epoch 8 batch id 2001 loss 0.8699840307235718 train acc 0.8094234132933533
epoch 8 batch id 2201 loss 0.6189286708831787 train acc 0.8101857110404361
epoch 8 batch id 2401 loss 0.6380034685134888 train acc 0.8110162432319866
epoch 8 batch id 2601 loss 0.6871709823608398

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 8 test acc 0.6872519460234546


학습률: e-5, 에포크: 5 )0.6911824012905208

---

__learingrate = 5e-6__

In [5]:
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)
vocab = nlp.vocab.BERTVocab.from_sentencepiece(tokenizer.vocab_file, padding_token='[PAD]')
tok = tokenizer.tokenize

# Setting parameters
max_len = 64    
batch_size = 32 #베이스라인 64
warmup_ratio = 0.1
num_epochs = 8  # 에포크 횟수
max_grad_norm = 1
log_interval = 200
learning_rate = 5e-6

# 모델에 사용되는 데이터셋 클래스 정의
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer,vocab, max_len,
                 pad, pair):
   
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len,vocab=vocab, pad=pad, pair=pair)
        
        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))
         
    def __len__(self):
        return (len(self.labels))
    
# 감성 분류 모델 정의
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,     
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

In [6]:
# 모델 학습에 사용할 데이터셋을 [data, label] 배열로 피팅
train_set_data = [[i, str(j)] for i, j in zip(train_set['sentence'], train_set['emotion'])]

# sklearn 의 train_test_split 모듈-> 4:1로 학습&검증 데이터를 분류 
train_set_data, test_set_data = train_test_split(train_set_data, test_size = 0.2, random_state=0)

# 데이터셋을 Bert모델에 입력할 수 있게 변환
train_set_data = BERTDataset(train_set_data, 0, 1, tok, vocab, max_len, True, False)
test_set_data = BERTDataset(test_set_data, 0, 1, tok, vocab, max_len, True, False)

# 배치데이터셋 생성
train_dataloader = torch.utils.data.DataLoader(train_set_data, batch_size=batch_size, num_workers=0)    # num_workers: 데이터 로딩할때 쓰는 프로세스 수(로딩속도)
test_dataloader = torch.utils.data.DataLoader(test_set_data, batch_size=batch_size, num_workers=0)

# 모델 선언
model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

# 정확도 계산
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

# 예측 반환
def predict(sentence):
    dataset = [[sentence, '0']]
    test = BERTDataset(dataset, 0, 1, tok, vocab, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=batch_size, num_workers=0)  #로컬에서는 디폴트(0)으로 수정
    model.eval()
    answer = 0
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        for logits in out:
            logits = logits.detach().cpu().numpy()
            answer = np.argmax(logits)
    return answer

In [7]:
for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
    model.eval()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(test_dataloader)):   # 아까 만든 테스트 배치 데이터 - 정확도 측정
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.7560423612594604 train acc 0.25
epoch 1 batch id 201 loss 1.8199613094329834 train acc 0.18921019900497513
epoch 1 batch id 401 loss 1.7832614183425903 train acc 0.2214775561097257
epoch 1 batch id 601 loss 1.6579742431640625 train acc 0.2599313643926789
epoch 1 batch id 801 loss 1.55643892288208 train acc 0.2914715980024969
epoch 1 batch id 1001 loss 1.4163613319396973 train acc 0.3179632867132867
epoch 1 batch id 1201 loss 1.5955044031143188 train acc 0.3362822647793505
epoch 1 batch id 1401 loss 1.3921822309494019 train acc 0.3553042469664525
epoch 1 batch id 1601 loss 1.1526474952697754 train acc 0.37441442848219864
epoch 1 batch id 1801 loss 1.3657439947128296 train acc 0.38988756246529704
epoch 1 batch id 2001 loss 1.4022252559661865 train acc 0.40445402298850575
epoch 1 batch id 2201 loss 1.257387638092041 train acc 0.4181763970922308
epoch 1 batch id 2401 loss 1.2686065435409546 train acc 0.43157798833819244
epoch 1 batch id 2601 loss 1.206778287887573

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 1 test acc 0.6482722640446561


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.0646320581436157 train acc 0.59375
epoch 2 batch id 201 loss 1.1876798868179321 train acc 0.636660447761194
epoch 2 batch id 401 loss 0.9579286575317383 train acc 0.6388715710723192
epoch 2 batch id 601 loss 1.014765739440918 train acc 0.6410669717138103
epoch 2 batch id 801 loss 1.0426069498062134 train acc 0.6399422596754057
epoch 2 batch id 1001 loss 0.9033598899841309 train acc 0.6426073926073926
epoch 2 batch id 1201 loss 1.0622522830963135 train acc 0.643526228143214
epoch 2 batch id 1401 loss 1.0717408657073975 train acc 0.6459225553176302
epoch 2 batch id 1601 loss 0.918716311454773 train acc 0.6458268269831355
epoch 2 batch id 1801 loss 1.133937120437622 train acc 0.6463943642420877
epoch 2 batch id 2001 loss 0.9892600774765015 train acc 0.6466454272863568
epoch 2 batch id 2201 loss 1.029283046722412 train acc 0.648469445706497
epoch 2 batch id 2401 loss 1.1141307353973389 train acc 0.6503800499791753
epoch 2 batch id 2601 loss 1.1424827575683594 trai

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 2 test acc 0.6742219106877656


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.81449955701828 train acc 0.625
epoch 3 batch id 201 loss 0.9909664988517761 train acc 0.681592039800995
epoch 3 batch id 401 loss 0.8221098780632019 train acc 0.6781483790523691
epoch 3 batch id 601 loss 0.9510586261749268 train acc 0.6808444259567388
epoch 3 batch id 801 loss 0.9641091823577881 train acc 0.6794241573033708
epoch 3 batch id 1001 loss 0.8540115356445312 train acc 0.6812562437562437
epoch 3 batch id 1201 loss 0.9977259039878845 train acc 0.6802664446294754
epoch 3 batch id 1401 loss 1.044524073600769 train acc 0.6820128479657388
epoch 3 batch id 1601 loss 0.8528668284416199 train acc 0.6818199562773267
epoch 3 batch id 1801 loss 1.0531636476516724 train acc 0.6822251526929484
epoch 3 batch id 2001 loss 0.8951471447944641 train acc 0.6818153423288356
epoch 3 batch id 2201 loss 0.9447652101516724 train acc 0.6829566106315311
epoch 3 batch id 2401 loss 0.9381710290908813 train acc 0.6838817159516868
epoch 3 batch id 2601 loss 1.0848124027252197 tra

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 3 test acc 0.6832030649869412


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.7308766841888428 train acc 0.6875
epoch 4 batch id 201 loss 1.0068799257278442 train acc 0.699160447761194
epoch 4 batch id 401 loss 0.7843340635299683 train acc 0.6982543640897756
epoch 4 batch id 601 loss 0.8583381772041321 train acc 0.699303244592346
epoch 4 batch id 801 loss 0.9086947441101074 train acc 0.69666822721598
epoch 4 batch id 1001 loss 0.8532354831695557 train acc 0.7002060439560439
epoch 4 batch id 1201 loss 1.0290346145629883 train acc 0.7001457119067444
epoch 4 batch id 1401 loss 0.9974351525306702 train acc 0.701686295503212
epoch 4 batch id 1601 loss 0.7042068243026733 train acc 0.7016317926296065
epoch 4 batch id 1801 loss 0.8527926206588745 train acc 0.701467934480844
epoch 4 batch id 2001 loss 0.8598966002464294 train acc 0.7015086206896551
epoch 4 batch id 2201 loss 0.8696739673614502 train acc 0.7030895047705589
epoch 4 batch id 2401 loss 0.87723708152771 train acc 0.7042898792169929
epoch 4 batch id 2601 loss 0.873131513595581 train a

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 4 test acc 0.6867878450350796


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.7460145950317383 train acc 0.65625
epoch 5 batch id 201 loss 0.9557799696922302 train acc 0.7223258706467661
epoch 5 batch id 401 loss 0.7206341028213501 train acc 0.7192955112219451
epoch 5 batch id 601 loss 0.8167496919631958 train acc 0.718801996672213
epoch 5 batch id 801 loss 0.8008030652999878 train acc 0.7170333957553059
epoch 5 batch id 1001 loss 0.7334114909172058 train acc 0.7205919080919081
epoch 5 batch id 1201 loss 0.9250078201293945 train acc 0.719738759367194
epoch 5 batch id 1401 loss 1.0890610218048096 train acc 0.7208244111349036
epoch 5 batch id 1601 loss 0.6852176785469055 train acc 0.7199016239850093
epoch 5 batch id 1801 loss 1.0151104927062988 train acc 0.7197910882842865
epoch 5 batch id 2001 loss 0.9826447367668152 train acc 0.7197651174412794
epoch 5 batch id 2201 loss 0.8871434330940247 train acc 0.7221433439345752
epoch 5 batch id 2401 loss 0.8034678101539612 train acc 0.7227717617659308
epoch 5 batch id 2601 loss 0.9389786124229431

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 5 test acc 0.6883481845649613


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 0.7258897423744202 train acc 0.6875
epoch 6 batch id 201 loss 0.9424387812614441 train acc 0.7388059701492538
epoch 6 batch id 401 loss 0.6600649356842041 train acc 0.7340243142144638
epoch 6 batch id 601 loss 0.753972589969635 train acc 0.7345049916805324
epoch 6 batch id 801 loss 0.8334688544273376 train acc 0.7322877652933832
epoch 6 batch id 1001 loss 0.7524935007095337 train acc 0.7343593906093906
epoch 6 batch id 1201 loss 1.0087593793869019 train acc 0.7342579100749376
epoch 6 batch id 1401 loss 0.8968778252601624 train acc 0.734943790149893
epoch 6 batch id 1601 loss 0.7052469849586487 train acc 0.7348532167395377
epoch 6 batch id 1801 loss 0.873069167137146 train acc 0.7342448639644642
epoch 6 batch id 2001 loss 0.804807186126709 train acc 0.7338049725137431
epoch 6 batch id 2201 loss 0.7369181513786316 train acc 0.7353475692866879
epoch 6 batch id 2401 loss 0.9235896468162537 train acc 0.7359303415243649
epoch 6 batch id 2601 loss 0.884548544883728 tra

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 6 test acc 0.6894748297229477


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 0.6103459000587463 train acc 0.78125
epoch 7 batch id 201 loss 0.912868857383728 train acc 0.746113184079602
epoch 7 batch id 401 loss 0.6236191391944885 train acc 0.7429862842892768
epoch 7 batch id 601 loss 0.7048535943031311 train acc 0.7444363560732113
epoch 7 batch id 801 loss 0.8117244839668274 train acc 0.7413779650436954
epoch 7 batch id 1001 loss 0.7018693685531616 train acc 0.7434440559440559
epoch 7 batch id 1201 loss 0.9094683527946472 train acc 0.7438592839300583
epoch 7 batch id 1401 loss 1.0306223630905151 train acc 0.7440444325481799
epoch 7 batch id 1601 loss 0.7332769632339478 train acc 0.7428169893816364
epoch 7 batch id 1801 loss 0.7370643615722656 train acc 0.742816490838423
epoch 7 batch id 2001 loss 0.9353119730949402 train acc 0.7430503498250874
epoch 7 batch id 2201 loss 0.7949662208557129 train acc 0.7439374148114494
epoch 7 batch id 2401 loss 0.7995442748069763 train acc 0.744338296543107
epoch 7 batch id 2601 loss 0.9155064225196838 t

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 7 test acc 0.6888346904286372


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 0.6644065380096436 train acc 0.6875
epoch 8 batch id 201 loss 0.758638322353363 train acc 0.7534203980099502
epoch 8 batch id 401 loss 0.5922772884368896 train acc 0.7510910224438903
epoch 8 batch id 601 loss 0.7285424470901489 train acc 0.7514039101497504
epoch 8 batch id 801 loss 0.7126544117927551 train acc 0.7483614232209738
epoch 8 batch id 1001 loss 0.7316358685493469 train acc 0.7499687812187812
epoch 8 batch id 1201 loss 0.9857854247093201 train acc 0.7491933805162365
epoch 8 batch id 1401 loss 0.8305946588516235 train acc 0.7493085296216988
epoch 8 batch id 1601 loss 0.6849831342697144 train acc 0.748145690193629
epoch 8 batch id 1801 loss 0.8749753832817078 train acc 0.7470502498611882
epoch 8 batch id 2001 loss 0.7876958847045898 train acc 0.7469546476761619
epoch 8 batch id 2201 loss 0.7933128476142883 train acc 0.7479696728759655
epoch 8 batch id 2401 loss 0.6795426607131958 train acc 0.7486984589754269
epoch 8 batch id 2601 loss 0.891927182674408 t

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 8 test acc 0.6888714984380601


학습률: 5e-6, 에포크: 6 ) 0.6894748297229477

---

__Learning rate = e-4__

In [4]:
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)
vocab = nlp.vocab.BERTVocab.from_sentencepiece(tokenizer.vocab_file, padding_token='[PAD]')
tok = tokenizer.tokenize

# Setting parameters
max_len = 64    
batch_size = 32 #베이스라인 64
warmup_ratio = 0.1
num_epochs = 6  # 에포크 횟수
max_grad_norm = 1
log_interval = 200
learning_rate = 1e-4

# 모델에 사용되는 데이터셋 클래스 정의
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer,vocab, max_len,
                 pad, pair):
   
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len,vocab=vocab, pad=pad, pair=pair)
        
        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))
         
    def __len__(self):
        return (len(self.labels))
    
# 감성 분류 모델 정의
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,     
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

In [6]:
# 모델 학습에 사용할 데이터셋을 [data, label] 배열로 피팅
train_set_data = [[i, str(j)] for i, j in zip(train_set['sentence'], train_set['emotion'])]

# sklearn 의 train_test_split 모듈-> 4:1로 학습&검증 데이터를 분류 
train_set_data, test_set_data = train_test_split(train_set_data, test_size = 0.2, random_state=0)

# 데이터셋을 Bert모델에 입력할 수 있게 변환
train_set_data = BERTDataset(train_set_data, 0, 1, tok, vocab, max_len, True, False)
test_set_data = BERTDataset(test_set_data, 0, 1, tok, vocab, max_len, True, False)

# 배치데이터셋 생성
train_dataloader = torch.utils.data.DataLoader(train_set_data, batch_size=batch_size, num_workers=0)    # num_workers: 데이터 로딩할때 쓰는 프로세스 수(로딩속도)
test_dataloader = torch.utils.data.DataLoader(test_set_data, batch_size=batch_size, num_workers=0)

# 모델 선언
model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

# 정확도 계산
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

# 예측 반환
def predict(sentence):
    dataset = [[sentence, '0']]
    test = BERTDataset(dataset, 0, 1, tok, vocab, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=batch_size, num_workers=0)  #로컬에서는 디폴트(0)으로 수정
    model.eval()
    answer = 0
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        for logits in out:
            logits = logits.detach().cpu().numpy()
            answer = np.argmax(logits)
    return answer

In [7]:
for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
    model.eval()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(test_dataloader)):   # 아까 만든 테스트 배치 데이터 - 정확도 측정
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.864007830619812 train acc 0.1875
epoch 1 batch id 201 loss 1.5482573509216309 train acc 0.2751865671641791
epoch 1 batch id 401 loss 1.2720866203308105 train acc 0.3870791770573566
epoch 1 batch id 601 loss 1.0586458444595337 train acc 0.45211106489184694
epoch 1 batch id 801 loss 1.0244446992874146 train acc 0.49020755305867664
epoch 1 batch id 1001 loss 0.9156310558319092 train acc 0.5175761738261738
epoch 1 batch id 1201 loss 1.1014080047607422 train acc 0.5339560782681099
epoch 1 batch id 1401 loss 0.9878259897232056 train acc 0.5471538187009279
epoch 1 batch id 1601 loss 0.9179600477218628 train acc 0.5560977514053717
epoch 1 batch id 1801 loss 1.2706944942474365 train acc 0.5627255691282621
epoch 1 batch id 2001 loss 0.9917345643043518 train acc 0.5679660169915043
epoch 1 batch id 2201 loss 0.8570663332939148 train acc 0.5734609268514311
epoch 1 batch id 2401 loss 1.1694954633712769 train acc 0.577741045397751
epoch 1 batch id 2601 loss 0.964412689208984

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 1 test acc 0.656907743124904


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.9108739495277405 train acc 0.625
epoch 2 batch id 201 loss 1.089524745941162 train acc 0.6414800995024875
epoch 2 batch id 401 loss 0.8243495225906372 train acc 0.640819825436409
epoch 2 batch id 601 loss 0.8856736421585083 train acc 0.6437707986688852
epoch 2 batch id 801 loss 1.0208039283752441 train acc 0.6432974406991261
epoch 2 batch id 1001 loss 0.7564154863357544 train acc 0.6465409590409591
epoch 2 batch id 1201 loss 1.03435480594635 train acc 0.6471950457951707
epoch 2 batch id 1401 loss 1.1974906921386719 train acc 0.6499821556031407
epoch 2 batch id 1601 loss 0.763526201248169 train acc 0.6520534041224235
epoch 2 batch id 1801 loss 1.1741960048675537 train acc 0.6531267351471405
epoch 2 batch id 2001 loss 1.0108872652053833 train acc 0.6549693903048476
epoch 2 batch id 2201 loss 0.9217188954353333 train acc 0.6574568378009995
epoch 2 batch id 2401 loss 0.9743409752845764 train acc 0.6593216368179925
epoch 2 batch id 2601 loss 0.9491451382637024 trai

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 2 test acc 0.6702098376606749


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.7010499835014343 train acc 0.75
epoch 3 batch id 201 loss 0.8250601291656494 train acc 0.6876554726368159
epoch 3 batch id 401 loss 0.6361896991729736 train acc 0.6865648379052369
epoch 3 batch id 601 loss 0.8381947875022888 train acc 0.6883319467554077
epoch 3 batch id 801 loss 0.802291750907898 train acc 0.6884363295880149
epoch 3 batch id 1001 loss 0.7579696774482727 train acc 0.6924013486513486
epoch 3 batch id 1201 loss 0.8300990462303162 train acc 0.6940570358034971
epoch 3 batch id 1401 loss 1.0622456073760986 train acc 0.6969352248394004
epoch 3 batch id 1601 loss 0.664162278175354 train acc 0.698567301686446
epoch 3 batch id 1801 loss 0.9706401228904724 train acc 0.6986743475846752
epoch 3 batch id 2001 loss 0.9214903712272644 train acc 0.7002280109945027
epoch 3 batch id 2201 loss 0.7552844882011414 train acc 0.7029759200363471
epoch 3 batch id 2401 loss 0.9663523435592651 train acc 0.705109850062474
epoch 3 batch id 2601 loss 0.7968223690986633 trai

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 3 test acc 0.6709235929738311


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.6823593378067017 train acc 0.8125
epoch 4 batch id 201 loss 0.6303158402442932 train acc 0.744092039800995
epoch 4 batch id 401 loss 0.4736688435077667 train acc 0.739713216957606
epoch 4 batch id 601 loss 0.7558210492134094 train acc 0.7432404326123128
epoch 4 batch id 801 loss 0.6477299332618713 train acc 0.74457709113608
epoch 4 batch id 1001 loss 0.5585026144981384 train acc 0.7487824675324676
epoch 4 batch id 1201 loss 0.7597712278366089 train acc 0.7510147793505412
epoch 4 batch id 1401 loss 0.8614625930786133 train acc 0.7538365453247681
epoch 4 batch id 1601 loss 0.4792482554912567 train acc 0.7552115865084322
epoch 4 batch id 1801 loss 0.7550599575042725 train acc 0.7556392282065519
epoch 4 batch id 2001 loss 0.9364442825317383 train acc 0.7572307596201899
epoch 4 batch id 2201 loss 0.5168399214744568 train acc 0.760222626079055
epoch 4 batch id 2401 loss 0.7534055113792419 train acc 0.7626379633486048
epoch 4 batch id 2601 loss 0.700395405292511 trai

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 4 test acc 0.6691936165309571


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.4423949718475342 train acc 0.84375
epoch 5 batch id 201 loss 0.6938710808753967 train acc 0.8006840796019901
epoch 5 batch id 401 loss 0.43662476539611816 train acc 0.7980829177057357
epoch 5 batch id 601 loss 0.5720667839050293 train acc 0.8015806988352745
epoch 5 batch id 801 loss 0.579055666923523 train acc 0.8043071161048689
epoch 5 batch id 1001 loss 0.4663785696029663 train acc 0.8085664335664335
epoch 5 batch id 1201 loss 0.5443342924118042 train acc 0.8111469608659451
epoch 5 batch id 1401 loss 0.763404905796051 train acc 0.8139944682369736
epoch 5 batch id 1601 loss 0.30409371852874756 train acc 0.8155449718925671
epoch 5 batch id 1801 loss 0.7939116358757019 train acc 0.8165255413659078
epoch 5 batch id 2001 loss 0.7642737627029419 train acc 0.8185126186906547
epoch 5 batch id 2201 loss 0.49552828073501587 train acc 0.8207774875056792
epoch 5 batch id 2401 loss 0.46359017491340637 train acc 0.8229904206580592
epoch 5 batch id 2601 loss 0.638687431812

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 5 test acc 0.6763343703589901


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 0.3354267477989197 train acc 0.875
epoch 6 batch id 201 loss 0.40598466992378235 train acc 0.8532338308457711
epoch 6 batch id 401 loss 0.3074420988559723 train acc 0.8516988778054863
epoch 6 batch id 601 loss 0.46637555956840515 train acc 0.8545133111480865
epoch 6 batch id 801 loss 0.4471226930618286 train acc 0.8554151061173533
epoch 6 batch id 1001 loss 0.3513924479484558 train acc 0.8585789210789211
epoch 6 batch id 1201 loss 0.3365473747253418 train acc 0.8601165695253955
epoch 6 batch id 1401 loss 0.4722123146057129 train acc 0.8622412562455389
epoch 6 batch id 1601 loss 0.3823358416557312 train acc 0.8629177076826983
epoch 6 batch id 1801 loss 0.6051333546638489 train acc 0.8631836479733481
epoch 6 batch id 2001 loss 0.5618377327919006 train acc 0.8644427786106946
epoch 6 batch id 2201 loss 0.39600956439971924 train acc 0.8659842117219446
epoch 6 batch id 2401 loss 0.24185024201869965 train acc 0.867112661391087
epoch 6 batch id 2601 loss 0.5746057033538

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 6 test acc 0.6754509781328417


In [None]:
학습률 e-4, 에포크4) 0.6691936165309571

---

In [6]:
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)
vocab = nlp.vocab.BERTVocab.from_sentencepiece(tokenizer.vocab_file, padding_token='[PAD]')
tok = tokenizer.tokenize

# Setting parameters
max_len = 64    
batch_size = 32 #베이스라인 64
warmup_ratio = 0.1
num_epochs = 6  # 에포크 횟수
max_grad_norm = 1
log_interval = 200
learning_rate = 0.05

# 모델에 사용되는 데이터셋 클래스 정의
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer,vocab, max_len,
                 pad, pair):
   
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len,vocab=vocab, pad=pad, pair=pair)
        
        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))
         
    def __len__(self):
        return (len(self.labels))
    
# 감성 분류 모델 정의
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,     
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

In [7]:
# 모델 학습에 사용할 데이터셋을 [data, label] 배열로 피팅
train_set_data = [[i, str(j)] for i, j in zip(train_set['sentence'], train_set['emotion'])]

# sklearn 의 train_test_split 모듈-> 4:1로 학습&검증 데이터를 분류 
train_set_data, test_set_data = train_test_split(train_set_data, test_size = 0.2, random_state=0)

# 데이터셋을 Bert모델에 입력할 수 있게 변환
train_set_data = BERTDataset(train_set_data, 0, 1, tok, vocab, max_len, True, False)
test_set_data = BERTDataset(test_set_data, 0, 1, tok, vocab, max_len, True, False)

# 배치데이터셋 생성
train_dataloader = torch.utils.data.DataLoader(train_set_data, batch_size=batch_size, num_workers=0)    # num_workers: 데이터 로딩할때 쓰는 프로세스 수(로딩속도)
test_dataloader = torch.utils.data.DataLoader(test_set_data, batch_size=batch_size, num_workers=0)

# 모델 선언
model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

# 정확도 계산
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

# 예측 반환
def predict(sentence):
    dataset = [[sentence, '0']]
    test = BERTDataset(dataset, 0, 1, tok, vocab, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=batch_size, num_workers=0)  #로컬에서는 디폴트(0)으로 수정
    model.eval()
    answer = 0
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        for logits in out:
            logits = logits.detach().cpu().numpy()
            answer = np.argmax(logits)
    return answer

In [8]:
for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
    model.eval()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(test_dataloader)):   # 아까 만든 테스트 배치 데이터 - 정확도 측정
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.8784935474395752 train acc 0.125
epoch 1 batch id 201 loss 1.6341322660446167 train acc 0.2691231343283582
epoch 1 batch id 401 loss 1.7473065853118896 train acc 0.2627805486284289
epoch 1 batch id 601 loss 1.9483098983764648 train acc 0.2621672212978369
epoch 1 batch id 801 loss 1.8602584600448608 train acc 0.25858302122347065
epoch 1 batch id 1001 loss 2.070012331008911 train acc 0.2563998501498502
epoch 1 batch id 1201 loss 3.2637345790863037 train acc 0.2505203996669442
epoch 1 batch id 1401 loss 3.4350531101226807 train acc 0.24386598857958602
epoch 1 batch id 1601 loss 2.602599859237671 train acc 0.23826905059337913
epoch 1 batch id 1801 loss 13.998085975646973 train acc 0.23349875069405884
epoch 1 batch id 2001 loss 13.515995979309082 train acc 0.23024425287356323
epoch 1 batch id 2201 loss 10.79781436920166 train acc 0.22743923216719672
epoch 1 batch id 2401 loss 9.103242874145508 train acc 0.22573927530195753
epoch 1 batch id 2601 loss 13.108366012573

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 1 test acc 0.332041852307062


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 11.930437088012695 train acc 0.25
epoch 2 batch id 201 loss 8.03327465057373 train acc 0.19760572139303484
epoch 2 batch id 401 loss 8.71690845489502 train acc 0.1987998753117207
epoch 2 batch id 601 loss 8.592778205871582 train acc 0.19581946755407653
epoch 2 batch id 801 loss 11.336365699768066 train acc 0.1970583645443196
epoch 2 batch id 1001 loss 5.910654067993164 train acc 0.1981143856143856
epoch 2 batch id 1201 loss 10.445281982421875 train acc 0.19764779350541214
epoch 2 batch id 1401 loss 6.336135387420654 train acc 0.1982735546038544
epoch 2 batch id 1601 loss 10.253129959106445 train acc 0.19897720174890693
epoch 2 batch id 1801 loss 7.217033386230469 train acc 0.19784147695724597
epoch 2 batch id 2001 loss 7.630019664764404 train acc 0.19730759620189905
epoch 2 batch id 2201 loss 7.626842975616455 train acc 0.19699852339845525
epoch 2 batch id 2401 loss 7.371237277984619 train acc 0.19741774260724698
epoch 2 batch id 2601 loss 9.045645713806152 trai

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 2 test acc 0.332041852307062


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 8.392675399780273 train acc 0.34375
epoch 3 batch id 201 loss 6.370953559875488 train acc 0.19838308457711443
epoch 3 batch id 401 loss 9.300577163696289 train acc 0.1996571072319202
epoch 3 batch id 601 loss 6.4138665199279785 train acc 0.20148710482529117
epoch 3 batch id 801 loss 8.092765808105469 train acc 0.2016619850187266
epoch 3 batch id 1001 loss 5.32517671585083 train acc 0.2024537962037962
epoch 3 batch id 1201 loss 10.068965911865234 train acc 0.2016548709408826
epoch 3 batch id 1401 loss 9.406394004821777 train acc 0.20110635260528195
epoch 3 batch id 1601 loss 5.392395973205566 train acc 0.2005972829481574
epoch 3 batch id 1801 loss 6.14544677734375 train acc 0.20037479178234313
epoch 3 batch id 2001 loss 7.151872158050537 train acc 0.20039980009995004
epoch 3 batch id 2201 loss 5.616616249084473 train acc 0.19989493412085416
epoch 3 batch id 2401 loss 3.8167219161987305 train acc 0.20005987088713037
epoch 3 batch id 2601 loss 8.663488388061523 tra

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 3 test acc 0.11707027449172941


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 8.612787246704102 train acc 0.125
epoch 4 batch id 201 loss 4.565395832061768 train acc 0.19496268656716417
epoch 4 batch id 401 loss 5.748716354370117 train acc 0.19950124688279303
epoch 4 batch id 601 loss 5.079095363616943 train acc 0.1975873544093178
epoch 4 batch id 801 loss 6.069664001464844 train acc 0.19975031210986266
epoch 4 batch id 1001 loss 8.04370403289795 train acc 0.20248501498501498
epoch 4 batch id 1201 loss 4.092875957489014 train acc 0.20121253122398003
epoch 4 batch id 1401 loss 4.969269752502441 train acc 0.20117326909350464
epoch 4 batch id 1601 loss 5.827573776245117 train acc 0.20071439725171766
epoch 4 batch id 1801 loss 6.111551284790039 train acc 0.200461549139367
epoch 4 batch id 2001 loss 5.749558925628662 train acc 0.20100887056471764
epoch 4 batch id 2201 loss 5.1712727546691895 train acc 0.1997103589277601
epoch 4 batch id 2401 loss 3.429074287414551 train acc 0.2007887338608913
epoch 4 batch id 2601 loss 6.8730621337890625 train

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 4 test acc 0.332041852307062


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 3.1780529022216797 train acc 0.25
epoch 5 batch id 201 loss 3.5592195987701416 train acc 0.20864427860696516
epoch 5 batch id 401 loss 3.3476760387420654 train acc 0.20643703241895262
epoch 5 batch id 601 loss 3.587517261505127 train acc 0.20564683860232946
epoch 5 batch id 801 loss 3.9019222259521484 train acc 0.20657771535580524
epoch 5 batch id 1001 loss 2.2047529220581055 train acc 0.2081356143856144
epoch 5 batch id 1201 loss 2.800680637359619 train acc 0.20618234804329724
epoch 5 batch id 1401 loss 3.0531346797943115 train acc 0.2061250892219843
epoch 5 batch id 1601 loss 2.663379669189453 train acc 0.20664818863210493
epoch 5 batch id 1801 loss 2.9226787090301514 train acc 0.20693364797334815
epoch 5 batch id 2001 loss 2.842444896697998 train acc 0.2070995752123938
epoch 5 batch id 2201 loss 2.2258388996124268 train acc 0.20724954566106316
epoch 5 batch id 2401 loss 2.091364860534668 train acc 0.2082725947521866
epoch 5 batch id 2601 loss 2.79256200790405

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 5 test acc 0.1566244814871716


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 2.483792304992676 train acc 0.15625
epoch 6 batch id 201 loss 2.037726879119873 train acc 0.23149875621890548
epoch 6 batch id 401 loss 2.096045732498169 train acc 0.23145261845386533
epoch 6 batch id 601 loss 1.7864364385604858 train acc 0.23247712146422628
epoch 6 batch id 801 loss 1.9927068948745728 train acc 0.23431647940074907
epoch 6 batch id 1001 loss 1.6718286275863647 train acc 0.23944805194805194
epoch 6 batch id 1201 loss 1.7539863586425781 train acc 0.24180370524562864
epoch 6 batch id 1401 loss 1.9316866397857666 train acc 0.2434421841541756
epoch 6 batch id 1601 loss 1.7280086278915405 train acc 0.24611570893191756
epoch 6 batch id 1801 loss 1.8776930570602417 train acc 0.2484730705163798
epoch 6 batch id 2001 loss 1.7919366359710693 train acc 0.25181159420289856
epoch 6 batch id 2201 loss 1.7567132711410522 train acc 0.2553668786915039
epoch 6 batch id 2401 loss 1.6685898303985596 train acc 0.2593450645564348
epoch 6 batch id 2601 loss 1.736379384

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 6 test acc 0.332041852307062


학습률 0.05 , 에포크
(발산)

---

__학습률5e-5, 에포크3__

In [6]:
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)
vocab = nlp.vocab.BERTVocab.from_sentencepiece(tokenizer.vocab_file, padding_token='[PAD]')
tok = tokenizer.tokenize

# Setting parameters
max_len = 64    
batch_size = 32 #베이스라인 64
warmup_ratio = 0.1
num_epochs = 3  # 에포크 횟수
max_grad_norm = 1
log_interval = 200
learning_rate = 5e-5

# 모델에 사용되는 데이터셋 클래스 정의
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer,vocab, max_len,
                 pad, pair):
   
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len,vocab=vocab, pad=pad, pair=pair)
        
        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))
         
    def __len__(self):
        return (len(self.labels))
    
# 감성 분류 모델 정의
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,     
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

In [7]:
# 모델 학습에 사용할 데이터셋을 [data, label] 배열로 피팅
train_set_data = [[i, str(j)] for i, j in zip(train_set['sentence'], train_set['emotion'])]

# sklearn 의 train_test_split 모듈-> 4:1로 학습&검증 데이터를 분류 
train_set_data, test_set_data = train_test_split(train_set_data, test_size = 0.2, random_state=0)

# 데이터셋을 Bert모델에 입력할 수 있게 변환
train_set_data = BERTDataset(train_set_data, 0, 1, tok, vocab, max_len, True, False)
test_set_data = BERTDataset(test_set_data, 0, 1, tok, vocab, max_len, True, False)

# 배치데이터셋 생성
train_dataloader = torch.utils.data.DataLoader(train_set_data, batch_size=batch_size, num_workers=0)    # num_workers: 데이터 로딩할때 쓰는 프로세스 수(로딩속도)
test_dataloader = torch.utils.data.DataLoader(test_set_data, batch_size=batch_size, num_workers=0)

# 모델 선언
model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

# 정확도 계산
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

# 예측 반환
def predict(sentence):
    dataset = [[sentence, '0']]
    test = BERTDataset(dataset, 0, 1, tok, vocab, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=batch_size, num_workers=0)  #로컬에서는 디폴트(0)으로 수정
    model.eval()
    answer = 0
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        for logits in out:
            logits = logits.detach().cpu().numpy()
            answer = np.argmax(logits)
    return answer

In [8]:
train_accuarcy, test_accuarcy = [], []

for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    train_accuarcy.append(train_acc / (batch_id+1))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
    model.eval()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(test_dataloader)):   # 아까 만든 테스트 배치 데이터 - 정확도 측정
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
    test_accuarcy.append(test_acc / (batch_id+1))
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.8545067310333252 train acc 0.15625
epoch 1 batch id 201 loss 1.527410626411438 train acc 0.28280472636815923
epoch 1 batch id 401 loss 1.1838306188583374 train acc 0.3892612219451372
epoch 1 batch id 601 loss 1.1383438110351562 train acc 0.45450291181364394
epoch 1 batch id 801 loss 1.0952427387237549 train acc 0.48892009987515606
epoch 1 batch id 1001 loss 1.0606321096420288 train acc 0.5161713286713286
epoch 1 batch id 1201 loss 1.0103175640106201 train acc 0.5336698584512906
epoch 1 batch id 1401 loss 1.1667410135269165 train acc 0.5482913990007138
epoch 1 batch id 1601 loss 0.9199232459068298 train acc 0.5585961898813242
epoch 1 batch id 1801 loss 1.1422905921936035 train acc 0.5668031649083842
epoch 1 batch id 2001 loss 1.1484204530715942 train acc 0.574197276361819
epoch 1 batch id 2201 loss 0.9804944396018982 train acc 0.5814260563380281
epoch 1 batch id 2401 loss 1.1562060117721558 train acc 0.5875026030820492
epoch 1 batch id 2601 loss 1.0403692722320

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 1 test acc 0.6740378706406513


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.7684194445610046 train acc 0.6875
epoch 2 batch id 201 loss 1.0111333131790161 train acc 0.6694651741293532
epoch 2 batch id 401 loss 0.843646764755249 train acc 0.6659912718204489
epoch 2 batch id 601 loss 0.9792618155479431 train acc 0.668053244592346
epoch 2 batch id 801 loss 0.8164739012718201 train acc 0.6707240948813983
epoch 2 batch id 1001 loss 0.7771753072738647 train acc 0.6764173326673326
epoch 2 batch id 1201 loss 1.0254918336868286 train acc 0.6795378850957535
epoch 2 batch id 1401 loss 1.0714303255081177 train acc 0.6831727337615988
epoch 2 batch id 1601 loss 0.7517008185386658 train acc 0.6855090568394753
epoch 2 batch id 1801 loss 0.9379293918609619 train acc 0.6876214602998334
epoch 2 batch id 2001 loss 0.8575830459594727 train acc 0.6900456021989005
epoch 2 batch id 2201 loss 0.8544245362281799 train acc 0.6931224443434802
epoch 2 batch id 2401 loss 0.8986386060714722 train acc 0.695192107455227
epoch 2 batch id 2601 loss 0.8617163300514221 t

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 2 test acc 0.6905646668715112


  0%|          | 0/3395 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.6888160705566406 train acc 0.71875
epoch 3 batch id 201 loss 0.7808974385261536 train acc 0.7492226368159204
epoch 3 batch id 401 loss 0.6058851480484009 train acc 0.742752493765586
epoch 3 batch id 601 loss 0.7424806952476501 train acc 0.7453202995008319
epoch 3 batch id 801 loss 0.7666810750961304 train acc 0.7468398876404494
epoch 3 batch id 1001 loss 0.6716791987419128 train acc 0.7514985014985015
epoch 3 batch id 1201 loss 0.7556205987930298 train acc 0.7546835970024979
epoch 3 batch id 1401 loss 0.6293813586235046 train acc 0.7579853675945754
epoch 3 batch id 1601 loss 0.5433908700942993 train acc 0.7600718301061836
epoch 3 batch id 1801 loss 0.6680213809013367 train acc 0.7614519711271516
epoch 3 batch id 2001 loss 0.7848539352416992 train acc 0.7631028235882059
epoch 3 batch id 2201 loss 0.629702091217041 train acc 0.7656746933212176
epoch 3 batch id 2401 loss 0.6801443696022034 train acc 0.7678701582673886
epoch 3 batch id 2601 loss 0.8584754467010498

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 3 test acc 0.6939654068725355


In [9]:
train_accuarcy, test_accuarcy

([0.6073607020127639, 0.7052804369170349, 0.7728184830633285],
 [0.6740378706406513, 0.6905646668715112, 0.6939654068725355])

In [10]:
torch.save(model, 'model/kobert-v6.pt')

In [11]:
# 모델 사이즈 확인
import os

model_path = 'model/kobert-v6.pt'
size = os.path.getsize(model_path) / (1024*1024) # mb 단위
print(f"Model size: {size:.2f} MB")

Model size: 351.79 MB
