In [1]:
# Prevent runtime errors: free unreferenced Python objects and cached GPU memory before starting
import gc
import torch

gc.collect()  # run a full garbage-collection pass
torch.cuda.empty_cache()  # ask PyTorch to release cached, currently unused CUDA memory

In [2]:
import pandas as pd

train_set = pd.read_csv('data/병합데이터셋-v3b.csv', index_col=0)

# Convert each emotion name to its integer class label
emotions = {'행복': 0, '불안': 1, '놀람': 2, '슬픔': 3, '분노': 4, '중립': 5}
train_set['emotion'] = train_set.emotion.map(emotions)

# Series.map yields NaN for any value that is not a key of `emotions`. Those NaNs
# would only surface later as an obscure int-conversion error while building the
# dataset, so fail fast here with a clear message instead.
unmapped_rows = train_set['emotion'].isna()
if unmapped_rows.any():
    raise ValueError(
        "{} row(s) have an emotion label that is missing from `emotions`".format(int(unmapped_rows.sum()))
    )

# Show a few random rows as a sanity check
train_set.sample(n=5)

Unnamed: 0,sentence,emotion
180,벌써 1년이 되어가네요시간이 참 빨라요!!,2
47700,선발진은 거의 두산 맞먹네.,2
101674,"너무 맛있을 거 같아요, 할머님.",0
20011,미래가 비참하지 않으려면 지금 어느 정도 모아놓아야 할 텐데 걱정이야.,3
87099,고객층이 정~말 다양하시네요?,5


In [3]:
# 라이브러리 불러오기
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import gluonnlp as nlp

#kobert
from kobert_tokenizer import KoBERTTokenizer

# transformers
from transformers import BertModel
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# Load the pretrained KoBERT tokenizer and encoder, then build a gluonnlp vocab
# from the tokenizer's SentencePiece file.
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
# return_dict=False: the classifier's forward() unpacks the encoder output as a 2-tuple
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)
vocab = nlp.vocab.BERTVocab.from_sentencepiece(tokenizer.vocab_file, padding_token='[PAD]')
tok = tokenizer.tokenize  # tokenizing callable handed to BERTDataset

# Setting parameters
max_len = 64    # max_seq_length given to BERTSentenceTransform
batch_size = 32 # baseline: 64
warmup_ratio = 0.1  # fraction of total training steps used as scheduler warmup
num_epochs = 5  # number of epochs
max_grad_norm = 1  # threshold passed to clip_grad_norm_ in the training loop
log_interval = 200  # print training progress every this many batches
learning_rate = 5e-5  # lr passed to AdamW

# Dataset class that supplies the model with transformed sentences and labels
class BERTDataset(Dataset):
    """Pre-transformed sentences paired with integer labels.

    Args:
        dataset: indexable records; each record holds a sentence at
            ``sent_idx`` and a label at ``label_idx``.
        sent_idx: position of the sentence inside a record.
        label_idx: position of the label inside a record.
        bert_tokenizer, vocab, max_len, pad, pair: forwarded unchanged to
            ``nlp.data.BERTSentenceTransform``.
    """

    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, vocab, max_len,
                 pad, pair):
        to_model_input = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, vocab=vocab, pad=pad, pair=pair)

        # First pass: transform every sentence.
        encoded = []
        for record in dataset:
            encoded.append(to_model_input([record[sent_idx]]))

        # Second pass: convert every label to np.int32.
        targets = []
        for record in dataset:
            targets.append(np.int32(record[label_idx]))

        self.sentences = encoded
        self.labels = targets

    def __getitem__(self, i):
        """Return the transformed sentence tuple for item ``i`` with its label appended."""
        features = self.sentences[i]
        target = self.labels[i]
        return features + (target,)

    def __len__(self):
        """Return the number of stored examples."""
        return len(self.labels)
    
# Emotion classification model definition
class BERTClassifier(nn.Module):
    """BERT encoder followed by optional dropout and a linear classification head.

    Args:
        bert: encoder module. It is called with ``input_ids``, ``token_type_ids``
            and ``attention_mask`` and must return a 2-tuple whose second item
            is the pooled output.
        hidden_size: width of the pooled output fed to the linear head.
        num_classes: number of logits produced per example.
        dr_rate: dropout probability applied to the pooled output; ``None`` or
            ``0`` disables dropout.
        params: unused; kept so existing callers keep working.
    """

    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,
                 dr_rate=None,
                 params=None):
        super().__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size, num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Build a float mask shaped like ``token_ids``.

        Row ``i`` holds 1.0 in its first ``valid_length[i]`` positions and 0.0
        elsewhere. ``token_ids`` is expected to be 2-D (batch, sequence).
        """
        # Vectorized comparison instead of one slice assignment per row.
        lengths = torch.as_tensor(valid_length, device=token_ids.device).reshape(-1, 1)
        positions = torch.arange(token_ids.size(1), device=token_ids.device)
        return (positions < lengths).float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Return classification logits of shape (batch, num_classes)."""
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids=token_ids,
                              token_type_ids=segment_ids.long(),
                              attention_mask=attention_mask.float().to(token_ids.device))
        # Bug fix: `out` used to be assigned only when dropout was enabled, so the
        # default dr_rate=None raised UnboundLocalError. Without dropout the
        # pooled output now goes straight to the classifier.
        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.classifier(out)

In [5]:
# Arrange the data as [sentence, label] pairs, the layout BERTDataset indexes into
train_set_data = [[i, str(j)] for i, j in zip(train_set['sentence'], train_set['emotion'])]

# Split 4:1 into training and validation data with sklearn's train_test_split
train_set_data, test_set_data = train_test_split(train_set_data, test_size = 0.2, random_state=0)

# Convert both splits into inputs the BERT model accepts
train_set_data = BERTDataset(train_set_data, 0, 1, tok, vocab, max_len, True, False)
test_set_data = BERTDataset(test_set_data, 0, 1, tok, vocab, max_len, True, False)

# Batched loaders.
# num_workers: number of worker processes used for data loading (affects loading speed).
# shuffle=True reshuffles the training examples every epoch; without it each epoch
# replayed the exact same batch order. The evaluation loader stays unshuffled.
train_dataloader = torch.utils.data.DataLoader(train_set_data, batch_size=batch_size, shuffle=True, num_workers=0)
test_dataloader = torch.utils.data.DataLoader(test_set_data, batch_size=batch_size, num_workers=0)

# Model declaration
model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)
no_decay = ['bias', 'LayerNorm.weight']


def _split_by_weight_decay(named_params, skip_markers):
    """Partition parameters into (decayed, exempt) lists, preserving their order.

    A parameter is exempt when its name contains any of ``skip_markers``.
    """
    decayed, exempt = [], []
    for param_name, param in named_params:
        if any(marker in param_name for marker in skip_markers):
            exempt.append(param)
        else:
            decayed.append(param)
    return decayed, exempt


_decayed_params, _exempt_params = _split_by_weight_decay(model.named_parameters(), no_decay)
optimizer_grouped_parameters = [
    {'params': _decayed_params, 'weight_decay': 0.01},
    {'params': _exempt_params, 'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

# Cosine schedule: one scheduler step per batch, warming up over the first
# `warmup_ratio` share of all training steps.
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=t_total,
)

# Accuracy computation
def calc_accuracy(X,Y):
    """Return the fraction of rows of ``X`` whose highest-scoring column equals ``Y``.

    Args:
        X: 2-D tensor of scores, one row per example.
        Y: 1-D tensor of target class indices.

    Returns:
        NumPy scalar: number of matching rows divided by the number of rows.
    """
    _, predicted = X.max(dim=1)
    n_correct = (predicted == Y).sum().data.cpu().numpy()
    n_rows = predicted.size()[0]
    return n_correct / n_rows

# Return the predicted class for one sentence
def predict(sentence):
    """Classify a single sentence with the module-level ``model``.

    Args:
        sentence: raw text to classify.

    Returns:
        Index of the largest logit (as returned by ``np.argmax``), or ``0`` if
        the loader yields no batch. Leaves ``model`` in eval mode.
    """
    # BERTDataset needs a label column, so attach a dummy '0' that is never used.
    dataset = [[sentence, '0']]
    test = BERTDataset(dataset, 0, 1, tok, vocab, max_len, True, False)
    loader = torch.utils.data.DataLoader(test, batch_size=batch_size, num_workers=0)  # keep the default (0) when running locally
    model.eval()
    answer = 0
    # Inference only: no_grad avoids building autograd graphs for the forward pass.
    with torch.no_grad():
        for token_ids, valid_length, segment_ids, _ in loader:
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            out = model(token_ids, valid_length, segment_ids)
            for logits in out:
                answer = np.argmax(logits.detach().cpu().numpy())
    return answer

In [6]:
# Per-epoch accuracy histories (mean of the per-batch accuracies)
train_accuarcy, test_accuarcy = [], []

for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0

    # ---- training pass ----
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    train_accuarcy.append(train_acc / (batch_id+1))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))

    # ---- evaluation pass ----
    model.eval()
    # no_grad: evaluation needs no gradients, so skip building autograd graphs
    # (previously every evaluation batch kept one alive, wasting GPU memory and time).
    with torch.no_grad():
        for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(test_dataloader)):   # measure accuracy on the held-out batches built earlier
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            label = label.long().to(device)
            out = model(token_ids, valid_length, segment_ids)
            test_acc += calc_accuracy(out, label)
    test_accuarcy.append(test_acc / (batch_id+1))
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.8405989408493042 train acc 0.15625
epoch 1 batch id 201 loss 1.6611812114715576 train acc 0.24984452736318408
epoch 1 batch id 401 loss 1.1772855520248413 train acc 0.33930798004987534
epoch 1 batch id 601 loss 1.2624648809432983 train acc 0.40370216306156403
epoch 1 batch id 801 loss 1.2549487352371216 train acc 0.44877496878901374
epoch 1 batch id 1001 loss 1.0199912786483765 train acc 0.47986388611388614
epoch 1 batch id 1201 loss 1.03671133518219 train acc 0.5033825978351374
epoch 1 batch id 1401 loss 0.7906304001808167 train acc 0.5221270521056388
epoch 1 batch id 1601 loss 0.8985372185707092 train acc 0.534548719550281
epoch 1 batch id 1801 loss 0.6591906547546387 train acc 0.5436910049972238
epoch 1 batch id 2001 loss 0.9145181775093079 train acc 0.5524112943528235
epoch 1 batch id 2201 loss 0.8111550211906433 train acc 0.5603276919582009
epoch 1 batch id 2401 loss 0.9330563545227051 train acc 0.5671204706372345
epoch 1 batch id 2601 loss 0.981473863124

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 1 test acc 0.6622717903415782


  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.498046875 train acc 0.375
epoch 2 batch id 201 loss 1.09908127784729 train acc 0.6565609452736318
epoch 2 batch id 401 loss 0.7351030707359314 train acc 0.6617051122194514
epoch 2 batch id 601 loss 0.9609889984130859 train acc 0.6616056572379367
epoch 2 batch id 801 loss 0.9599224925041199 train acc 0.6629993757802747
epoch 2 batch id 1001 loss 0.8492099046707153 train acc 0.6629932567432567
epoch 2 batch id 1201 loss 0.8906266689300537 train acc 0.6669702331390508
epoch 2 batch id 1401 loss 0.723548412322998 train acc 0.6704809064953604
epoch 2 batch id 1601 loss 0.7749738097190857 train acc 0.6726850405996252
epoch 2 batch id 1801 loss 0.4804736375808716 train acc 0.6733064963908939
epoch 2 batch id 2001 loss 0.854459285736084 train acc 0.6754591454272864
epoch 2 batch id 2201 loss 0.650543749332428 train acc 0.6786261926397092
epoch 2 batch id 2401 loss 0.7461828589439392 train acc 0.6806538942107455
epoch 2 batch id 2601 loss 0.7593034505844116 train acc 0

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 2 test acc 0.6741975853945819


  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 1.2704143524169922 train acc 0.5
epoch 3 batch id 201 loss 0.95688796043396 train acc 0.7083333333333334
epoch 3 batch id 401 loss 0.4161251485347748 train acc 0.720464463840399
epoch 3 batch id 601 loss 0.6803663372993469 train acc 0.7237936772046589
epoch 3 batch id 801 loss 0.9167876839637756 train acc 0.7260065543071161
epoch 3 batch id 1001 loss 0.6575143337249756 train acc 0.7262112887112887
epoch 3 batch id 1201 loss 0.5818576216697693 train acc 0.7302508326394671
epoch 3 batch id 1401 loss 0.5183041095733643 train acc 0.7334716274089935
epoch 3 batch id 1601 loss 0.6896904110908508 train acc 0.7368246408494691
epoch 3 batch id 1801 loss 0.34975388646125793 train acc 0.7378886729594669
epoch 3 batch id 2001 loss 0.6799705028533936 train acc 0.7398332083958021
epoch 3 batch id 2201 loss 0.4767628014087677 train acc 0.7427163789186734
epoch 3 batch id 2401 loss 0.5166285037994385 train acc 0.7449109745939192
epoch 3 batch id 2601 loss 0.6827917695045471 tra

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 3 test acc 0.6826855123674912


  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.8032402992248535 train acc 0.6875
epoch 4 batch id 201 loss 1.0090919733047485 train acc 0.7756529850746269
epoch 4 batch id 401 loss 0.3548327088356018 train acc 0.7813279301745636
epoch 4 batch id 601 loss 0.5835792422294617 train acc 0.7855137271214643
epoch 4 batch id 801 loss 0.8195212483406067 train acc 0.7874141697877652
epoch 4 batch id 1001 loss 0.4579055905342102 train acc 0.7895854145854145
epoch 4 batch id 1201 loss 0.4711407423019409 train acc 0.7932972522897586
epoch 4 batch id 1401 loss 0.42371252179145813 train acc 0.7968415417558886
epoch 4 batch id 1601 loss 0.3355950713157654 train acc 0.7999102123672704
epoch 4 batch id 1801 loss 0.22865699231624603 train acc 0.8010480288728484
epoch 4 batch id 2001 loss 0.5283963680267334 train acc 0.8030515992003998
epoch 4 batch id 2201 loss 0.3444550931453705 train acc 0.8052873693775556
epoch 4 batch id 2401 loss 0.3987598419189453 train acc 0.8073068513119533
epoch 4 batch id 2601 loss 0.4600212275981

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 4 test acc 0.682243816254417


  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.4588736891746521 train acc 0.78125
epoch 5 batch id 201 loss 0.7597526907920837 train acc 0.8337997512437811
epoch 5 batch id 401 loss 0.16679087281227112 train acc 0.8405548628428927
epoch 5 batch id 601 loss 0.5324738025665283 train acc 0.8410981697171381
epoch 5 batch id 801 loss 0.6838040351867676 train acc 0.8412921348314607
epoch 5 batch id 1001 loss 0.43277183175086975 train acc 0.8419393106893107
epoch 5 batch id 1201 loss 0.44571810960769653 train acc 0.8434637801831807
epoch 5 batch id 1401 loss 0.22327153384685516 train acc 0.8455121341898644
epoch 5 batch id 1601 loss 0.3069186210632324 train acc 0.8475757339163024
epoch 5 batch id 1801 loss 0.13263973593711853 train acc 0.8483654913936702
epoch 5 batch id 2001 loss 0.38930419087409973 train acc 0.8490129935032483
epoch 5 batch id 2201 loss 0.4218185842037201 train acc 0.8498125851885506
epoch 5 batch id 2401 loss 0.2279215157032013 train acc 0.8509345064556435
epoch 5 batch id 2601 loss 0.55704092

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 5 test acc 0.6777826855123675


In [7]:
# Display the per-epoch train and test accuracy lists filled in by the training loop
train_accuarcy, test_accuarcy

([0.5915770477312905,
  0.6921129198585739,
  0.7563439157336476,
  0.8160356511490866,
  0.8555631261048909],
 [0.6622717903415782,
  0.6741975853945819,
  0.6826855123674912,
  0.682243816254417,
  0.6777826855123675])

---

In [1]:
# Prevent runtime errors: free unreferenced Python objects and cached GPU memory before starting
import gc
import torch

gc.collect()  # run a full garbage-collection pass
torch.cuda.empty_cache()  # ask PyTorch to release cached, currently unused CUDA memory

In [2]:
import pandas as pd

train_set = pd.read_csv('data/병합데이터셋-v4.csv', index_col=0)

# Convert each emotion name to its integer class label
emotions = {'행복': 0, '불안': 1, '놀람': 2, '슬픔': 3, '분노': 4, '중립': 5}
train_set['emotion'] = train_set.emotion.map(emotions)

# Series.map yields NaN for any value that is not a key of `emotions`. Those NaNs
# would only surface later as an obscure int-conversion error while building the
# dataset, so fail fast here with a clear message instead.
unmapped_rows = train_set['emotion'].isna()
if unmapped_rows.any():
    raise ValueError(
        "{} row(s) have an emotion label that is missing from `emotions`".format(int(unmapped_rows.sum()))
    )

# Show a few random rows as a sanity check
train_set.sample(n=5)

Unnamed: 0,sentence,emotion
99498,담화할 시간은 있고 검찰조사 받을 시간은 없고?,4
24242,"치이... 음악제 나간다는 곡 다 만들었어? 다 만들면 들어봐주기로 했잖아, 내가.",5
65250,나는 부모님이 사소한 일에도 이래라저래라해서 너무 신경 쓰여.,3
69867,학교가 적성에 안 맞는데 진지하게 자퇴를 해야 할지 그냥 다녀야 할지 혼란스러워.,2
19613,촛불집회로 청와대 검찰까지 점령하자,5


In [3]:
# 라이브러리 불러오기
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import gluonnlp as nlp

#kobert
from kobert_tokenizer import KoBERTTokenizer

# transformers
from transformers import BertModel
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# Load the pretrained KoBERT tokenizer and encoder, then build a gluonnlp vocab
# from the tokenizer's SentencePiece file.
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
# return_dict=False: the classifier's forward() unpacks the encoder output as a 2-tuple
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)
vocab = nlp.vocab.BERTVocab.from_sentencepiece(tokenizer.vocab_file, padding_token='[PAD]')
tok = tokenizer.tokenize  # tokenizing callable handed to BERTDataset

# Setting parameters
max_len = 64    # max_seq_length given to BERTSentenceTransform
batch_size = 32 # baseline: 64
warmup_ratio = 0.1  # fraction of total training steps used as scheduler warmup
num_epochs = 5  # number of epochs
max_grad_norm = 1  # threshold passed to clip_grad_norm_ in the training loop
log_interval = 200  # print training progress every this many batches
learning_rate = 5e-5  # lr passed to AdamW

# Dataset class that supplies the model with transformed sentences and labels
class BERTDataset(Dataset):
    """Pre-transformed sentences paired with integer labels.

    Args:
        dataset: indexable records; each record holds a sentence at
            ``sent_idx`` and a label at ``label_idx``.
        sent_idx: position of the sentence inside a record.
        label_idx: position of the label inside a record.
        bert_tokenizer, vocab, max_len, pad, pair: forwarded unchanged to
            ``nlp.data.BERTSentenceTransform``.
    """

    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, vocab, max_len,
                 pad, pair):
        to_model_input = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, vocab=vocab, pad=pad, pair=pair)

        # First pass: transform every sentence.
        encoded = []
        for record in dataset:
            encoded.append(to_model_input([record[sent_idx]]))

        # Second pass: convert every label to np.int32.
        targets = []
        for record in dataset:
            targets.append(np.int32(record[label_idx]))

        self.sentences = encoded
        self.labels = targets

    def __getitem__(self, i):
        """Return the transformed sentence tuple for item ``i`` with its label appended."""
        features = self.sentences[i]
        target = self.labels[i]
        return features + (target,)

    def __len__(self):
        """Return the number of stored examples."""
        return len(self.labels)
    
# Emotion classification model definition
class BERTClassifier(nn.Module):
    """BERT encoder followed by optional dropout and a linear classification head.

    Args:
        bert: encoder module. It is called with ``input_ids``, ``token_type_ids``
            and ``attention_mask`` and must return a 2-tuple whose second item
            is the pooled output.
        hidden_size: width of the pooled output fed to the linear head.
        num_classes: number of logits produced per example.
        dr_rate: dropout probability applied to the pooled output; ``None`` or
            ``0`` disables dropout.
        params: unused; kept so existing callers keep working.
    """

    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,
                 dr_rate=None,
                 params=None):
        super().__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size, num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Build a float mask shaped like ``token_ids``.

        Row ``i`` holds 1.0 in its first ``valid_length[i]`` positions and 0.0
        elsewhere. ``token_ids`` is expected to be 2-D (batch, sequence).
        """
        # Vectorized comparison instead of one slice assignment per row.
        lengths = torch.as_tensor(valid_length, device=token_ids.device).reshape(-1, 1)
        positions = torch.arange(token_ids.size(1), device=token_ids.device)
        return (positions < lengths).float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Return classification logits of shape (batch, num_classes)."""
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids=token_ids,
                              token_type_ids=segment_ids.long(),
                              attention_mask=attention_mask.float().to(token_ids.device))
        # Bug fix: `out` used to be assigned only when dropout was enabled, so the
        # default dr_rate=None raised UnboundLocalError. Without dropout the
        # pooled output now goes straight to the classifier.
        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.classifier(out)

In [7]:
# Arrange the data as [sentence, label] pairs, the layout BERTDataset indexes into
train_set_data = [[i, str(j)] for i, j in zip(train_set['sentence'], train_set['emotion'])]

# Split 4:1 into training and validation data with sklearn's train_test_split
train_set_data, test_set_data = train_test_split(train_set_data, test_size = 0.2, random_state=0)

# Convert both splits into inputs the BERT model accepts
train_set_data = BERTDataset(train_set_data, 0, 1, tok, vocab, max_len, True, False)
test_set_data = BERTDataset(test_set_data, 0, 1, tok, vocab, max_len, True, False)

# Batched loaders.
# num_workers: number of worker processes used for data loading (affects loading speed).
# shuffle=True reshuffles the training examples every epoch; without it each epoch
# replayed the exact same batch order. The evaluation loader stays unshuffled.
train_dataloader = torch.utils.data.DataLoader(train_set_data, batch_size=batch_size, shuffle=True, num_workers=0)
test_dataloader = torch.utils.data.DataLoader(test_set_data, batch_size=batch_size, num_workers=0)

# Model declaration
model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)
no_decay = ['bias', 'LayerNorm.weight']


def _split_by_weight_decay(named_params, skip_markers):
    """Partition parameters into (decayed, exempt) lists, preserving their order.

    A parameter is exempt when its name contains any of ``skip_markers``.
    """
    decayed, exempt = [], []
    for param_name, param in named_params:
        if any(marker in param_name for marker in skip_markers):
            exempt.append(param)
        else:
            decayed.append(param)
    return decayed, exempt


_decayed_params, _exempt_params = _split_by_weight_decay(model.named_parameters(), no_decay)
optimizer_grouped_parameters = [
    {'params': _decayed_params, 'weight_decay': 0.01},
    {'params': _exempt_params, 'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

# Cosine schedule: one scheduler step per batch, warming up over the first
# `warmup_ratio` share of all training steps.
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=t_total,
)

# Accuracy computation
def calc_accuracy(X,Y):
    """Return the fraction of rows of ``X`` whose highest-scoring column equals ``Y``.

    Args:
        X: 2-D tensor of scores, one row per example.
        Y: 1-D tensor of target class indices.

    Returns:
        NumPy scalar: number of matching rows divided by the number of rows.
    """
    _, predicted = X.max(dim=1)
    n_correct = (predicted == Y).sum().data.cpu().numpy()
    n_rows = predicted.size()[0]
    return n_correct / n_rows

# Return the predicted class for one sentence
def predict(sentence):
    """Classify a single sentence with the module-level ``model``.

    Args:
        sentence: raw text to classify.

    Returns:
        Index of the largest logit (as returned by ``np.argmax``), or ``0`` if
        the loader yields no batch. Leaves ``model`` in eval mode.
    """
    # BERTDataset needs a label column, so attach a dummy '0' that is never used.
    dataset = [[sentence, '0']]
    test = BERTDataset(dataset, 0, 1, tok, vocab, max_len, True, False)
    loader = torch.utils.data.DataLoader(test, batch_size=batch_size, num_workers=0)  # keep the default (0) when running locally
    model.eval()
    answer = 0
    # Inference only: no_grad avoids building autograd graphs for the forward pass.
    with torch.no_grad():
        for token_ids, valid_length, segment_ids, _ in loader:
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            out = model(token_ids, valid_length, segment_ids)
            for logits in out:
                answer = np.argmax(logits.detach().cpu().numpy())
    return answer

In [8]:
# Per-epoch accuracy histories (mean of the per-batch accuracies)
train_accuarcy, test_accuarcy = [], []

for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0

    # ---- training pass ----
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    train_accuarcy.append(train_acc / (batch_id+1))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))

    # ---- evaluation pass ----
    model.eval()
    # no_grad: evaluation needs no gradients, so skip building autograd graphs
    # (previously every evaluation batch kept one alive, wasting GPU memory and time).
    with torch.no_grad():
        for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(test_dataloader)):   # measure accuracy on the held-out batches built earlier
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            label = label.long().to(device)
            out = model(token_ids, valid_length, segment_ids)
            test_acc += calc_accuracy(out, label)
    test_accuarcy.append(test_acc / (batch_id+1))
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.811486840248108 train acc 0.1875
epoch 1 batch id 201 loss 1.7802951335906982 train acc 0.28467039800995025
epoch 1 batch id 401 loss 1.425218105316162 train acc 0.3637001246882793
epoch 1 batch id 601 loss 1.037771463394165 train acc 0.420757071547421
epoch 1 batch id 801 loss 1.1631488800048828 train acc 0.460440074906367
epoch 1 batch id 1001 loss 1.0462727546691895 train acc 0.4906968031968032
epoch 1 batch id 1201 loss 1.0188231468200684 train acc 0.5118390924229809
epoch 1 batch id 1401 loss 1.163686990737915 train acc 0.5269450392576731
epoch 1 batch id 1601 loss 0.7133719325065613 train acc 0.5397993441599
epoch 1 batch id 1801 loss 1.0510989427566528 train acc 0.5492434758467518
epoch 1 batch id 2001 loss 0.8356772661209106 train acc 0.5585176161919041
epoch 1 batch id 2201 loss 1.1321587562561035 train acc 0.5656661744661517
epoch 1 batch id 2401 loss 1.051943302154541 train acc 0.5718190337359433
epoch 1 batch id 2601 loss 0.9795247912406921 train a

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 1 test acc 0.6638013047023649


  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.7878365516662598 train acc 0.71875
epoch 2 batch id 201 loss 1.2827378511428833 train acc 0.6573383084577115
epoch 2 batch id 401 loss 0.9302335977554321 train acc 0.6629519950124688
epoch 2 batch id 601 loss 0.7089939117431641 train acc 0.665765391014975
epoch 2 batch id 801 loss 1.1416207551956177 train acc 0.6657693508114857
epoch 2 batch id 1001 loss 0.9826791882514954 train acc 0.6679258241758241
epoch 2 batch id 1201 loss 0.9294556379318237 train acc 0.670092631140716
epoch 2 batch id 1401 loss 0.9485880136489868 train acc 0.6709939329050678
epoch 2 batch id 1601 loss 0.5654458403587341 train acc 0.6746564647095565
epoch 2 batch id 1801 loss 0.6567437648773193 train acc 0.6770544142143253
epoch 2 batch id 2001 loss 0.8190523982048035 train acc 0.6793165917041479
epoch 2 batch id 2201 loss 1.0485857725143433 train acc 0.6820195365742844
epoch 2 batch id 2401 loss 0.7834093570709229 train acc 0.6842461474385673
epoch 2 batch id 2601 loss 0.8431361317634583

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 2 test acc 0.6740509196339585


  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.6175364255905151 train acc 0.75
epoch 3 batch id 201 loss 1.296385407447815 train acc 0.7255907960199005
epoch 3 batch id 401 loss 0.6983014941215515 train acc 0.7272443890274314
epoch 3 batch id 601 loss 0.4794703722000122 train acc 0.7295133111480865
epoch 3 batch id 801 loss 0.8395825624465942 train acc 0.7286204744069913
epoch 3 batch id 1001 loss 0.9233671426773071 train acc 0.7303321678321678
epoch 3 batch id 1201 loss 0.7946392297744751 train acc 0.7330609908409659
epoch 3 batch id 1401 loss 0.8191417455673218 train acc 0.7352560670949322
epoch 3 batch id 1601 loss 0.4188605546951294 train acc 0.7392840412242349
epoch 3 batch id 1801 loss 0.4464825391769409 train acc 0.7413069128262076
epoch 3 batch id 2001 loss 0.5248220562934875 train acc 0.7442060219890055
epoch 3 batch id 2201 loss 0.9251001477241516 train acc 0.7476573148568832
epoch 3 batch id 2401 loss 0.5123996734619141 train acc 0.7498958767180341
epoch 3 batch id 2601 loss 0.8486307859420776 t

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 3 test acc 0.6789662045845792


  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.4814194142818451 train acc 0.84375
epoch 4 batch id 201 loss 1.1751004457473755 train acc 0.7907338308457711
epoch 4 batch id 401 loss 0.5371299386024475 train acc 0.7967581047381546
epoch 4 batch id 601 loss 0.3577406406402588 train acc 0.7976809484193012
epoch 4 batch id 801 loss 0.5434825420379639 train acc 0.797869850187266
epoch 4 batch id 1001 loss 0.7746517658233643 train acc 0.8003246753246753
epoch 4 batch id 1201 loss 0.822655200958252 train acc 0.8015716069941715
epoch 4 batch id 1401 loss 0.7087211608886719 train acc 0.8030424696645253
epoch 4 batch id 1601 loss 0.2680892050266266 train acc 0.8054145846346034
epoch 4 batch id 1801 loss 0.2751755714416504 train acc 0.8072251526929484
epoch 4 batch id 2001 loss 0.3100835978984833 train acc 0.8097513743128436
epoch 4 batch id 2201 loss 0.696457028388977 train acc 0.812315424806906
epoch 4 batch id 2401 loss 0.5053235292434692 train acc 0.8139186797167847
epoch 4 batch id 2601 loss 0.45906496047973633 

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 4 test acc 0.6755430597082541


  0%|          | 0/3394 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.27501434087753296 train acc 0.96875
epoch 5 batch id 201 loss 1.0902817249298096 train acc 0.8454601990049752
epoch 5 batch id 401 loss 0.3095819056034088 train acc 0.8491271820448878
epoch 5 batch id 601 loss 0.18720588088035583 train acc 0.8496776206322796
epoch 5 batch id 801 loss 0.4377713203430176 train acc 0.8485096754057428
epoch 5 batch id 1001 loss 0.7474216818809509 train acc 0.8504308191808192
epoch 5 batch id 1201 loss 0.7089880108833313 train acc 0.8507493755203996
epoch 5 batch id 1401 loss 0.4744078516960144 train acc 0.8512892576730906
epoch 5 batch id 1601 loss 0.16201433539390564 train acc 0.8528068394753279
epoch 5 batch id 1801 loss 0.15278543531894684 train acc 0.8529289283731261
epoch 5 batch id 2001 loss 0.2478848546743393 train acc 0.8542603698150925
epoch 5 batch id 2201 loss 0.5300540924072266 train acc 0.856230122671513
epoch 5 batch id 2401 loss 0.38902610540390015 train acc 0.8565962099125365
epoch 5 batch id 2601 loss 0.4132373332

  0%|          | 0/849 [00:00<?, ?it/s]

epoch 5 test acc 0.6754326356799856


In [None]:
# Display the per-epoch train and test accuracy lists filled in by the training loop
train_accuarcy, test_accuarcy

In [None]:
# torch.save(model, 'model/kobert-v7.pt')