In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import gluonnlp as nlp
import numpy as np
from tqdm.notebook import tqdm
import random
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import preprocessing
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
from kobert import get_tokenizer
from kobert import get_pytorch_kobert_model

In [3]:
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [4]:

device = torch.device("cuda:3")

In [4]:
CFG = {
    'SEED':41
}

In [5]:
## Setting parameters
max_len = 90
batch_size = 8
warmup_ratio = 0.1
num_epochs = 100
max_grad_norm = 1
log_interval = 200
learning_rate =  5e-5

In [6]:
bertmodel, vocab = get_pytorch_kobert_model(cachedir=".cache")

using cached model. /data1/hom1/ict18/sentence/KoBERT/.cache/kobert_v1.zip
using cached model. /data1/hom1/ict18/sentence/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [7]:
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

using cached model. /data1/hom1/ict18/sentence/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [8]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        if label_idx==None:
            self.labels=None
        else:
            self.labels = [np.int32(i[label_idx]) for i in dataset]
        

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))

    def __len__(self):
        return (len(self.labels))


In [9]:
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=2,
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        else:
            out = pooler
        return self.classifier(out)

In [10]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [11]:
df = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [23]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CFG['SEED'])

In [24]:
type_le = preprocessing.LabelEncoder()
train["유형"] = type_le.fit_transform(train["유형"].values)
val["유형"] = type_le.transform(val["유형"].values)

polarity_le = preprocessing.LabelEncoder()
train["극성"] = polarity_le.fit_transform(train["극성"].values)
val["극성"] = polarity_le.transform(val["극성"].values)

tense_le = preprocessing.LabelEncoder()
train["시제"] = tense_le.fit_transform(train["시제"].values)
val["시제"] = tense_le.transform(val["시제"].values)

certainty_le = preprocessing.LabelEncoder()
train["확실성"] = certainty_le.fit_transform(train["확실성"].values)
val["확실성"] = certainty_le.transform(val["확실성"].values)

In [15]:
type_train=pd.concat([train['문장'],train['유형']],axis=1)
polarity_train=pd.concat([train['문장'],train['극성']],axis=1)
tense_train=pd.concat([train['문장'],train['시제']],axis=1)
certainty_train=pd.concat([train['문장'],train['확실성']],axis=1)

In [16]:
type_val=pd.concat([val['문장'],val['유형']],axis=1)
polarity_val=pd.concat([val['문장'],val['극성']],axis=1)
tense_val=pd.concat([val['문장'],val['시제']],axis=1)
certainty_val=pd.concat([val['문장'],val['확실성']],axis=1)

In [17]:
type_train_set_data = [[i, str(j)] for i, j in zip(type_train['문장'], type_train['유형'])]
polarity_train_set_data = [[i, str(j)] for i, j in zip(polarity_train['문장'], polarity_train['극성'])]
tense_train_set_data = [[i, str(j)] for i, j in zip(tense_train['문장'], tense_train['시제'])]
certainty_train_set_data = [[i, str(j)] for i, j in zip(certainty_train['문장'], certainty_train['확실성'])]

In [18]:
type_val_set_data = [[i, str(j)] for i, j in zip(type_val['문장'], type_val['유형'])]
polarity_val_set_data = [[i, str(j)] for i, j in zip(polarity_val['문장'], polarity_val['극성'])]
tense_val_set_data = [[i, str(j)] for i, j in zip(tense_val['문장'], tense_val['시제'])]
certainty_val_set_data = [[i, str(j)] for i, j in zip(certainty_val['문장'], certainty_val['확실성'])]

In [12]:
test_data=[[i,str(j)] for i,j in zip(test['문장'], np.zeros(7090,dtype=np.int64))]

In [20]:
type_train_set_data=BERTDataset(type_train_set_data,0,1,tok,max_len,True,False)
polarity_train_set_data=BERTDataset(polarity_train_set_data,0,1,tok,max_len,True,False)
tense_train_set_data=BERTDataset(tense_train_set_data,0,1,tok,max_len,True,False)
certainty_train_set_data=BERTDataset(certainty_train_set_data,0,1,tok,max_len,True,False)

In [21]:
type_val_set_data=BERTDataset(type_val_set_data,0,1,tok,max_len,True,False)
polarity_val_set_data=BERTDataset(polarity_val_set_data,0,1,tok,max_len,True,False)
tense_val_set_data=BERTDataset(tense_val_set_data,0,1,tok,max_len,True,False)
certainty_val_set_data=BERTDataset(certainty_val_set_data,0,1,tok,max_len,True,False)

In [13]:
test_dataset=BERTDataset(test_data,0,1,tok,max_len,True,False)

In [23]:
type_train_dataloader = torch.utils.data.DataLoader(type_train_set_data, batch_size=batch_size, num_workers=5)
polarity_train_dataloader = torch.utils.data.DataLoader(polarity_train_set_data, batch_size=batch_size, num_workers=5)
tense_train_dataloader = torch.utils.data.DataLoader(tense_train_set_data, batch_size=batch_size, num_workers=5)
certainty_train_dataloader = torch.utils.data.DataLoader(certainty_train_set_data, batch_size=batch_size, num_workers=5)

In [24]:
type_val_dataloader = torch.utils.data.DataLoader(type_val_set_data, batch_size=batch_size, num_workers=5)
polarity_val_dataloader = torch.utils.data.DataLoader(polarity_val_set_data, batch_size=batch_size, num_workers=5)
tense_val_dataloader = torch.utils.data.DataLoader(tense_val_set_data, batch_size=batch_size, num_workers=5)
certainty_val_dataloader = torch.utils.data.DataLoader(certainty_val_set_data, batch_size=batch_size, num_workers=5)

In [14]:
test_dataloader=torch.utils.data.DataLoader(test_dataset,batch_size=256,num_workers=0, shuffle=False)

In [15]:
type_model=BERTClassifier(bertmodel,num_classes=4, dr_rate=0.5)
polarity_model=BERTClassifier(bertmodel,num_classes=3, dr_rate=0.5)
tense_model=BERTClassifier(bertmodel,num_classes=3, dr_rate=0.5)
certainty_model=BERTClassifier(bertmodel,num_classes=2, dr_rate=0.5)

In [27]:
# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ['bias', 'LayerNorm.weight']
type_optimizer_grouped_parameters = [
    {'params': [p for n, p in type_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in type_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
polarity_optimizer_grouped_parameters = [
    {'params': [p for n, p in polarity_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in polarity_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
tense_optimizer_grouped_parameters = [
    {'params': [p for n, p in tense_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in tense_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
certainty_optimizer_grouped_parameters = [
    {'params': [p for n, p in certainty_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in certainty_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

In [28]:
nSamples=[575,13558,257,2151]
normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
normedWeights = torch.FloatTensor(normedWeights).to(device)
type_loss = nn.CrossEntropyLoss(normedWeights)

nSamples=[15793,183,565]
normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
normedWeights = torch.FloatTensor(normedWeights).to(device)
polarity_loss = nn.CrossEntropyLoss(normedWeights)

nSamples=[8032,1643,6866]
normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
normedWeights = torch.FloatTensor(normedWeights).to(device)
tense_loss = nn.CrossEntropyLoss(normedWeights)

nSamples=[15192,1349]
normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
normedWeights = torch.FloatTensor(normedWeights).to(device)
certainty_loss = nn.CrossEntropyLoss(normedWeights)

In [29]:
type_optimizer = AdamW(type_optimizer_grouped_parameters, lr=learning_rate)
polarity_optimizer = AdamW(polarity_optimizer_grouped_parameters, lr=learning_rate)
tense_optimizer = AdamW(tense_optimizer_grouped_parameters, lr=learning_rate)
certainty_optimizer = AdamW(certainty_optimizer_grouped_parameters, lr=learning_rate)

In [30]:
t_total = len(type_train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)

In [31]:
type_scheduler = get_cosine_schedule_with_warmup(type_optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)
polarity_scheduler = get_cosine_schedule_with_warmup(polarity_optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)
tense_scheduler = get_cosine_schedule_with_warmup(tense_optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)
certainty_scheduler = get_cosine_schedule_with_warmup(certainty_optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

In [32]:
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

In [33]:
def validation(model, val_loader, loss_function):
    model.eval()
    val_loss = []
    
    preds = []
    labels = []
    
    
    with torch.no_grad():
        for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(val_loader), total=len(val_loader)):
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            valid_length= valid_length
            label = label.long().to(device)
            out = model(token_ids, valid_length, segment_ids)
            loss = loss_function(out, label)
            
            val_loss.append(loss.item())
            
            preds += out.argmax(1).detach().cpu().numpy().tolist()
            labels += label.detach().cpu().numpy().tolist()
    
    f1 = f1_score(labels, preds, average='weighted')

    
    return np.mean(val_loss), f1

In [34]:
def train(model, train_dataloader, optimizer, scheduler, val_loader, name, loss_function):
    best_loss = 999999
    best_model = None
    early_stopping_threshold_count =0
    for e in range(num_epochs):
        train_acc = 0.0
        test_acc = 0.0
        model.train()
        train_loss=[]
        for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(train_dataloader), total=len(train_dataloader)):
            optimizer.zero_grad()
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            valid_length= valid_length
            label = label.long().to(device)
            out = model(token_ids, valid_length, segment_ids)
            loss = loss_function(out, label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            train_acc += calc_accuracy(out, label)
            if batch_id % log_interval == 0:
                print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
            train_loss.append(loss.item())
            
        print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
        
        val_loss, f1=validation(model, val_loader, loss_function)
        print(f'Epoch : [{e}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]  F1 : [{f1:.5f}]')
        
        if scheduler is not None:
            scheduler.step(val_loss)
            
        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model
            early_stopping_threshold_count =0
        else:
            early_stopping_threshold_count +=1
        if early_stopping_threshold_count >=3:
            print('Early stopping')
            break
        
            
    return best_model

In [35]:
type_model=train(type_model,type_train_dataloader, type_optimizer, type_scheduler, type_val_dataloader, 'type', type_loss)

  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.2257969379425049 train acc 0.5
epoch 1 batch id 201 loss 1.1433652639389038 train acc 0.5541044776119403
epoch 1 batch id 401 loss 1.3089152574539185 train acc 0.6197007481296758
epoch 1 batch id 601 loss 0.723433256149292 train acc 0.6582778702163061
epoch 1 batch id 801 loss 0.8412370085716248 train acc 0.6924157303370787
epoch 1 batch id 1001 loss 0.6535593867301941 train acc 0.7144105894105894
epoch 1 batch id 1201 loss 1.8856397867202759 train acc 0.732098251457119
epoch 1 batch id 1401 loss 0.6375046372413635 train acc 0.7486616702355461
epoch 1 batch id 1601 loss 1.2529233694076538 train acc 0.7597595252966896
epoch 1 train acc 0.7623941958887546


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [0] Train Loss : [0.99896] Val Loss : [0.63646]  F1 : [0.86184]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.3805767893791199 train acc 1.0
epoch 2 batch id 201 loss 0.2651229798793793 train acc 0.8600746268656716
epoch 2 batch id 401 loss 0.40428605675697327 train acc 0.8712593516209476
epoch 2 batch id 601 loss 0.480182409286499 train acc 0.8695923460898503
epoch 2 batch id 801 loss 0.3990096151828766 train acc 0.8748439450686641
epoch 2 batch id 1001 loss 0.08762066066265106 train acc 0.874000999000999
epoch 2 batch id 1201 loss 1.8608652353286743 train acc 0.8751040799333888
epoch 2 batch id 1401 loss 0.30836984515190125 train acc 0.8785688793718772
epoch 2 batch id 1601 loss 0.3605375289916992 train acc 0.8804653341661461
epoch 2 train acc 0.8804413542926239


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.58451] Val Loss : [0.64192]  F1 : [0.88207]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.14632119238376617 train acc 0.875
epoch 3 batch id 201 loss 0.06814781576395035 train acc 0.8793532338308457
epoch 3 batch id 401 loss 0.13821561634540558 train acc 0.8946384039900249
epoch 3 batch id 601 loss 0.5329660773277283 train acc 0.894134775374376
epoch 3 batch id 801 loss 0.06609110534191132 train acc 0.8979400749063671
epoch 3 batch id 1001 loss 0.026273634284734726 train acc 0.8952297702297702
epoch 3 batch id 1201 loss 1.4183930158615112 train acc 0.8963363863447127
epoch 3 batch id 1401 loss 0.027169886976480484 train acc 0.8993576017130621
epoch 3 batch id 1601 loss 0.07900922000408173 train acc 0.9022485946283573
epoch 3 train acc 0.9028869407496977


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.52354] Val Loss : [0.79107]  F1 : [0.88105]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.23291684687137604 train acc 0.875
epoch 4 batch id 201 loss 0.020428139716386795 train acc 0.8812189054726368
epoch 4 batch id 401 loss 0.11047447472810745 train acc 0.8937032418952618
epoch 4 batch id 601 loss 0.5946571230888367 train acc 0.896630615640599
epoch 4 batch id 801 loss 0.049391236156225204 train acc 0.9024656679151061
epoch 4 batch id 1001 loss 0.011167455464601517 train acc 0.9032217782217782
epoch 4 batch id 1201 loss 1.934476375579834 train acc 0.9058076602830974
epoch 4 batch id 1401 loss 0.009304380044341087 train acc 0.9095289079229122
epoch 4 batch id 1601 loss 0.048136308789253235 train acc 0.9134915677701436
epoch 4 train acc 0.9143742442563483


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.50532] Val Loss : [0.90853]  F1 : [0.88345]
Early stopping


In [36]:
polarity_model=train(polarity_model, polarity_train_dataloader,polarity_optimizer, polarity_scheduler, polarity_val_dataloader, 'polarity', polarity_loss)


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.441328525543213 train acc 0.0
epoch 1 batch id 201 loss 1.347513198852539 train acc 0.12686567164179105
epoch 1 batch id 401 loss 0.4696314334869385 train acc 0.3974438902743142
epoch 1 batch id 601 loss 2.155280590057373 train acc 0.5846505823627288
epoch 1 batch id 801 loss 0.07204989343881607 train acc 0.6771223470661673
epoch 1 batch id 1001 loss 0.038335464894771576 train acc 0.7321428571428571
epoch 1 batch id 1201 loss 0.032422080636024475 train acc 0.7693588676103247
epoch 1 batch id 1401 loss 0.02649766393005848 train acc 0.7957708779443254
epoch 1 batch id 1601 loss 1.2515182495117188 train acc 0.8148813241723922
epoch 1 train acc 0.8195284159613059


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [0] Train Loss : [0.94687] Val Loss : [0.60394]  F1 : [0.93030]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.020177537575364113 train acc 1.0
epoch 2 batch id 201 loss 0.017890745773911476 train acc 0.9589552238805971
epoch 2 batch id 401 loss 0.012472732923924923 train acc 0.9597880299251871
epoch 2 batch id 601 loss 3.439194679260254 train acc 0.9636023294509152
epoch 2 batch id 801 loss 0.010147104039788246 train acc 0.9656679151061174
epoch 2 batch id 1001 loss 0.006915493868291378 train acc 0.9665334665334665
epoch 2 batch id 1201 loss 0.008684694766998291 train acc 0.9677352206494588
epoch 2 batch id 1401 loss 0.007105417549610138 train acc 0.9681477516059958
epoch 2 batch id 1601 loss 0.023674221709370613 train acc 0.9681449094316052
epoch 2 train acc 0.968409915356711


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.52425] Val Loss : [0.35139]  F1 : [0.96273]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.009783676825463772 train acc 1.0
epoch 3 batch id 201 loss 0.007711075711995363 train acc 0.9701492537313433
epoch 3 batch id 401 loss 0.006249956786632538 train acc 0.9741271820448878
epoch 3 batch id 601 loss 3.469383955001831 train acc 0.9762895174708819
epoch 3 batch id 801 loss 0.005902361590415239 train acc 0.9753433208489388
epoch 3 batch id 1001 loss 0.005322685930877924 train acc 0.9756493506493507
epoch 3 batch id 1201 loss 0.004161996766924858 train acc 0.9760616153205662
epoch 3 batch id 1401 loss 0.003681275760754943 train acc 0.9766238401142041
epoch 3 batch id 1601 loss 0.014508719556033611 train acc 0.9761086820737039
epoch 3 train acc 0.9761940749697703


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.39006] Val Loss : [0.39568]  F1 : [0.97067]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.0032567407470196486 train acc 1.0
epoch 4 batch id 201 loss 0.0036923233419656754 train acc 0.9776119402985075
epoch 4 batch id 401 loss 0.002738969400525093 train acc 0.9778678304239401
epoch 4 batch id 601 loss 0.010881985537707806 train acc 0.9796173044925125
epoch 4 batch id 801 loss 0.0029488462023437023 train acc 0.9783083645443196
epoch 4 batch id 1001 loss 0.0034035800490528345 train acc 0.9787712287712288
epoch 4 batch id 1201 loss 0.0025848536752164364 train acc 0.9791840133222315
epoch 4 batch id 1401 loss 0.002130878157913685 train acc 0.9799250535331906
epoch 4 batch id 1601 loss 0.00908734556287527 train acc 0.9795440349781387
epoch 4 train acc 0.9798216444981862


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.35587] Val Loss : [0.40648]  F1 : [0.97191]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.0023647139314562082 train acc 1.0
epoch 5 batch id 201 loss 0.003007124410942197 train acc 0.9782338308457711
epoch 5 batch id 401 loss 0.001827083877287805 train acc 0.9788029925187033
epoch 5 batch id 601 loss 0.007006478030234575 train acc 0.9810732113144759
epoch 5 batch id 801 loss 0.0019518944900482893 train acc 0.9804931335830213
epoch 5 batch id 1001 loss 0.0019228343153372407 train acc 0.9812687312687313
epoch 5 batch id 1201 loss 0.001923520932905376 train acc 0.9813696919233972
epoch 5 batch id 1401 loss 0.0016129453433677554 train acc 0.981887937187723
epoch 5 batch id 1601 loss 0.012760600075125694 train acc 0.9817301686445972
epoch 5 train acc 0.9819377267230955


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.31870] Val Loss : [0.42671]  F1 : [0.96911]
Early stopping


In [37]:
tense_model=train(tense_model, tense_train_dataloader, tense_optimizer, tense_scheduler, tense_val_dataloader,'tense',tense_loss)


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.1805119514465332 train acc 0.375
epoch 1 batch id 201 loss 1.0521631240844727 train acc 0.26927860696517414
epoch 1 batch id 401 loss 0.9507574439048767 train acc 0.3444513715710723
epoch 1 batch id 601 loss 0.7635687589645386 train acc 0.43718801996672213
epoch 1 batch id 801 loss 0.2700583338737488 train acc 0.5332397003745318
epoch 1 batch id 1001 loss 0.28905290365219116 train acc 0.6013986013986014
epoch 1 batch id 1201 loss 0.4421083331108093 train acc 0.6472731057452124
epoch 1 batch id 1401 loss 0.06933211535215378 train acc 0.6821020699500356
epoch 1 batch id 1601 loss 0.04656066745519638 train acc 0.7065115552779513
epoch 1 train acc 0.7116837968561064


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [0] Train Loss : [0.67424] Val Loss : [0.41851]  F1 : [0.88580]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.03835670277476311 train acc 1.0
epoch 2 batch id 201 loss 0.362768292427063 train acc 0.8961442786069652
epoch 2 batch id 401 loss 0.05786117538809776 train acc 0.898067331670823
epoch 2 batch id 601 loss 0.57061767578125 train acc 0.8951747088186356
epoch 2 batch id 801 loss 0.03298766538500786 train acc 0.892478152309613
epoch 2 batch id 1001 loss 0.03130272403359413 train acc 0.8943556443556444
epoch 2 batch id 1201 loss 0.6473284363746643 train acc 0.8959200666111574
epoch 2 batch id 1401 loss 0.02183251641690731 train acc 0.8992683797287652
epoch 2 batch id 1601 loss 0.01692136749625206 train acc 0.8989693941286696
epoch 2 train acc 0.8981257557436517


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.38552] Val Loss : [0.45083]  F1 : [0.88386]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.023181436583399773 train acc 1.0
epoch 3 batch id 201 loss 0.046251654624938965 train acc 0.8917910447761194
epoch 3 batch id 401 loss 0.1596718579530716 train acc 0.8993142144638404
epoch 3 batch id 601 loss 0.5957642197608948 train acc 0.9022462562396006
epoch 3 batch id 801 loss 0.017972372472286224 train acc 0.9024656679151061
epoch 3 batch id 1001 loss 0.025851937010884285 train acc 0.9040959040959041
epoch 3 batch id 1201 loss 0.6422171592712402 train acc 0.9068484596169858
epoch 3 batch id 1401 loss 0.01328431349247694 train acc 0.9105995717344754
epoch 3 batch id 1601 loss 0.011883329600095749 train acc 0.9119300437226733
epoch 3 train acc 0.9111245465538089


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.36615] Val Loss : [0.49204]  F1 : [0.89078]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.013363776728510857 train acc 1.0
epoch 4 batch id 201 loss 0.10875698179006577 train acc 0.8930348258706468
epoch 4 batch id 401 loss 0.29926949739456177 train acc 0.9014962593516209
epoch 4 batch id 601 loss 0.6586987972259521 train acc 0.9043261231281198
epoch 4 batch id 801 loss 0.013041781261563301 train acc 0.906210986267166
epoch 4 batch id 1001 loss 0.02259519137442112 train acc 0.9094655344655345
epoch 4 batch id 1201 loss 0.5588502883911133 train acc 0.9121565362198168
epoch 4 batch id 1401 loss 0.009458492510020733 train acc 0.9167558886509636
epoch 4 batch id 1601 loss 0.009776146151125431 train acc 0.9181761399125546
epoch 4 train acc 0.9179262394195888


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.35521] Val Loss : [0.51664]  F1 : [0.88639]
Early stopping


In [38]:
certainty_model=train(certainty_model, certainty_train_dataloader, certainty_optimizer, certainty_scheduler, certainty_val_dataloader,'certainty', certainty_loss)

  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 0.8062800168991089 train acc 0.25
epoch 1 batch id 201 loss 0.38894277811050415 train acc 0.6075870646766169
epoch 1 batch id 401 loss 0.07595953345298767 train acc 0.7593516209476309
epoch 1 batch id 601 loss 0.06562653183937073 train acc 0.8132279534109818
epoch 1 batch id 801 loss 0.017638487741351128 train acc 0.8408239700374532
epoch 1 batch id 1001 loss 0.014156739227473736 train acc 0.8562687312687313
epoch 1 batch id 1201 loss 0.03233494609594345 train acc 0.8672980849292257
epoch 1 batch id 1401 loss 0.006194839719682932 train acc 0.8758029978586723
epoch 1 batch id 1601 loss 0.03026076965034008 train acc 0.8801530293566521
epoch 1 train acc 0.8811215235792019


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [0] Train Loss : [0.14237] Val Loss : [0.03964]  F1 : [0.87274]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 0.02675476111471653 train acc 1.0
epoch 2 batch id 201 loss 0.04759451374411583 train acc 0.927860696517413
epoch 2 batch id 401 loss 0.006378155201673508 train acc 0.9205112219451371
epoch 2 batch id 601 loss 0.04327165335416794 train acc 0.920549084858569
epoch 2 batch id 801 loss 0.004766068886965513 train acc 0.9213483146067416
epoch 2 batch id 1001 loss 0.01577947661280632 train acc 0.9208291708291708
epoch 2 batch id 1201 loss 0.02201961539685726 train acc 0.9216278101582015
epoch 2 batch id 1401 loss 0.002971262438222766 train acc 0.9231798715203426
epoch 2 batch id 1601 loss 0.011543757282197475 train acc 0.9228607120549657
epoch 2 train acc 0.9229141475211609


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.03890] Val Loss : [0.04248]  F1 : [0.91741]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.017034392803907394 train acc 1.0
epoch 3 batch id 201 loss 0.06152062118053436 train acc 0.9365671641791045
epoch 3 batch id 401 loss 0.006844318471848965 train acc 0.9317331670822943
epoch 3 batch id 601 loss 0.018648996949195862 train acc 0.93261231281198
epoch 3 batch id 801 loss 0.0023276268038898706 train acc 0.9355493133583022
epoch 3 batch id 1001 loss 0.0033039499539881945 train acc 0.9348151848151848
epoch 3 batch id 1201 loss 0.015329592861235142 train acc 0.9348459616985845
epoch 3 batch id 1401 loss 0.0029074563644826412 train acc 0.9363847251962883
epoch 3 batch id 1601 loss 0.004436803981661797 train acc 0.9368363522798251
epoch 3 train acc 0.9371977025392987


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.03805] Val Loss : [0.05039]  F1 : [0.91906]


  0%|          | 0/1654 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.0024489383213222027 train acc 1.0
epoch 4 batch id 201 loss 0.07271525263786316 train acc 0.9427860696517413
epoch 4 batch id 401 loss 0.010010959580540657 train acc 0.9360972568578554
epoch 4 batch id 601 loss 0.009100777097046375 train acc 0.9417637271214643
epoch 4 batch id 801 loss 0.002240594709292054 train acc 0.9447565543071161
epoch 4 batch id 1001 loss 0.00170193612575531 train acc 0.9448051948051948
epoch 4 batch id 1201 loss 0.011457439512014389 train acc 0.9450457951706911
epoch 4 batch id 1401 loss 0.0025608958676457405 train acc 0.9467344753747323
epoch 4 batch id 1601 loss 0.004894557408988476 train acc 0.948001249219238
epoch 4 train acc 0.9483071342200725


  0%|          | 0/414 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.03637] Val Loss : [0.06081]  F1 : [0.92521]
Early stopping


In [17]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    
    preds = []
    
    for batch_idx,(token_ids, valid_length, segment_ids,labels) in tqdm(enumerate(test_loader), total=len(test_dataloader)):
        
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        out = model(token_ids, valid_length, segment_ids)
        
        preds += out.argmax(1).detach().cpu().numpy().tolist()
        
        
    
    return preds

In [17]:
type_preds = inference(type_model, test_dataloader, device)

  0%|          | 0/28 [00:00<?, ?it/s]

In [19]:
polarity_preds = inference(polarity_model, test_dataloader, device)

  0%|          | 0/28 [00:00<?, ?it/s]

In [20]:
tense_preds = inference(tense_model, test_dataloader,device)

  0%|          | 0/28 [00:00<?, ?it/s]

In [21]:
certainty_preds = inference(certainty_model, test_dataloader,device)

  0%|          | 0/28 [00:00<?, ?it/s]

In [25]:
type_preds = type_le.inverse_transform(type_preds)
polarity_preds = polarity_le.inverse_transform(polarity_preds)
tense_preds = tense_le.inverse_transform(tense_preds)
certainty_preds = certainty_le.inverse_transform(certainty_preds)

In [26]:
predictions = []
for type_pred, polarity_pred, tense_pred, certainty_pred in zip(type_preds, polarity_preds, tense_preds, certainty_preds):
    predictions.append(type_pred+'-'+polarity_pred+'-'+tense_pred+'-'+certainty_pred)

In [27]:
submit = pd.read_csv('data/sample_submission.csv')
submit['label'] = predictions

In [28]:
submit.head()

Unnamed: 0,ID,label
0,TEST_0000,사실형-긍정-현재-확실
1,TEST_0001,사실형-긍정-현재-확실
2,TEST_0002,사실형-긍정-과거-확실
3,TEST_0003,사실형-긍정-현재-확실
4,TEST_0004,사실형-긍정-과거-확실


In [None]:
submit.to_csv('./baseline_submit.csv', index=False)