# KoBERT finetuning

### settings

In [None]:
# !pip install ipywidgets  # for vscode
# Install the KoBERT package straight from GitHub.
# NOTE(review): this installs from the `master` branch, so the installed code can change
# between runs — consider pinning a tag or commit for reproducibility.
!pip install git+https://git@github.com/SKTBrain/KoBERT.git@master

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import pandas as pd

In [None]:
# --- Run configuration / hyperparameters ---
seed = 0                 # RNG seed: applied below and reused as random_state for the train/val split
max_len = 64             # max sequence length handed to the BERT sentence transform
batch_size = 260         # batch size for the train, validation and inference loaders
warmup_ratio = 0.1       # fraction of all optimizer steps spent in learning-rate warmup
num_epochs = 1000        # epochs run by train(); also determines the scheduler's total step count
max_grad_norm = 1        # gradient-norm clipping threshold used in train()
log_interval = 200       # NOTE(review): not referenced by any other visible cell
learning_rate =  5e-5
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Apply the seed; previously it was declared but never handed to the RNGs, so
# DataLoader shuffling, dropout and classifier-head initialisation differed on every run.
np.random.seed(seed)
torch.manual_seed(seed)

In [None]:
from kobert import get_tokenizer
from kobert import get_pytorch_kobert_model

In [None]:
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [None]:
# Fetch the pretrained KoBERT encoder and its vocabulary, using ".cache" as the cache directory.
bertmodel, vocab = get_pytorch_kobert_model(cachedir=".cache")

/content/.cache/kobert_v1.zip[██████████████████████████████████████████████████]
/content/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece[██████████████████████████████████████████████████]


### load data

In [None]:
# Mount Google Drive; the training data, checkpoints and submission files below all live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Integer class index for every label value, grouped by annotation column.
# The position of a name inside its tuple is its class index.
label_dict = {
    column: {name: index for index, name in enumerate(names)}
    for column, names in (
        ("유형", ("사실형", "추론형", "대화형", "예측형")),
        ("극성", ("긍정", "부정", "미정")),
        ("시제", ("과거", "현재", "미래")),
        ("확실성", ("확실", "불확실")),
    )
}

In [None]:
# Load the labelled training data from Drive (sentence column "문장" plus one column per label).
df = pd.read_csv('/content/drive/MyDrive/kobert/train.csv')

In [None]:
# Replace the string values of each label column with their integer class index.
# The whole column is mapped at once: the previous per-row `df[label].iloc[i] = ...`
# was a chained assignment, which may write to a temporary copy (and is ignored under
# pandas Copy-on-Write) and performed one Python-level write per row.
for label, mapping in label_dict.items():
    if pd.api.types.is_integer_dtype(df[label]):
        continue  # column already encoded — keeps this cell safe to re-run
    encoded = df[label].map(mapping)
    if encoded.isna().any():
        # Fail loudly on values missing from the mapping instead of passing NaN downstream.
        unknown = sorted(set(df.loc[encoded.isna(), label].astype(str)))
        raise ValueError(f"Unmapped values in column '{label}': {unknown}")
    df[label] = encoded.astype("int64")

In [None]:
# Display the frame to check that the four label columns now hold integer codes.
df

Unnamed: 0,ID,문장,유형,극성,시제,확실성,label
0,TRAIN_00000,0.75%포인트 금리 인상은 1994년 이후 28년 만에 처음이다.,0,0,1,0,사실형-긍정-현재-확실
1,TRAIN_00001,이어 ＂앞으로 전문가들과 함께 4주 단위로 상황을 재평가할 예정＂이라며 ＂그 이전이...,0,0,0,0,사실형-긍정-과거-확실
2,TRAIN_00002,정부가 고유가 대응을 위해 7월부터 연말까지 유류세 인하 폭을 30%에서 37%까지...,0,0,2,0,사실형-긍정-미래-확실
3,TRAIN_00003,"서울시는 올해 3월 즉시 견인 유예시간 60분을 제공하겠다고 밝혔지만, 하루 만에 ...",0,0,0,0,사실형-긍정-과거-확실
4,TRAIN_00004,익사한 자는 사다리에 태워 거꾸로 놓고 소금으로 코를 막아 가득 채운다.,0,0,1,0,사실형-긍정-현재-확실
...,...,...,...,...,...,...,...
16536,TRAIN_16536,"＇신동덤＇은 ＇신비한 동물사전＇과 ＇해리 포터＇ 시리즈를 잇는 마법 어드벤처물로, ...",0,0,0,0,사실형-긍정-과거-확실
16537,TRAIN_16537,"수족냉증은 어릴 때부터 심했으며 관절은 어디 한 곳이 아니고 목, 어깨, 팔꿈치, ...",0,0,0,0,사실형-긍정-과거-확실
16538,TRAIN_16538,김금희 소설가는 ＂계약서 조정이 그리 어려운가 작가를 격려한다면서 그런 문구 하나 ...,0,0,0,0,사실형-긍정-과거-확실
16539,TRAIN_16539,1만명이 넘는 방문자수를 기록한 이번 전시회는 총 77개 작품을 넥슨 사옥을 그대로...,0,0,0,1,사실형-긍정-과거-불확실


In [None]:
# 80/20 train/validation split of the frame, stratified on the polarity ("극성") column only.
train, val = train_test_split(
    df,
    test_size=0.2,
    random_state=seed,
    stratify=df["극성"],
)

In [None]:
# Class counts in the training split for the three label columns the split was not stratified on.
for column in ("유형", "시제", "확실성"):
    print(train[column].value_counts())

0    10857
1     1715
2      460
3      200
Name: 유형, dtype: int64
0    6402
1    5530
2    1300
Name: 시제, dtype: int64
0    12160
1     1072
Name: 확실성, dtype: int64


In [None]:
# Class counts in the validation split for the three label columns the split was not stratified on.
for column in ("유형", "시제", "확실성"):
    print(val[column].value_counts())

0    2701
1     436
2     115
3      57
Name: 유형, dtype: int64
0    1630
1    1336
2     343
Name: 시제, dtype: int64
0    3032
1     277
Name: 확실성, dtype: int64


### define model

In [None]:
# Build the tokenizer over the KoBERT vocabulary loaded above; lower=False keeps the text's casing.
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

using cached model. /content/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [None]:
class BERTDataset(Dataset):
    """Sentences transformed for BERT, optionally paired with the four target labels.

    Args:
        dataset: table-like object indexed by column name; must provide "문장" and,
            when ``train`` is truthy, the "유형", "극성", "시제" and "확실성" columns.
        bert_tokenizer: tokenizer handed to ``nlp.data.BERTSentenceTransform``.
        max_len: forwarded as ``max_seq_length``.
        pad: forwarded to the transform.
        pair: forwarded to the transform.
        train: when truthy, labels are loaded and appended to every item.

    Items:
        train mode  -> the transformed sentence tuple extended with
                       (type, polarity, tense, certainty) labels.
        otherwise   -> a 1-tuple wrapping the transformed sentence tuple.
    """

    # (attribute name, source column) for each label list, in item order.
    _LABEL_SOURCES = (
        ("type_labels", "유형"),
        ("polarity_labels", "극성"),
        ("tense_labels", "시제"),
        ("certainty_labels", "확실성"),
    )

    def __init__(self, dataset, bert_tokenizer, max_len,
                 pad, pair, train):
        to_features = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)
        self.train = train
        self.sentences = [to_features([text]) for text in dataset["문장"]]
        if train:
            for attr, column in self._LABEL_SOURCES:
                setattr(self, attr, [np.int32(value) for value in dataset[column]])

    def __getitem__(self, i):
        features = self.sentences[i]
        if not self.train:
            return (features,)
        labels = (
            self.type_labels[i],
            self.polarity_labels[i],
            self.tense_labels[i],
            self.certainty_labels[i],
        )
        return features + labels

    def __len__(self):
        return len(self.sentences)


In [None]:
# Tokenise both splits. Positional flags map to BERTDataset's (pad, pair, train):
# pad=True, pair=False, train=True (labels are attached to every item).
data_train = BERTDataset(train, tok, max_len, True, False, True)
data_val = BERTDataset(val, tok, max_len, True, False, True)

In [None]:
# Training batches are reshuffled each epoch and the final partial batch is dropped;
# validation keeps dataset order and every sample.
train_dataloader = DataLoader(
    data_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)
val_dataloader = DataLoader(
    data_val,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

In [None]:
class BERTClassifier(nn.Module):
    """BERT encoder with four linear classification heads on its pooled output.

    Head output sizes: type = 4, polarity = 3, tense = 3, certainty = 2.

    Args:
        bert: encoder module. It is called with ``input_ids``, ``token_type_ids`` and
            ``attention_mask`` and must expose the pooled output at index 1 of its result.
        hidden_size: width of the pooled output fed to every head.
        dr_rate: dropout probability applied to the pooled output; a falsy value
            (``None`` or ``0``) disables dropout.
        params: unused; kept so existing call sites keep working.
    """

    def __init__(self,
                 bert,
                 hidden_size = 768,
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.type_classifier = nn.Linear(hidden_size , 4)
        self.polarity_classifier = nn.Linear(hidden_size , 3)
        self.tense_classifier = nn.Linear(hidden_size , 3)
        self.certainty_classifier = nn.Linear(hidden_size , 2)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Return a float mask shaped like ``token_ids``: 1.0 for the first
        ``valid_length[i]`` positions of row ``i`` and 0.0 elsewhere.

        Built with one broadcast comparison instead of a Python loop over rows.
        """
        seq_len = token_ids.size(1)
        # valid_length may arrive as a CPU tensor or a plain sequence; move it next to token_ids.
        lengths = torch.as_tensor(valid_length, device=token_ids.device).reshape(-1, 1)
        positions = torch.arange(seq_len, device=token_ids.device).unsqueeze(0)
        return (positions < lengths).float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Run the encoder and return (type, polarity, tense, certainty) logits."""
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        encoder_out = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        # Index instead of tuple-unpacking: when the encoder returns a dict-like output object,
        # unpacking iterates its keys and yields strings rather than tensors. Integer indexing
        # gives the pooled output for both plain tuples and such output objects.
        pooler = encoder_out[1]

        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.type_classifier(out), self.polarity_classifier(out), self.tense_classifier(out), self.certainty_classifier(out)

In [None]:
# Wrap the pretrained encoder with the four classification heads (dropout 0.5 on the pooled
# output) and move everything to the selected device.
model = BERTClassifier(bertmodel,  dr_rate=0.5).to(device)

In [None]:
# Parameter groups for the optimizer: biases and LayerNorm weights are exempt from
# weight decay, every other parameter uses a decay of 0.01.
no_decay = ['bias', 'LayerNorm.weight']
decay_params, no_decay_params = [], []
for param_name, param in model.named_parameters():
    is_exempt = any(marker in param_name for marker in no_decay)
    (no_decay_params if is_exempt else decay_params).append(param)

optimizer_grouped_parameters = [
    {'params': decay_params, 'weight_decay': 0.01},
    {'params': no_decay_params, 'weight_decay': 0.0},
]

In [None]:
# AdamW over the two parameter groups defined above.
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
# NOTE(review): train() and validation() build/receive their own per-task criteria, so
# `loss_fn` does not appear to be used by them — confirm before relying on it.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Total optimizer steps = batches per epoch * num_epochs; warmup covers the first
# `warmup_ratio` fraction of them.
# NOTE(review): with num_epochs = 1000 and warmup_ratio = 0.1 the warmup alone spans 100 epochs'
# worth of steps, so a run stopped earlier never leaves the warmup phase.
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)

In [None]:
# Cosine learning-rate schedule with warmup, sized from the step counts computed above;
# train() advances it once per batch.
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

### train

In [None]:
def validation(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader``; report the loss and one weighted F1 per task.

    Args:
        model: callable taking (token_ids, valid_length, segment_ids) and returning the
            type, polarity, tense and certainty logits, in that order.
        val_loader: sized iterable of 7-element batches: token ids, valid lengths,
            segment ids, then the four label tensors in the same task order.
        criterion: mapping with keys 'type', 'polarity', 'tense' and 'certainty',
            each a loss callable ``(logits, labels) -> loss``.
        device: device the inputs and labels are moved to.

    Returns:
        Tuple ``(mean of the per-batch losses, type F1, polarity F1, tense F1, certainty F1)``.
    """
    tasks = ('type', 'polarity', 'tense', 'certainty')
    model.eval()

    batch_losses = []
    predicted = {task: [] for task in tasks}
    expected = {task: [] for task in tasks}

    with torch.no_grad():
        for batch_id, batch in tqdm(enumerate(val_loader), total=len(val_loader)):
            token_ids, valid_length, segment_ids, type_y, polarity_y, tense_y, certainty_y = batch
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            targets = {
                task: labels.long().to(device)
                for task, labels in zip(tasks, (type_y, polarity_y, tense_y, certainty_y))
            }

            type_logit, polarity_logit, tense_logit, certainty_logit = model(token_ids, valid_length, segment_ids)
            logits = dict(zip(tasks, (type_logit, polarity_logit, tense_logit, certainty_logit)))

            # Equal-weight (0.25 each) combination of the four task losses.
            loss = sum(0.25 * criterion[task](logits[task], targets[task]) for task in tasks)
            batch_losses.append(loss.item())

            for task in tasks:
                predicted[task] += logits[task].argmax(1).detach().cpu().numpy().tolist()
                expected[task] += targets[task].detach().cpu().numpy().tolist()

    scores = [f1_score(expected[task], predicted[task], average='weighted') for task in tasks]
    return (np.mean(batch_losses), *scores)

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Fine-tune ``model`` on the four tasks, validating and checkpointing after every epoch.

    Uses the notebook-level ``num_epochs`` and ``max_grad_norm`` settings. After each epoch the
    running log is rewritten to ``log.csv`` and the model's ``state_dict`` is saved as
    ``model_state_dict<epoch, 4 digits, 1-based>.pth`` in the ``saved_models`` folder on Drive.

    Args:
        model: network returning (type, polarity, tense, certainty) logits.
        optimizer: optimizer stepped once per batch.
        train_loader: sized iterable of 7-element training batches (token ids, valid lengths,
            segment ids, then the four label tensors).
        val_loader: loader passed to ``validation`` at the end of each epoch.
        scheduler: learning-rate scheduler stepped once per batch.
        device: device the batches are moved to.

    Returns:
        The same ``model`` object after training.
    """
    import os  # local import: only needed to make sure the checkpoint directory exists

    # Create the output folder up front so the first save cannot fail after a full epoch of work.
    os.makedirs('/content/drive/MyDrive/kobert/saved_models', exist_ok=True)

    log_df = {"train_losses":[], "val_losses":[], "type_f1s":[], "polarity_f1s":[], "tense_f1s":[], "certainty_f1s":[]}

    # The loss modules hold no per-epoch state, so they are built once instead of every epoch.
    criterion = {
        'type' : nn.CrossEntropyLoss().to(device),
        'polarity' : nn.CrossEntropyLoss().to(device),
        'tense' : nn.CrossEntropyLoss().to(device),
        'certainty' : nn.CrossEntropyLoss().to(device)
    }

    for e in range(num_epochs):
        model.train()
        train_loss = []
        # Iterate the loader that was passed in (previously the global `train_dataloader`
        # was used and the `train_loader` argument was silently ignored).
        for batch_id, (token_ids, valid_length, segment_ids, type_labels, polarity_labels, tense_labels, certainty_labels) in tqdm(enumerate(train_loader), total=len(train_loader)):
            optimizer.zero_grad()
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            type_labels = type_labels.long().to(device)
            polarity_labels = polarity_labels.long().to(device)
            tense_labels = tense_labels.long().to(device)
            certainty_labels = certainty_labels.long().to(device)
            out = model(token_ids, valid_length, segment_ids)

            # Equal-weight (0.25 each) combination of the four task losses.
            loss = 0.25 * criterion['type'](out[0], type_labels) + \
                        0.25 * criterion['polarity'](out[1], polarity_labels) + \
                        0.25 * criterion['tense'](out[2], tense_labels) + \
                        0.25 * criterion['certainty'](out[3], certainty_labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            train_loss.append(loss.item())

        model.eval()
        val_loss, type_f1, polarity_f1, tense_f1, certainty_f1 = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{e}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] 유형 F1 : [{type_f1:.5f}] 극성 F1 : [{polarity_f1:.5f}] 시제 F1 : [{tense_f1:.5f}] 확실성 F1 : [{certainty_f1:.5f}]')

        log_df["val_losses"].append(val_loss)
        log_df["train_losses"].append(np.mean(train_loss))
        log_df["type_f1s"].append(type_f1)
        log_df["polarity_f1s"].append(polarity_f1)
        log_df["tense_f1s"].append(tense_f1)
        log_df["certainty_f1s"].append(certainty_f1)

        # Persist the log and a checkpoint every epoch so an interrupted run loses nothing.
        pd.DataFrame(log_df).to_csv('/content/drive/MyDrive/kobert/saved_models/log.csv')
        torch.save(model.state_dict(), f"/content/drive/MyDrive/kobert/saved_models/model_state_dict{str(e+1).zfill(4)}.pth")

    return model

In [None]:
# Run training; train() returns the same model object it was given.
infer_model = train(model, optimizer, train_dataloader, val_dataloader, scheduler, device)

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [0] Train Loss : [1.14653] Val Loss : [1.08070] 유형 F1 : [0.19067] 극성 F1 : [0.03026] 시제 F1 : [0.35150] 확실성 F1 : [0.56790]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [1.05357] Val Loss : [0.97436] 유형 F1 : [0.72779] 극성 F1 : [0.12542] 시제 F1 : [0.34128] 확실성 F1 : [0.86965]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.95190] Val Loss : [0.87065] 유형 F1 : [0.73217] 극성 F1 : [0.92891] 시제 F1 : [0.45538] 확실성 F1 : [0.87596]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.83863] Val Loss : [0.74685] 유형 F1 : [0.73336] 극성 F1 : [0.93253] 시제 F1 : [0.48427] 확실성 F1 : [0.87626]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.71071] Val Loss : [0.63102] 유형 F1 : [0.73368] 극성 F1 : [0.93253] 시제 F1 : [0.35552] 확실성 F1 : [0.87626]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.60543] Val Loss : [0.55231] 유형 F1 : [0.73368] 극성 F1 : [0.93253] 시제 F1 : [0.52658] 확실성 F1 : [0.87626]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.53601] Val Loss : [0.48570] 유형 F1 : [0.74022] 극성 F1 : [0.93253] 시제 F1 : [0.74217] 확실성 F1 : [0.87626]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.46453] Val Loss : [0.41738] 유형 F1 : [0.79502] 극성 F1 : [0.93253] 시제 F1 : [0.77493] 확실성 F1 : [0.87626]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.41023] Val Loss : [0.37733] 유형 F1 : [0.80397] 극성 F1 : [0.93253] 시제 F1 : [0.78877] 확실성 F1 : [0.87626]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.37213] Val Loss : [0.34621] 유형 F1 : [0.80532] 극성 F1 : [0.93253] 시제 F1 : [0.82688] 확실성 F1 : [0.89177]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.34101] Val Loss : [0.32172] 유형 F1 : [0.81051] 극성 F1 : [0.93253] 시제 F1 : [0.85935] 확실성 F1 : [0.91627]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [11] Train Loss : [0.31633] Val Loss : [0.29891] 유형 F1 : [0.83108] 극성 F1 : [0.93253] 시제 F1 : [0.87179] 확실성 F1 : [0.92197]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [12] Train Loss : [0.29258] Val Loss : [0.28132] 유형 F1 : [0.86034] 극성 F1 : [0.93266] 시제 F1 : [0.87575] 확실성 F1 : [0.92402]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [13] Train Loss : [0.27161] Val Loss : [0.26473] 유형 F1 : [0.87321] 극성 F1 : [0.94589] 시제 F1 : [0.88156] 확실성 F1 : [0.92557]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [14] Train Loss : [0.25311] Val Loss : [0.25714] 유형 F1 : [0.88023] 극성 F1 : [0.96370] 시제 F1 : [0.88749] 확실성 F1 : [0.92857]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [15] Train Loss : [0.23847] Val Loss : [0.24573] 유형 F1 : [0.87962] 극성 F1 : [0.96604] 시제 F1 : [0.88827] 확실성 F1 : [0.92890]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [16] Train Loss : [0.22438] Val Loss : [0.24301] 유형 F1 : [0.88124] 극성 F1 : [0.96648] 시제 F1 : [0.88332] 확실성 F1 : [0.93247]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [17] Train Loss : [0.21124] Val Loss : [0.23813] 유형 F1 : [0.88488] 극성 F1 : [0.96977] 시제 F1 : [0.88749] 확실성 F1 : [0.93038]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [18] Train Loss : [0.19836] Val Loss : [0.23947] 유형 F1 : [0.88408] 극성 F1 : [0.97083] 시제 F1 : [0.88113] 확실성 F1 : [0.93029]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [19] Train Loss : [0.18467] Val Loss : [0.24566] 유형 F1 : [0.87812] 극성 F1 : [0.97405] 시제 F1 : [0.88327] 확실성 F1 : [0.93049]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [20] Train Loss : [0.17250] Val Loss : [0.24437] 유형 F1 : [0.88066] 극성 F1 : [0.97319] 시제 F1 : [0.87898] 확실성 F1 : [0.93102]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [21] Train Loss : [0.16383] Val Loss : [0.24692] 유형 F1 : [0.88367] 극성 F1 : [0.97269] 시제 F1 : [0.88038] 확실성 F1 : [0.92873]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [22] Train Loss : [0.15053] Val Loss : [0.24212] 유형 F1 : [0.88236] 극성 F1 : [0.96963] 시제 F1 : [0.88144] 확실성 F1 : [0.92869]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [23] Train Loss : [0.14202] Val Loss : [0.24650] 유형 F1 : [0.88346] 극성 F1 : [0.97349] 시제 F1 : [0.87941] 확실성 F1 : [0.92922]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [24] Train Loss : [0.12875] Val Loss : [0.25007] 유형 F1 : [0.87814] 극성 F1 : [0.97138] 시제 F1 : [0.87680] 확실성 F1 : [0.92722]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [25] Train Loss : [0.11855] Val Loss : [0.25407] 유형 F1 : [0.87933] 극성 F1 : [0.97306] 시제 F1 : [0.87816] 확실성 F1 : [0.92582]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [26] Train Loss : [0.10874] Val Loss : [0.26548] 유형 F1 : [0.88078] 극성 F1 : [0.97497] 시제 F1 : [0.87983] 확실성 F1 : [0.92766]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [27] Train Loss : [0.10148] Val Loss : [0.26450] 유형 F1 : [0.87964] 극성 F1 : [0.97504] 시제 F1 : [0.87565] 확실성 F1 : [0.92439]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [28] Train Loss : [0.09288] Val Loss : [0.27499] 유형 F1 : [0.87404] 극성 F1 : [0.97289] 시제 F1 : [0.87775] 확실성 F1 : [0.92476]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [29] Train Loss : [0.08558] Val Loss : [0.28308] 유형 F1 : [0.87796] 극성 F1 : [0.97666] 시제 F1 : [0.87967] 확실성 F1 : [0.92177]


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Epoch : [30] Train Loss : [0.07658] Val Loss : [0.28231] 유형 F1 : [0.87850] 극성 F1 : [0.97392] 시제 F1 : [0.87904] 확실성 F1 : [0.92522]


  0%|          | 0/50 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

## Inference

In [None]:
# Restore the checkpoint written at loop index e = 17 (file suffix is e + 1), the epoch with the
# lowest validation loss in the training log above.
# map_location lets a checkpoint saved from a GPU run load on whatever device is active now.
pretrained_dict = torch.load(
    r'/content/drive/MyDrive/kobert/saved_models/model_state_dict0018.pth',
    map_location=device,
)
model.load_state_dict(pretrained_dict)
infer_model = model

In [None]:
# Build the inference loader from the competition test file so that the number of predictions
# matches the rows of the submission template filled in below.
# (To sanity-check on held-out data instead, pass `val` in place of `test`.)
# NOTE(review): assumes test.csv has the same "문장" sentence column as train.csv — confirm.
test = pd.read_csv('/content/drive/MyDrive/kobert/test.csv')

# train=False: items carry sentence features only, no labels.
data_test = BERTDataset(test, tok, max_len, True, False, train=False)
test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=batch_size, shuffle=False, num_workers=0)

In [None]:
def inference(infer_model, test_dataloader, device):
    """Predict class indices for every sample in ``test_dataloader``.

    Args:
        infer_model: network returning (type, polarity, tense, certainty) logits; it is moved
            to ``device`` and switched to eval mode.
        test_dataloader: sized iterable of label-free batches. Each batch is a 1-element
            container whose only entry holds (token ids, valid lengths, segment ids).
        device: device used for the forward passes.

    Returns:
        Four lists of predicted class indices: (type, polarity, tense, certainty).
    """
    infer_model.to(device)
    infer_model.eval()

    type_preds, polarity_preds, tense_preds, certainty_preds = [], [], [], []

    with torch.no_grad():
        for batch_id, batch in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
            # Label-free dataset items are 1-tuples, so the features sit one level down.
            token_ids, valid_length, segment_ids = batch[0][0], batch[0][1], batch[0][2]
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            # Use the model that was passed in (previously the global `model` was called,
            # so the `infer_model` argument was prepared but never used for prediction).
            type_logit, polarity_logit, tense_logit, certainty_logit = infer_model(token_ids, valid_length, segment_ids)

            type_preds += type_logit.argmax(1).detach().cpu().numpy().tolist()
            polarity_preds += polarity_logit.argmax(1).detach().cpu().numpy().tolist()
            tense_preds += tense_logit.argmax(1).detach().cpu().numpy().tolist()
            certainty_preds += certainty_logit.argmax(1).detach().cpu().numpy().tolist()

    return type_preds, polarity_preds, tense_preds, certainty_preds

In [None]:
# Predicted class indices per task, one entry per sample in the inference loader.
type_preds, polarity_preds, tense_preds, certainty_preds = inference(infer_model, test_dataloader, device)

  0%|          | 0/13 [00:00<?, ?it/s]

In [None]:
# Index -> label-name lookup tables used to decode predictions (list position = class index).
# NOTE: this rebinds `label_dict`, replacing the name -> index mapping defined in the
# data-loading section, so the label-encoding cell cannot be re-run after this one.
label_dict = {"유형":["사실형", "추론형", "대화형", "예측형"], "극성":["긍정", "부정", "미정"], "시제":["과거", "현재", "미래"], "확실성":["확실", "불확실"]}

In [None]:
# Turn predicted class indices into label names, column by column.
type_preds, polarity_preds, tense_preds, certainty_preds = (
    [label_dict[column][index] for index in indices]
    for column, indices in (
        ("유형", type_preds),
        ("극성", polarity_preds),
        ("시제", tense_preds),
        ("확실성", certainty_preds),
    )
)

In [None]:
# Combine the four decoded labels of each sample into one "type-polarity-tense-certainty" string.
predictions = [
    '-'.join(parts)
    for parts in zip(type_preds, polarity_preds, tense_preds, certainty_preds)
]

In [None]:
# print(len(val["문장"]), len(polarity_preds))
# pd.DataFrame({"sentence":val["문장"], "유형":[label_dict["유형"][x] for x in val["유형"]], "극성":[label_dict["극성"][x] for x in val["극성"]], "시제":[label_dict["시제"][x] for x in val["시제"]], "확실성":[label_dict["확실성"][x] for x in val["확실성"]], "type_preds":type_preds, "polarity_preds":polarity_preds, "tense_preds":tense_preds, "certainty_preds":certainty_preds, "predictions":predictions}).to_csv("./prediction.csv")

## Submission

In [None]:
# Load the submission template and fill in the combined label strings.
submit = pd.read_csv('/content/drive/MyDrive/kobert/sample_submission.csv')
# Check the row count explicitly so a mismatch (e.g. predictions produced from a different
# dataset than the test set) is reported with an actionable message.
if len(predictions) != len(submit):
    raise ValueError(
        f"Got {len(predictions)} predictions for {len(submit)} submission rows; "
        "run inference on the test set before filling the submission."
    )
submit['label'] = predictions

ValueError: ignored

In [None]:
# Preview the first rows of the filled submission before writing it out.
submit.head()

In [None]:
# Write the submission file to Drive without the DataFrame index column.
submit.to_csv('/content/drive/MyDrive/kobert/submissions/submission.csv', index=False)