
# Pytorch + HuggingFace 
## KoElectra Model
박장원님의 KoElectra-small 사용<br>
https://monologg.kr/2020/05/02/koelectra-part1/<br>
https://github.com/monologg/KoELECTRA

## References
- https://huggingface.co/transformers/training.html
- https://tutorials.pytorch.kr/beginner/data_loading_tutorial.html
- https://tutorials.pytorch.kr/beginner/blitz/cifar10_tutorial.html
- https://wikidocs.net/44249

### setting

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 17.6 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 52.9 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 69.1 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [2]:
import pandas as pd
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, ElectraForSequenceClassification, AdamW
from tqdm.notebook import tqdm
import numpy as np
from torch import nn
from sklearn.metrics import f1_score
import random
import os

In [3]:
epochs = 1000
batch_size = 150
learning_rate = 5e-6
seed = 0

In [4]:
# GPU 사용
device = 'cuda' if torch.cuda.is_available() else 'cpu'
max_grad_norm = 1

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(seed) # Seed 고정

### Dataset 만들어서 불러오기 

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
class ElectraDataset(Dataset):
  
  def __init__(self, csv_file, train):
    self.dataset = pd.read_csv(csv_file)
    self.tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-small-v2-discriminator")
    self.sentences = self.dataset["문장"].tolist()
    self.train = train
    if self.train:
      self.type_labels = [np.int32(i) for i in self.dataset["유형"]]
      self.polarity_labels = [np.int32(i) for i in self.dataset["극성"]]
      self.tense_labels = [np.int32(i) for i in self.dataset["시제"]]
      self.certainty_labels = [np.int32(i) for i in self.dataset["확실성"]]

  def __len__(self):
    return len(self.dataset)
  
  def __getitem__(self, idx):
    
      inputs = self.tokenizer(
          self.sentences[idx], 
          return_tensors='pt',
          truncation=True,
          max_length=256,
          pad_to_max_length=True,
          add_special_tokens=True
          )
      input_ids = inputs['input_ids'][0]
      attention_mask = inputs['attention_mask'][0]
      if self.train:
        return input_ids, attention_mask, self.type_labels[idx], self.polarity_labels[idx], self.tense_labels[idx], self.certainty_labels[idx]
      return input_ids, attention_mask

In [8]:
train_dataset = ElectraDataset("/content/drive/MyDrive/NLP/trainset/trian_split_by_polarity.csv", True)
val_dataset = ElectraDataset("/content/drive/MyDrive/NLP/trainset/validation_split_by_polarity.csv", True)
# train_dataset = ElectraDataset("/content/drive/MyDrive/NLP/trainset/trian_split_by_type.csv", True)
# val_dataset = ElectraDataset("/content/drive/MyDrive/NLP/trainset/validation_split_by_type.csv", True)

Downloading:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/486 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255k [00:00<?, ?B/s]

### Create Model

In [9]:
electra = ElectraForSequenceClassification.from_pretrained("monologg/koelectra-small-v2-discriminator")

# 한번 실행해보기
# text, attention_mask, y1, y2, y3, y4 = train_dataset[0]
# model(text.unsqueeze(0).to(device), attention_mask=attention_mask.unsqueeze(0).to(device))

Downloading:   0%|          | 0.00/55.1M [00:00<?, ?B/s]

Some weights of the model checkpoint at monologg/koelectra-small-v2-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v2-discriminator and are newly initialized

In [10]:
# model.load_state_dict(torch.load("model.pt"))

In [11]:
# 모델 레이어 보기
# electra

In [12]:
electra.classifier = torch.nn.Sequential(*(list(electra.classifier.children())[:-1]))

In [13]:
electra.to(device)

ElectraForSequenceClassification(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(32200, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (embeddings_project): Linear(in_features=128, out_features=256, bias=True)
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=256, out_features=256, bias=True)
              (key): Linear(in_features=256, out_features=256, bias=True)
              (value): Linear(in_features=256, out_features=256, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_

In [14]:
# text, attention_mask, y1, y2, y3, y4 = train_dataset[0]
# electra(text.unsqueeze(0).to(device), attention_mask=attention_mask.unsqueeze(0).to(device))

In [15]:
class ElectraClassifier(nn.Module):
    def __init__(self, electra):
        super(ElectraClassifier, self).__init__()
        self.electra = electra
        self.type_classifier = nn.Linear(65536 , 4)
        self.polarity_classifier = nn.Linear(65536 , 3)
        self.tense_classifier = nn.Linear(65536 , 3)
        self.certainty_classifier = nn.Linear(65536 , 2)
  
    def forward(self, input_ids, attention_mask):
        out = torch.flatten(self.electra(input_ids, attention_mask = attention_mask).logits, start_dim=1).to(device)
        return (self.type_classifier(out), self.polarity_classifier(out), self.tense_classifier(out), self.certainty_classifier(out))

In [16]:
model = ElectraClassifier(electra)

In [None]:
model.to(device)

In [18]:
class FocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=0.25):
        super(FocalLoss, self).__init__()
        self.loss_fn = nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, pred, true):
        bceloss = self.loss_fn(pred, true.float())
        pred_prob = torch.sigmoid(pred)  # p  pt는 p가 true 이면 pt = p / false 이면 pt = 1 - p
        alpha_factor = true * self.alpha + (1-true) * (1 - self.alpha)  # add balance
        modulating_factor = torch.abs(true - pred_prob) ** self.gamma  # focal term
        loss = alpha_factor * modulating_factor * bceloss  # bceloss에 이미 음수가 들어가 있음

        return loss.mean()

### Learn

In [19]:
optimizer = AdamW(model.parameters(), lr=learning_rate)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)



In [20]:
def validation(model, val_loader, criterion, device):
    model.eval()
    val_loss = []
    
    type_preds, polarity_preds, tense_preds, certainty_preds = [], [], [], []
    type_labels, polarity_labels, tense_labels, certainty_labels = [], [], [], []
    
    
    with torch.no_grad():
      for input_ids_batch, attention_masks_batch, type_label, polarity_label, tense_label, certainty_label in tqdm(val_loader):
        type_label = type_label.long().to(device)
        polarity_label = polarity_label.long().to(device)
        tense_label = tense_label.long().to(device)
        certainty_label = certainty_label.long().to(device)
        type_logit, polarity_logit, tense_logit, certainty_logit = model(input_ids_batch.to(device), attention_masks_batch.to(device))
        loss = 0.25 * criterion['type'](type_logit, type_label) + \
                    0.25 * criterion['polarity'](polarity_logit, polarity_label) + \
                    0.25 * criterion['tense'](tense_logit, tense_label) + \
                    0.25 * criterion['certainty'](certainty_logit, certainty_label)
          
        val_loss.append(loss.item())
        
        type_preds += type_logit.argmax(1).detach().cpu().numpy().tolist()
        type_labels += type_label.detach().cpu().numpy().tolist()
        
        polarity_preds += polarity_logit.argmax(1).detach().cpu().numpy().tolist()
        polarity_labels += polarity_label.detach().cpu().numpy().tolist()
        
        tense_preds += tense_logit.argmax(1).detach().cpu().numpy().tolist()
        tense_labels += tense_label.detach().cpu().numpy().tolist()
        
        certainty_preds += certainty_logit.argmax(1).detach().cpu().numpy().tolist()
        certainty_labels += certainty_label.detach().cpu().numpy().tolist()
    type_f1 = f1_score(type_labels, type_preds, average='weighted')
    polarity_f1 = f1_score(polarity_labels, polarity_preds, average='weighted')
    tense_f1 = f1_score(tense_labels, tense_preds, average='weighted')
    certainty_f1 = f1_score(certainty_labels, certainty_preds, average='weighted')
    
    return np.mean(val_loss), type_f1, polarity_f1, tense_f1, certainty_f1

In [21]:
def train(model, optimizer, train_loader, val_loader, device):
  log_df = {"train_losses":[], "val_losses":[], "type_f1s":[], "polarity_f1s":[], "tense_f1s":[], "certainty_f1s":[]}

  criterion = {
      'type' : nn.CrossEntropyLoss().to(device),
      'polarity' : nn.CrossEntropyLoss().to(device),
      'tense' : nn.CrossEntropyLoss().to(device),
      'certainty' : nn.CrossEntropyLoss().to(device)
  }
  
  for e in range(epochs):
    train_loss = []
    for input_ids_batch, attention_masks_batch, type_label, polarity_label, tense_label, certainty_label in tqdm(train_loader):
      model.train()
      optimizer.zero_grad()
      type_label = type_label.long().to(device)
      polarity_label = polarity_label.long().to(device)
      tense_label = tense_label.long().to(device)
      certainty_label = certainty_label.long().to(device)
      type_logit, polarity_logit, tense_logit, certainty_logit = model(input_ids_batch.to(device), attention_masks_batch.to(device))

      loss = 0.25 * criterion['type'](type_logit, type_label) + \
                  0.25 * criterion['polarity'](polarity_logit, polarity_label) + \
                  0.25 * criterion['tense'](tense_logit, tense_label) + \
                  0.25 * criterion['certainty'](certainty_logit, certainty_label)

      loss.backward()
      torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
      optimizer.step()
      train_loss.append(loss.item())

    model.eval()
    val_loss, type_f1, polarity_f1, tense_f1, certainty_f1 = validation(model, val_loader, criterion, device)
    print(f'Epoch : [{e+1}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] 유형 F1 : [{type_f1:.5f}] 극성 F1 : [{polarity_f1:.5f}] 시제 F1 : [{tense_f1:.5f}] 확실성 F1 : [{certainty_f1:.5f}]')
    
    log_df["val_losses"].append(val_loss)
    log_df["train_losses"].append(np.mean(train_loss))
    log_df["type_f1s"].append(type_f1)
    log_df["polarity_f1s"].append(polarity_f1)
    log_df["tense_f1s"].append(tense_f1)
    log_df["certainty_f1s"].append(certainty_f1)

    pd.DataFrame(log_df).to_csv('/content/drive/MyDrive/NLP/saved_models/log.csv')
    torch.save(model.state_dict(), f"/content/drive/MyDrive/NLP/saved_models/model_state_dict{str(e+1).zfill(4)}.pth")

In [22]:
# def validation(model, val_loader, criterion, device):
#     model.eval()
#     val_loss = []
    
#     type_preds, polarity_preds, tense_preds, certainty_preds = [], [], [], []
#     type_labels, polarity_labels, tense_labels, certainty_labels = [], [], [], []
    
    
#     with torch.no_grad():
#       for input_ids_batch, attention_masks_batch, type_label, polarity_label, tense_label, certainty_label in tqdm(val_loader):
#         type_label_hot = F.one_hot(type_label.to(torch.int64), num_classes=4).float().to(device)
#         polarity_label_hot = F.one_hot(polarity_label.to(torch.int64), num_classes=3).float().to(device)
#         tense_label_hot = F.one_hot(tense_label.to(torch.int64), num_classes=3).float().to(device)
#         certainty_label_hot = F.one_hot(certainty_label.to(torch.int64), num_classes=2).float().to(device) 
#         type_logit, polarity_logit, tense_logit, certainty_logit = model(input_ids_batch.to(device), attention_masks_batch.to(device))
#         loss = 0.25 * criterion['type'](type_logit, type_label_hot) + \
#                     0.25 * criterion['polarity'](polarity_logit, polarity_label_hot) + \
#                     0.25 * criterion['tense'](tense_logit, tense_label_hot) + \
#                     0.25 * criterion['certainty'](certainty_logit, certainty_label_hot)
          
#         val_loss.append(loss.item())

#         type_preds += type_logit.argmax(1).detach().cpu().numpy().tolist()
#         type_labels += type_label.detach().cpu().numpy().tolist()

#         polarity_preds += polarity_logit.argmax(1).detach().cpu().numpy().tolist()
#         polarity_labels += polarity_label.detach().cpu().numpy().tolist()
        
#         tense_preds += tense_logit.argmax(1).detach().cpu().numpy().tolist()
#         tense_labels += tense_label.detach().cpu().numpy().tolist()
        
#         certainty_preds += certainty_logit.argmax(1).detach().cpu().numpy().tolist()
#         certainty_labels += certainty_label.detach().cpu().numpy().tolist()

#     type_f1 = f1_score(type_labels, type_preds, average='weighted')
#     polarity_f1 = f1_score(polarity_labels, polarity_preds, average='weighted')
#     tense_f1 = f1_score(tense_labels, tense_preds, average='weighted')
#     certainty_f1 = f1_score(certainty_labels, certainty_preds, average='weighted')
    
#     return np.mean(val_loss), type_f1, polarity_f1, tense_f1, certainty_f1

In [23]:
# def train(model, optimizer, train_loader, val_loader, device):
#   log_df = {"train_losses":[], "val_losses":[], "type_f1s":[], "polarity_f1s":[], "tense_f1s":[], "certainty_f1s":[]}

#   criterion = {
#       'type' : FocalLoss().to(device),
#       'polarity' : FocalLoss().to(device),
#       'tense' : FocalLoss().to(device),
#       'certainty' : FocalLoss().to(device)
#   }

#   for e in range(epochs):
#     train_loss = []
#     for input_ids_batch, attention_masks_batch, type_label, polarity_label, tense_label, certainty_label in tqdm(train_loader):
#       model.train()
#       optimizer.zero_grad()
      
#       type_label = F.one_hot(type_label.to(torch.int64), num_classes=4).float().to(device)
#       polarity_label = F.one_hot(polarity_label.to(torch.int64), num_classes=3).float().to(device)
#       tense_label = F.one_hot(tense_label.to(torch.int64), num_classes=3).float().to(device)
#       certainty_label = F.one_hot(certainty_label.to(torch.int64), num_classes=2).float().to(device) 
#       type_logit, polarity_logit, tense_logit, certainty_logit = model(input_ids_batch.to(device), attention_masks_batch.to(device))

#       loss = 0.25 * criterion['type'](type_logit, type_label) + \
#                   0.25 * criterion['polarity'](polarity_logit, polarity_label) + \
#                   0.25 * criterion['tense'](tense_logit, tense_label) + \
#                   0.25 * criterion['certainty'](certainty_logit, certainty_label)

#       loss.backward()
#       torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
#       optimizer.step()
#       train_loss.append(loss.item())
      
#     model.eval()
#     val_loss, type_f1, polarity_f1, tense_f1, certainty_f1 = validation(model, val_loader, criterion, device)
#     print(f'Epoch : [{e+1}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] 유형 F1 : [{type_f1:.5f}] 극성 F1 : [{polarity_f1:.5f}] 시제 F1 : [{tense_f1:.5f}] 확실성 F1 : [{certainty_f1:.5f}]')
    
#     log_df["val_losses"].append(val_loss)
#     log_df["train_losses"].append(np.mean(train_loss))
#     log_df["type_f1s"].append(type_f1)
#     log_df["polarity_f1s"].append(polarity_f1)
#     log_df["tense_f1s"].append(tense_f1)
#     log_df["certainty_f1s"].append(certainty_f1)

#     pd.DataFrame(log_df).to_csv('/content/drive/MyDrive/NLP/saved_models/log.csv')
#     torch.save(model.state_dict(), f"/content/drive/MyDrive/NLP/saved_models/model_state_dict{str(e+1).zfill(4)}.pth")

In [24]:
model = train(model, optimizer, train_loader, val_loader, device)

  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.57696] Val Loss : [0.46938] 유형 F1 : [0.73368] 극성 F1 : [0.93253] 시제 F1 : [0.63793] 확실성 F1 : [0.87626]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.38417] Val Loss : [0.31053] 유형 F1 : [0.77299] 극성 F1 : [0.93253] 시제 F1 : [0.86228] 확실성 F1 : [0.87808]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.28913] Val Loss : [0.27233] 유형 F1 : [0.82747] 극성 F1 : [0.93382] 시제 F1 : [0.88619] 확실성 F1 : [0.91298]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.25369] Val Loss : [0.24212] 유형 F1 : [0.85891] 극성 F1 : [0.94585] 시제 F1 : [0.88703] 확실성 F1 : [0.92146]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.23520] Val Loss : [0.24101] 유형 F1 : [0.86453] 극성 F1 : [0.95556] 시제 F1 : [0.89247] 확실성 F1 : [0.92590]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.22400] Val Loss : [0.21765] 유형 F1 : [0.86680] 극성 F1 : [0.96447] 시제 F1 : [0.89421] 확실성 F1 : [0.92549]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.21453] Val Loss : [0.21944] 유형 F1 : [0.87415] 극성 F1 : [0.96520] 시제 F1 : [0.89740] 확실성 F1 : [0.92909]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.20846] Val Loss : [0.21245] 유형 F1 : [0.87771] 극성 F1 : [0.97000] 시제 F1 : [0.89820] 확실성 F1 : [0.93141]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.20220] Val Loss : [0.20641] 유형 F1 : [0.87527] 극성 F1 : [0.97248] 시제 F1 : [0.90060] 확실성 F1 : [0.93231]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.19816] Val Loss : [0.20710] 유형 F1 : [0.87597] 극성 F1 : [0.97322] 시제 F1 : [0.89880] 확실성 F1 : [0.93185]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [11] Train Loss : [0.19560] Val Loss : [0.20853] 유형 F1 : [0.88540] 극성 F1 : [0.97402] 시제 F1 : [0.89854] 확실성 F1 : [0.93174]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [12] Train Loss : [0.19174] Val Loss : [0.20489] 유형 F1 : [0.88192] 극성 F1 : [0.97569] 시제 F1 : [0.90114] 확실성 F1 : [0.93152]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [13] Train Loss : [0.18856] Val Loss : [0.20000] 유형 F1 : [0.88293] 극성 F1 : [0.97368] 시제 F1 : [0.90230] 확실성 F1 : [0.93212]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [14] Train Loss : [0.18602] Val Loss : [0.20450] 유형 F1 : [0.87994] 극성 F1 : [0.97626] 시제 F1 : [0.90007] 확실성 F1 : [0.93091]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [15] Train Loss : [0.18322] Val Loss : [0.19856] 유형 F1 : [0.88060] 극성 F1 : [0.97513] 시제 F1 : [0.90183] 확실성 F1 : [0.93085]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [16] Train Loss : [0.18101] Val Loss : [0.20004] 유형 F1 : [0.88750] 극성 F1 : [0.97540] 시제 F1 : [0.90227] 확실성 F1 : [0.93395]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [17] Train Loss : [0.17922] Val Loss : [0.19596] 유형 F1 : [0.88448] 극성 F1 : [0.97550] 시제 F1 : [0.90315] 확실성 F1 : [0.93124]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [18] Train Loss : [0.17691] Val Loss : [0.20622] 유형 F1 : [0.88326] 극성 F1 : [0.97707] 시제 F1 : [0.90333] 확실성 F1 : [0.93014]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [19] Train Loss : [0.17289] Val Loss : [0.19774] 유형 F1 : [0.88543] 극성 F1 : [0.97591] 시제 F1 : [0.90283] 확실성 F1 : [0.93027]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [20] Train Loss : [0.17132] Val Loss : [0.20060] 유형 F1 : [0.88754] 극성 F1 : [0.97650] 시제 F1 : [0.90427] 확실성 F1 : [0.92803]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [21] Train Loss : [0.17014] Val Loss : [0.20532] 유형 F1 : [0.88809] 극성 F1 : [0.97591] 시제 F1 : [0.90320] 확실성 F1 : [0.93227]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [22] Train Loss : [0.16751] Val Loss : [0.20585] 유형 F1 : [0.88521] 극성 F1 : [0.97600] 시제 F1 : [0.90109] 확실성 F1 : [0.92938]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [23] Train Loss : [0.16413] Val Loss : [0.20290] 유형 F1 : [0.88779] 극성 F1 : [0.97744] 시제 F1 : [0.90298] 확실성 F1 : [0.92884]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [24] Train Loss : [0.16219] Val Loss : [0.21315] 유형 F1 : [0.88706] 극성 F1 : [0.97835] 시제 F1 : [0.90302] 확실성 F1 : [0.92725]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [25] Train Loss : [0.16096] Val Loss : [0.20266] 유형 F1 : [0.88402] 극성 F1 : [0.97768] 시제 F1 : [0.90341] 확실성 F1 : [0.92966]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [26] Train Loss : [0.15833] Val Loss : [0.20720] 유형 F1 : [0.89095] 극성 F1 : [0.97776] 시제 F1 : [0.90080] 확실성 F1 : [0.92774]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [27] Train Loss : [0.15578] Val Loss : [0.20914] 유형 F1 : [0.88925] 극성 F1 : [0.97684] 시제 F1 : [0.90415] 확실성 F1 : [0.92979]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [28] Train Loss : [0.15459] Val Loss : [0.20078] 유형 F1 : [0.88858] 극성 F1 : [0.97748] 시제 F1 : [0.90236] 확실성 F1 : [0.92959]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [29] Train Loss : [0.15279] Val Loss : [0.20777] 유형 F1 : [0.88780] 극성 F1 : [0.97667] 시제 F1 : [0.90010] 확실성 F1 : [0.92918]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [30] Train Loss : [0.15187] Val Loss : [0.20973] 유형 F1 : [0.88785] 극성 F1 : [0.97655] 시제 F1 : [0.90229] 확실성 F1 : [0.92992]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [31] Train Loss : [0.14787] Val Loss : [0.21944] 유형 F1 : [0.88414] 극성 F1 : [0.97618] 시제 F1 : [0.90237] 확실성 F1 : [0.92893]


  0%|          | 0/89 [00:00<?, ?it/s]



  0%|          | 0/23 [00:00<?, ?it/s]

Epoch : [32] Train Loss : [0.14585] Val Loss : [0.21142] 유형 F1 : [0.88676] 극성 F1 : [0.97635] 시제 F1 : [0.90166] 확실성 F1 : [0.92819]


  0%|          | 0/89 [00:00<?, ?it/s]



KeyboardInterrupt: ignored

### Inference

In [None]:
pretrained_dict = torch.load(r'/content/drive/MyDrive/NLP/saved_models/model_state_dict0038.pth')
model.load_state_dict(pretrained_dict)
infer_model = model

In [None]:
data_test = ElectraDataset("/content/drive/MyDrive/NLP/trainset/test.csv", False)
test_dataloader = DataLoader(data_test, batch_size=batch_size)

In [None]:
len(data_test)

7090

In [None]:
def inference(infer_model, test_dataloader, device):
    infer_model.to(device)
    infer_model.eval()
    
    type_preds, polarity_preds, tense_preds, certainty_preds = [], [], [], []
    
    with torch.no_grad():
      for i in tqdm(test_dataloader):

        type_logit, polarity_logit, tense_logit, certainty_logit = infer_model(i[0].to(device), i[1].to(device))
        
        type_preds += type_logit.argmax(1).detach().cpu().numpy().tolist()
        polarity_preds += polarity_logit.argmax(1).detach().cpu().numpy().tolist()
        tense_preds += tense_logit.argmax(1).detach().cpu().numpy().tolist()
        certainty_preds += certainty_logit.argmax(1).detach().cpu().numpy().tolist()
        
    return type_preds, polarity_preds, tense_preds, certainty_preds

In [None]:
type_preds, polarity_preds, tense_preds, certainty_preds = inference(infer_model, test_dataloader, device)

  0%|          | 0/48 [00:00<?, ?it/s]



In [None]:
label_dict = {"유형":["사실형", "추론형", "대화형", "예측형"], "극성":["긍정", "부정", "미정"], "시제":["과거", "현재", "미래"], "확실성":["확실", "불확실"]}

In [None]:
len(type_preds)

7090

In [None]:
type_preds = [label_dict["유형"][x] for x in type_preds]
polarity_preds = [label_dict["극성"][x] for x in polarity_preds]
tense_preds = [label_dict["시제"][x] for x in tense_preds]
certainty_preds = [label_dict["확실성"][x] for x in certainty_preds]

In [None]:
predictions = []
for type_pred, polarity_pred, tense_pred, certainty_pred in zip(type_preds, polarity_preds, tense_preds, certainty_preds):
    predictions.append(type_pred+'-'+polarity_pred+'-'+tense_pred+'-'+certainty_pred)

In [None]:
# print(len(val["문장"]), len(polarity_preds))
# pd.DataFrame({"sentence":val["문장"], "유형":[label_dict["유형"][x] for x in val["유형"]], "극성":[label_dict["극성"][x] for x in val["극성"]], "시제":[label_dict["시제"][x] for x in val["시제"]], "확실성":[label_dict["확실성"][x] for x in val["확실성"]], "type_preds":type_preds, "polarity_preds":polarity_preds, "tense_preds":tense_preds, "certainty_preds":certainty_preds, "predictions":predictions}).to_csv("./prediction.csv")

In [None]:
len(predictions)

7090

### Submission

In [None]:
submit = pd.read_csv('/content/drive/MyDrive/NLP/sample_submission.csv')
submit['label'] = predictions

In [None]:
submit.head()

Unnamed: 0,ID,label
0,TEST_0000,사실형-긍정-현재-확실
1,TEST_0001,사실형-긍정-현재-확실
2,TEST_0002,사실형-긍정-과거-확실
3,TEST_0003,사실형-긍정-과거-확실
4,TEST_0004,사실형-긍정-과거-확실


In [None]:
submit.to_csv('/content/drive/MyDrive/NLP/submissions/submission.csv', index=False)

### validation to csv

In [None]:
pretrained_dict = torch.load(r'/content/drive/MyDrive/NLP/saved_models/model_state_dict0038.pth', map_location=device)
model.load_state_dict(pretrained_dict)
infer_model = model

In [None]:
data_test = ElectraDataset("/content/drive/MyDrive/NLP/trainset/validation_split_by_polarity.csv", True)
test_dataloader = DataLoader(data_test, batch_size=batch_size)

In [None]:
def inference(infer_model, test_dataloader, device):
    infer_model.to(device)
    infer_model.eval()
    
    type_preds, polarity_preds, tense_preds, certainty_preds = [], [], [], []
    
    with torch.no_grad():
      for i in tqdm(test_dataloader):

        type_logit, polarity_logit, tense_logit, certainty_logit = infer_model(i[0].to(device), i[1].to(device))
        
        type_preds += type_logit.argmax(1).detach().cpu().numpy().tolist()
        polarity_preds += polarity_logit.argmax(1).detach().cpu().numpy().tolist()
        tense_preds += tense_logit.argmax(1).detach().cpu().numpy().tolist()
        certainty_preds += certainty_logit.argmax(1).detach().cpu().numpy().tolist()
        
    return type_preds, polarity_preds, tense_preds, certainty_preds

In [None]:
type_preds, polarity_preds, tense_preds, certainty_preds = inference(infer_model, test_dataloader, device)

  0%|          | 0/23 [00:00<?, ?it/s]

In [None]:
label_dict = {"유형":["사실형", "추론형", "대화형", "예측형"], "극성":["긍정", "부정", "미정"], "시제":["과거", "현재", "미래"], "확실성":["확실", "불확실"]}

In [None]:
len(type_preds)

In [None]:
type_preds = [label_dict["유형"][x] for x in type_preds]
polarity_preds = [label_dict["극성"][x] for x in polarity_preds]
tense_preds = [label_dict["시제"][x] for x in tense_preds]
certainty_preds = [label_dict["확실성"][x] for x in certainty_preds]

In [None]:
predictions = []
for type_pred, polarity_pred, tense_pred, certainty_pred in zip(type_preds, polarity_preds, tense_preds, certainty_preds):
    predictions.append(type_pred+'-'+polarity_pred+'-'+tense_pred+'-'+certainty_pred)

In [None]:
val = pd.read_csv("/content/drive/MyDrive/NLP/trainset/validation_split_by_polarity.csv")
print(len(val["문장"]), len(polarity_preds))
pd.DataFrame({"sentence":val["문장"], "유형":[label_dict["유형"][x] for x in val["유형"]], "극성":[label_dict["극성"][x] for x in val["극성"]], "시제":[label_dict["시제"][x] for x in val["시제"]], "확실성":[label_dict["확실성"][x] for x in val["확실성"]], "type_preds":type_preds, "polarity_preds":polarity_preds, "tense_preds":tense_preds, "certainty_preds":certainty_preds, "predictions":predictions}).to_csv("/content/drive/MyDrive/NLP/submissions/prediction.csv")