In [None]:
# Mount Google Drive at /content/drive; the dataset and checkpoint paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import gc

import torch  # imported here because this cell runs before the main import cell

# Collect unreachable Python objects first so their tensors can be released.
gc.collect()

# Release unoccupied memory cached by PyTorch's CUDA allocator (nothing to do on CPU-only runtimes).
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
# Pinned to the version the recorded output of this cell shows; %pip targets the running kernel's environment.
%pip install -q transformers==4.25.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 9.1 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 58.7 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 53.3 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [None]:
import random
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import preprocessing
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from transformers import AutoModel, AutoTokenizer

from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"temp device :  {device}")

temp device :  cpu


In [None]:
# Hyper-parameters read by the seeding, data-loader, optimizer and training cells.
CFG = dict(
    EPOCHS=18,
    LEARNING_RATE=1e-5,  # alternative value kept for reference: 1e-4
    BATCH_SIZE=98,
    SEED=41,
)

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and every CUDA device),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all visible GPUs, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune its algorithm choice per run, which
    # works against the deterministic setting above, so it is switched off.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed

In [None]:
# Load the provided train and test CSVs from the mounted Drive folder.
# `df` is not referenced again in the cells shown here; `test` supplies the inference sentences.
df = pd.read_csv('/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/dataset/train.csv')
test = pd.read_csv('/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/dataset/test.csv')

In [None]:
# The provided training data is not re-split here; already-split train/validation CSVs are loaded
# instead, and the training split is extended with back-translated sentences.
already_split_train = pd.read_csv("/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/dataset/preprocessed_dataset/base_code_train_valid/base_code_train_split.csv")
already_split_val = pd.read_csv("/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/dataset/preprocessed_dataset/base_code_train_valid/base_code_val_split.csv")
back_translation_train = pd.read_csv("/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/dataset/preprocessed_dataset/base_code_train_valid/extracted_backtranslation_only_overlab_from_base_train.csv")

# Stack both training sources, drop rows with missing values, and renumber the rows from 0.
train = (
    pd.concat([already_split_train, back_translation_train], ignore_index=True)
    .dropna()
    .reset_index(drop=True)
)
# `val` is the same object as `already_split_val`, not a copy.
val = already_split_val

print(train.index)
print(val.index)

RangeIndex(start=0, stop=15220, step=1)
RangeIndex(start=0, stop=3309, step=1)


In [None]:
# Report how many rows each split holds.
for split_name, split_frame in (("train", train), ("val", val)):
    print(f"{split_name} len : {len(split_frame)}")

train len : 15220
val len : 3309


In [None]:
# 1. Prepare the sentences (text) -> dict with a single "sentence" key per split
train_texts, val_texts, test_texts = (
    {"sentence" : list(frame["문장"])} for frame in (train, val, test)
)

for label, texts in (("train_texts", train_texts), ("val_texts", val_texts), ("test_texts", test_texts)):
    print(f"{label} : {len(texts['sentence'])}")

train_texts : 15220
val_texts : 3309
test_texts : 7090


In [None]:
# 2. Label encoding of the four target columns (type, polarity, tense, certainty)
def _encode_label_column(column):
    """Fit a LabelEncoder on train[column], overwrite train/val[column] with integer codes, return the encoder."""
    encoder = preprocessing.LabelEncoder()
    train[column] = encoder.fit_transform(train[column].values)
    val[column] = encoder.transform(val[column].values)
    return encoder

# NOTE: the columns are overwritten in place, so this cell must run exactly once after the data-loading cell.
type_le = _encode_label_column("유형")
polarity_le = _encode_label_column("극성")
tense_le = _encode_label_column("시제")
certainty_le = _encode_label_column("확실성")

In [None]:
# Sanity check: show the Python type of the first encoded value in the "유형" column.
print(type(train["유형"][0]))

<class 'numpy.int64'>


In [None]:
# Class distribution of the encoded "유형" column (numpy is already imported in the import cell).
label_unique, label_cnts = np.unique(train["유형"], return_counts=True)
print(label_unique, label_cnts, sep="\n")

[0 1 2 3]
[  670 10855   408  3287]


In [None]:
# Encoded targets of the training split as arrays, in the order type / polarity / tense / certainty.
train_type, train_polarity, train_tense, train_certainty = (
    train[column].values for column in ("유형", "극성", "시제", "확실성")
)

train_labels = dict(
    type=train_type,
    polarity=train_polarity,
    tense=train_tense,
    certainty=train_certainty,
)

In [None]:
# Quick look at the encoded training targets.
print(train_labels)

{'type': array([1, 1, 1, ..., 0, 0, 3]), 'polarity': array([0, 0, 0, ..., 0, 1, 0]), 'tense': array([0, 0, 2, ..., 2, 2, 2]), 'certainty': array([1, 1, 1, ..., 1, 0, 1])}


In [None]:
# Encoded targets of the validation split as arrays, in the order type / polarity / tense / certainty.
val_type, val_polarity, val_tense, val_certainty = (
    val[column].values for column in ("유형", "극성", "시제", "확실성")
)

val_labels = dict(
    type=val_type,
    polarity=val_polarity,
    tense=val_tense,
    certainty=val_certainty,
)

In [None]:
# Quick look at the encoded validation targets.
print(val_labels)

{'type': array([1, 1, 1, ..., 1, 1, 1]), 'polarity': array([0, 0, 0, ..., 0, 0, 0]), 'tense': array([0, 0, 2, ..., 0, 0, 0]), 'certainty': array([1, 1, 1, ..., 1, 1, 1])}


**custom dataset, collate fn**

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset over sentences and, optionally, their four labels.

    Args:
        st_data: Dict whose "sentence" entry is an indexable collection of texts.
        st_labels: Optional dict with indexable "type", "polarity", "tense" and
            "certainty" entries aligned with the sentences. Pass ``None`` for
            unlabelled data.
    """

    _LABEL_KEYS = ("type", "polarity", "tense", "certainty")

    def __init__(self, st_data:dict, st_labels:dict=None):
        self.st_data = st_data
        self.st_labels = st_labels

    def __getitem__(self, index):
        """Return ``{"sentence": ...}`` plus one entry per label key when labels were given."""
        sample = {"sentence" : self.st_data["sentence"][index]}
        # Identity check instead of `!= None`, which would invoke the label container's __ne__.
        if self.st_labels is not None:
            for key in self._LABEL_KEYS:
                sample[key] = self.st_labels[key][index]
        return sample

    def __len__(self):
        """Number of sentences."""
        return len(self.st_data["sentence"])

In [None]:
from typing import List, Dict

class MyCollateFN:
    """Collate callable that tokenizes a batch of dataset samples into padded tensors.

    Args:
        base_ckpt: Checkpoint name passed to ``AutoTokenizer.from_pretrained``.
        device: Device every returned tensor is moved to.
        mode: ``"train"`` also collates the four label fields; any other value
            returns only the tokenizer outputs.
    """

    _LABEL_KEYS = ("type", "polarity", "tense", "certainty")

    def __init__(self, base_ckpt:str, device:str, mode="train"):
        self.tokenizer = AutoTokenizer.from_pretrained(base_ckpt)
        self.device = device
        self.mode = mode

    def __call__(self, batch:List)->Dict:
        """Build the model inputs (and label tensors in train mode) for one batch."""
        sentences = [sample["sentence"] for sample in batch]
        # Pad every sentence to the longest one in this batch.
        encoded = self.tokenizer(sentences, return_tensors='pt', padding = 'longest')

        collated = {
            "input_ids" : encoded["input_ids"].to(self.device),
            "attention_mask" : encoded["attention_mask"].to(self.device),
        }
        if self.mode != "train":
            return collated

        # One int64 tensor per label field, in the same key order the training loop reads.
        for key in self._LABEL_KEYS:
            values = [sample[key] for sample in batch]
            collated[key] = torch.tensor(values).long().to(self.device)
        return collated

In [None]:
# Checkpoint name shared by the tokenizer and the encoder.
BASE_CKPT = "monologg/koelectra-base-v3-discriminator"

train_dataset = CustomDataset(train_texts, train_labels)
val_dataset = CustomDataset(val_texts, val_labels)

# One "train"-mode collate function serves both loaders because both splits carry labels.
custom_collate_fn = MyCollateFN(base_ckpt=BASE_CKPT, device=device, mode="train")

train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
    collate_fn=custom_collate_fn,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
    collate_fn=custom_collate_fn,
)

**custom code model**

In [None]:
class BaseModel(nn.Module):
    """Pretrained encoder with four linear classification heads.

    The hidden state at sequence position 0 is passed through GELU and fed to
    separate dropout + linear heads for sentence type (4 classes), polarity (3),
    tense (3) and certainty (2).

    Args:
        base_ckpt: Checkpoint name passed to ``AutoModel.from_pretrained``.
    """

    def __init__(self, base_ckpt:str):
        super().__init__()
        self.base_model = AutoModel.from_pretrained(base_ckpt)
        self.gelu = nn.GELU()

        # Size the heads from the encoder's config instead of hard-coding 768,
        # so other checkpoints work too; 768 stays as the fallback.
        hidden_size = getattr(self.base_model.config, "hidden_size", 768)

        # Attribute names and creation order are kept so state_dict keys and
        # seeded initialisation match the previous definition.
        self.type_classifier = self._make_head(hidden_size, 4)
        self.polarity_classifier = self._make_head(hidden_size, 3)
        self.tense_classifier = self._make_head(hidden_size, 3)
        self.certainty_classifier = self._make_head(hidden_size, 2)

    @staticmethod
    def _make_head(in_features, out_features):
        """Build one classification head: dropout (p=0.3) followed by a linear layer."""
        return nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=in_features, out_features=out_features),
        )

    def forward(self, input_ids, attention_mask):
        """Return ``(type, polarity, tense, certainty)`` logits for the batch."""
        encoded = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state of the first token as the sentence representation.
        first_token = encoded["last_hidden_state"][:, 0, :]
        pooler = self.gelu(first_token)

        # Classify sentence type, polarity, tense and certainty with separate heads.
        type_output = self.type_classifier(pooler)
        polarity_output = self.polarity_classifier(pooler)
        tense_output = self.tense_classifier(pooler)
        certainty_output = self.certainty_classifier(pooler)

        return type_output, polarity_output, tense_output, certainty_output

**calculate each label weights**

In [None]:
def _calc_loss_weights(train_labels):
  type_unique, type_cnts = np.unique(train_labels["type"], return_counts=True)
  polarity_unique, polarity_cnts = np.unique(train_labels["polarity"], return_counts=True)
  tense_unique, tense_cnts = np.unique(train_labels["tense"], return_counts=True)
  certainty_unique, certainty_cnts = np.unique(train_labels["certainty"], return_counts=True)

  # type_weight = torch.tensor([1.0 + (np.log(max(type_cnts)/type_cnts[i]))/len(type_cnts) for i in range(len(type_cnts))]).float()
  # polarity_weight = torch.tensor([1.0 + (np.log(max(polarity_cnts)/polarity_cnts[i]))/len(polarity_cnts) for i in range(len(polarity_cnts))]).float()
  # tense_weight = torch.tensor([1.0 + (np.log(max(tense_cnts)/tense_cnts[i]))/len(tense_cnts) for i in range(len(tense_cnts))]).float()
  # certainty_weight = torch.tensor([1.0 + (np.log(max(certainty_cnts)/certainty_cnts[i]))/len(certainty_cnts) for i in range(len(certainty_cnts))]).float()

  type_weight = torch.tensor([1.0 + (np.log(max(type_cnts)/type_cnts[i])) for i in range(len(type_cnts))]).float()
  polarity_weight = torch.tensor([1.0 + (np.log(max(polarity_cnts)/polarity_cnts[i])) for i in range(len(polarity_cnts))]).float()
  tense_weight = torch.tensor([1.0 + (np.log(max(tense_cnts)/tense_cnts[i])) for i in range(len(tense_cnts))]).float()
  certainty_weight = torch.tensor([1.0 + (np.log(max(certainty_cnts)/certainty_cnts[i])) for i in range(len(certainty_cnts))]).float()


  return type_weight, polarity_weight, tense_weight, certainty_weight

# Display the log-scaled class weights computed from the training labels.
_calc_loss_weights(train_labels)

(tensor([3.7851, 1.0000, 4.2811, 2.1947]),
 tensor([1.0000, 5.1844, 4.2573]),
 tensor([1.0000, 2.3231, 1.0159]),
 tensor([3.1556, 1.0000]))

In [None]:
def __calc_label_cnt_weight(target_cnts:List):
  class_weight= 1./target_cnts
  class_weight = class_weight/min(class_weight)

  return class_weight

# def __calc_label_cnt_weight(target_cnts:List):
#   class_weight= [max(target_cnts) / temp_cnt for temp_cnt in target_cnts]

#   return class_weight

def _other_calc_loss_weights(train_labels):
    """Compute inverse-frequency class weights for each of the four label heads.

    Args:
        train_labels: Dict with "type", "polarity", "tense" and "certainty"
            label arrays.

    Returns:
        Tuple of four float32 tensors in the order (type, polarity, tense,
        certainty), produced by ``__calc_label_cnt_weight`` from the class counts.
    """
    weights = []
    for head in ("type", "polarity", "tense", "certainty"):
        _, counts = np.unique(train_labels[head], return_counts=True)
        weights.append(torch.tensor(__calc_label_cnt_weight(counts)).float())
    return tuple(weights)

# Display the inverse-frequency class weights for comparison with the log-scaled ones.
_other_calc_loss_weights(train_labels)

(tensor([16.2015,  1.0000, 26.6054,  3.3024]),
 tensor([ 1.0000, 65.6545, 25.9784]),
 tensor([1.0000, 3.7551, 1.0161]),
 tensor([8.6329, 1.0000]))

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.modules.loss._WeightedLoss):
    """Multi-class focal loss: ``alpha_y * (1 - p_y) ** gamma * CE`` per sample.

    Args:
        weight: Optional per-class weight tensor; acts as the alpha term that
            balances classes.
        gamma: Focusing exponent; ``gamma=0`` with no weight reduces to plain
            cross-entropy.
        reduction: ``'mean'`` (default), ``'sum'`` or ``'none'``. ``'mean'`` is
            the plain average over samples.
    """

    def __init__(self, weight=None, gamma=2,reduction='mean'):
        # The parent class registers `weight` as a buffer, so .to(device) moves it.
        super().__init__(weight, reduction=reduction)
        self.gamma = gamma

    def forward(self, input, target):
        """Return the focal loss of logits ``input`` against class indices ``target``."""
        # pt must be the true-class probability of EACH sample, so the
        # cross-entropy is left unreduced and unweighted here. Reducing first
        # would apply the focal factor to the batch mean and stop it from
        # down-weighting individual easy samples.
        ce_per_sample = F.cross_entropy(input, target, reduction='none')
        pt = torch.exp(-ce_per_sample)
        focal_factor = (1 - pt) ** self.gamma

        if self.weight is None:
            weighted_ce = ce_per_sample
        else:
            # The class weights scale the loss only; they must not leak into pt.
            weighted_ce = F.cross_entropy(input, target, weight=self.weight, reduction='none')

        focal_loss = focal_factor * weighted_ce

        if self.reduction == 'none':
            return focal_loss
        if self.reduction == 'sum':
            return focal_loss.sum()
        if self.reduction == 'mean':
            return focal_loss.mean()
        raise ValueError(f"{self.reduction} is not a valid value for reduction")

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device, use_weight_loss = False):
    """Fine-tune ``model`` with cross-entropy on the four heads and keep the best epoch.

    Relies on the notebook-level names ``CFG`` (epoch count), ``train_labels``,
    ``_calc_loss_weights`` and ``validation``.

    NOTE: defining this function rebinds the name ``train``, which earlier cells
    use for the training DataFrame; re-run the data cells before re-running any
    cell that reads that DataFrame.

    Args:
        model: Network returning (type, polarity, tense, certainty) logits.
        optimizer: Optimizer over ``model``'s parameters.
        train_loader: Loader yielding dicts with "input_ids", "attention_mask"
            and the four label tensors.
        val_loader: Loader with the same batch layout, used after every epoch.
        scheduler: Optional scheduler stepped with the validation loss, or ``None``.
        device: Device the model and the loss modules are moved to.
        use_weight_loss: When true, use class weights from ``_calc_loss_weights``.

    Returns:
        ``(best_model, model)``: an independent copy of the model holding the
        weights of the epoch with the lowest validation loss (``None`` if no
        epoch improved on the initial threshold), and the model as it is after
        the last epoch.
    """
    import copy  # local import: only needed to materialise the best-epoch copy

    model.to(device)

    heads = ('type', 'polarity', 'tense', 'certainty')
    if use_weight_loss:
        class_weights = dict(zip(heads, _calc_loss_weights(train_labels)))
    else:
        class_weights = dict.fromkeys(heads)  # weight=None -> unweighted loss
    criterion = {
        head: nn.CrossEntropyLoss(weight=class_weights[head]).to(device)
        for head in heads
    }

    best_loss = 999999
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for batch in tqdm(iter(train_loader)):
            input_ids = batch["input_ids"]
            attention_mask = batch["attention_mask"]

            type_label = batch["type"]
            polarity_label = batch["polarity"]
            tense_label = batch["tense"]
            certainty_label = batch["certainty"]

            optimizer.zero_grad()

            type_logit, polarity_logit, tense_logit, certainty_logit = model(input_ids, attention_mask)

            # The four heads contribute equally to the total loss.
            loss = 0.25 * criterion['type'](type_logit, type_label) + \
                    0.25 * criterion['polarity'](polarity_logit, polarity_label) + \
                    0.25 * criterion['tense'](tense_logit, tense_label) + \
                    0.25 * criterion['certainty'](certainty_logit, certainty_label)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_type_f1, val_polarity_f1, val_tense_f1, val_certainty_f1 = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] 유형 F1 : [{val_type_f1:.5f}] 극성 F1 : [{val_polarity_f1:.5f}] 시제 F1 : [{val_tense_f1:.5f}] 확실성 F1 : [{val_certainty_f1:.5f}]')

        if scheduler is not None:
            scheduler.step(val_loss)

        if best_loss > val_loss:
            best_loss = val_loss
            # Snapshot the weights on the CPU. Storing `model` itself would only
            # keep a reference that later epochs keep mutating.
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

    best_model = None
    if best_state is not None:
        best_model = copy.deepcopy(model)
        best_model.load_state_dict(best_state)

    return best_model, model

In [None]:
def train_with_focal(model, optimizer, train_loader, val_loader, scheduler, device, use_weight_loss=False):
    """Fine-tune ``model`` with ``FocalLoss`` on the four heads and keep the best epoch.

    Relies on the notebook-level names ``CFG`` (epoch count), ``train_labels``,
    ``_calc_loss_weights``, ``FocalLoss`` and ``validation``.

    Args:
        model: Network returning (type, polarity, tense, certainty) logits.
        optimizer: Optimizer over ``model``'s parameters.
        train_loader: Loader yielding dicts with "input_ids", "attention_mask"
            and the four label tensors.
        val_loader: Loader with the same batch layout, used after every epoch.
        scheduler: Optional scheduler stepped with the validation loss, or ``None``.
        device: Device the model and the loss modules are moved to.
        use_weight_loss: When true, pass class weights from
            ``_calc_loss_weights`` to each ``FocalLoss``.

    Returns:
        ``(best_model, model)``: an independent copy of the model holding the
        weights of the epoch with the lowest validation loss (``None`` if no
        epoch improved on the initial threshold), and the model as it is after
        the last epoch.
    """
    import copy  # local import: only needed to materialise the best-epoch copy

    model.to(device)

    heads = ('type', 'polarity', 'tense', 'certainty')
    if use_weight_loss:
        class_weights = dict(zip(heads, _calc_loss_weights(train_labels)))
    else:
        class_weights = dict.fromkeys(heads)  # weight=None -> unweighted loss
    criterion = {
        head: FocalLoss(weight=class_weights[head]).to(device)
        for head in heads
    }

    best_loss = 999999
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for batch in tqdm(iter(train_loader)):
            input_ids = batch["input_ids"]
            attention_mask = batch["attention_mask"]

            type_label = batch["type"]
            polarity_label = batch["polarity"]
            tense_label = batch["tense"]
            certainty_label = batch["certainty"]

            optimizer.zero_grad()

            type_logit, polarity_logit, tense_logit, certainty_logit = model(input_ids, attention_mask)

            # The four heads contribute equally to the total loss.
            loss = 0.25 * criterion['type'](type_logit, type_label) + \
                    0.25 * criterion['polarity'](polarity_logit, polarity_label) + \
                    0.25 * criterion['tense'](tense_logit, tense_label) + \
                    0.25 * criterion['certainty'](certainty_logit, certainty_label)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_type_f1, val_polarity_f1, val_tense_f1, val_certainty_f1 = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] 유형 F1 : [{val_type_f1:.5f}] 극성 F1 : [{val_polarity_f1:.5f}] 시제 F1 : [{val_tense_f1:.5f}] 확실성 F1 : [{val_certainty_f1:.5f}]')

        if scheduler is not None:
            scheduler.step(val_loss)

        if best_loss > val_loss:
            best_loss = val_loss
            # Snapshot the weights on the CPU. Storing `model` itself would only
            # keep a reference that later epochs keep mutating.
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

    best_model = None
    if best_state is not None:
        best_model = copy.deepcopy(model)
        best_model.load_state_dict(best_state)

    return best_model, model

In [None]:
def validation(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader``.

    Args:
        model: Network returning (type, polarity, tense, certainty) logits.
        val_loader: Loader yielding dicts with "input_ids", "attention_mask"
            and the four label tensors.
        criterion: Dict of loss modules keyed by 'type', 'polarity', 'tense'
            and 'certainty'.
        device: Accepted for signature compatibility; not used inside.

    Returns:
        ``(mean_loss, type_f1, polarity_f1, tense_f1, certainty_f1)``, where the
        loss is the mean over batches and each F1 is the weighted average.
    """
    heads = ('type', 'polarity', 'tense', 'certainty')
    predicted = {head: [] for head in heads}
    expected = {head: [] for head in heads}
    batch_losses = []

    model.eval()
    with torch.no_grad():
        for batch in tqdm(iter(val_loader)):
            outputs = model(batch["input_ids"], batch["attention_mask"])
            logits = dict(zip(heads, outputs))

            # Same equally-weighted sum of the four head losses used during training.
            loss = 0.25 * criterion['type'](logits['type'], batch['type']) + \
                    0.25 * criterion['polarity'](logits['polarity'], batch['polarity']) + \
                    0.25 * criterion['tense'](logits['tense'], batch['tense']) + \
                    0.25 * criterion['certainty'](logits['certainty'], batch['certainty'])
            batch_losses.append(loss.item())

            for head in heads:
                predicted[head].extend(logits[head].argmax(1).detach().cpu().numpy().tolist())
                expected[head].extend(batch[head].detach().cpu().numpy().tolist())

    scores = [f1_score(expected[head], predicted[head], average='weighted') for head in heads]

    return (np.mean(batch_losses), scores[0], scores[1], scores[2], scores[3])

### original train code

In [None]:
# Fresh model, optimizer and LR scheduler for one training run.
model = BaseModel(base_ckpt=BASE_CKPT)
model.train()
print(f"learning rate : {CFG['LEARNING_RATE']}")

# RAdam is the optimizer in use (plain Adam was the alternative tried here).
optimizer = torch.optim.RAdam(params=model.parameters(), lr=CFG["LEARNING_RATE"])

# Halve the learning rate when the validation loss has not improved for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=2,
    threshold_mode='abs',
    min_lr=1e-8,
    verbose=True,
)

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


learning rate : 1e-05


In [None]:
# Train with cross-entropy loss.
# NOTE(review): this cell and the focal-loss cell pass the same `model` / `optimizer` objects;
# run only one of them per freshly built model, otherwise the second call continues from the first.
infer_best_model, last_model = train(model, optimizer, train_loader, val_loader, scheduler, device, use_weight_loss=True)

In [None]:
# Train with focal loss.
# NOTE(review): this cell and the cross-entropy cell pass the same `model` / `optimizer` objects;
# run only one of them per freshly built model, otherwise the second call continues from the first.
infer_best_model, last_model = train_with_focal(model, optimizer, train_loader, val_loader, scheduler, device, use_weight_loss=True)

  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.41916] Val Loss : [0.31861] 유형 F1 : [0.75428] 극성 F1 : [0.93030] 시제 F1 : [0.61301] 확실성 F1 : [0.87274]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.30059] Val Loss : [0.21225] 유형 F1 : [0.77513] 극성 F1 : [0.93030] 시제 F1 : [0.74091] 확실성 F1 : [0.87274]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.20428] Val Loss : [0.12033] 유형 F1 : [0.81024] 극성 F1 : [0.93030] 시제 F1 : [0.87497] 확실성 F1 : [0.90948]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.12950] Val Loss : [0.07849] 유형 F1 : [0.86110] 극성 F1 : [0.93321] 시제 F1 : [0.88649] 확실성 F1 : [0.91797]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.09397] Val Loss : [0.06177] 유형 F1 : [0.84759] 극성 F1 : [0.96416] 시제 F1 : [0.88612] 확실성 F1 : [0.91540]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.07364] Val Loss : [0.05192] 유형 F1 : [0.85283] 극성 F1 : [0.96220] 시제 F1 : [0.88556] 확실성 F1 : [0.90126]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.06142] Val Loss : [0.04335] 유형 F1 : [0.87087] 극성 F1 : [0.96889] 시제 F1 : [0.89316] 확실성 F1 : [0.91970]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.05266] Val Loss : [0.04245] 유형 F1 : [0.84578] 극성 F1 : [0.96880] 시제 F1 : [0.88865] 확실성 F1 : [0.91452]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.04510] Val Loss : [0.03730] 유형 F1 : [0.87328] 극성 F1 : [0.97087] 시제 F1 : [0.89615] 확실성 F1 : [0.91920]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.03969] Val Loss : [0.03436] 유형 F1 : [0.87962] 극성 F1 : [0.97109] 시제 F1 : [0.89795] 확실성 F1 : [0.92809]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [11] Train Loss : [0.03390] Val Loss : [0.03323] 유형 F1 : [0.87426] 극성 F1 : [0.97319] 시제 F1 : [0.89746] 확실성 F1 : [0.92347]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [12] Train Loss : [0.03018] Val Loss : [0.03227] 유형 F1 : [0.88411] 극성 F1 : [0.97366] 시제 F1 : [0.89354] 확실성 F1 : [0.91849]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [13] Train Loss : [0.02613] Val Loss : [0.03091] 유형 F1 : [0.87536] 극성 F1 : [0.97418] 시제 F1 : [0.90091] 확실성 F1 : [0.92179]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [14] Train Loss : [0.02321] Val Loss : [0.02999] 유형 F1 : [0.88353] 극성 F1 : [0.97176] 시제 F1 : [0.89940] 확실성 F1 : [0.92567]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [15] Train Loss : [0.01987] Val Loss : [0.02941] 유형 F1 : [0.88022] 극성 F1 : [0.97325] 시제 F1 : [0.89718] 확실성 F1 : [0.92644]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [16] Train Loss : [0.01731] Val Loss : [0.02877] 유형 F1 : [0.88800] 극성 F1 : [0.97351] 시제 F1 : [0.89922] 확실성 F1 : [0.92430]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [17] Train Loss : [0.01541] Val Loss : [0.02908] 유형 F1 : [0.88839] 극성 F1 : [0.97438] 시제 F1 : [0.90139] 확실성 F1 : [0.92695]


  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch : [18] Train Loss : [0.01365] Val Loss : [0.02866] 유형 F1 : [0.89285] 극성 F1 : [0.97348] 시제 F1 : [0.90114] 확실성 F1 : [0.92604]


**save model**

In [None]:
# Save the weights of the model returned as "best" by the training call, plus the optimizer state.
save_ckpt_path = "/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/model_save_dir/KoElectraBase/new_back_translation_best_model_1e-05_FocalLoss_use_loss_weight_ver2_18ep_98bat.pt"
torch.save({
    "model_state_dict" : infer_best_model.state_dict(),
    "optimizer_state_dict" : optimizer.state_dict()
}, save_ckpt_path)

In [None]:
# Save the weights of the last-epoch model, plus the optimizer state.
# NOTE(review): the file name says "not_use_loss_weight", but the training cells call with
# use_weight_loss=True — confirm the name matches the run before relying on it.
save_ckpt_path = "/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/model_save_dir/KoElectraBase/new_back_translation_last_model_1e-05_FocalLoss_not_use_loss_weight_18ep_98bat.pt"
torch.save({
    "model_state_dict" : last_model.state_dict(),
    "optimizer_state_dict" : optimizer.state_dict()
}, save_ckpt_path)

### Inference

In [None]:
# Unlabelled test data: the "infer" collate mode returns only input_ids and attention_mask.
test_collate_fn = MyCollateFN(base_ckpt=BASE_CKPT, device=device, mode="infer")
test_dataset = CustomDataset(test_texts, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,  # keep the test order so predictions line up with the submission rows
    num_workers=0,
    collate_fn=test_collate_fn,
)

In [None]:
# Peek at one collated test batch (token ids and attention mask only).
print(next(iter(test_loader)))

{'input_ids': tensor([[    2,  3263,  4683,  ...,     0,     0,     0],
        [    2,  9566,    59,  ...,     0,     0,     0],
        [    2,  6346,    21,  ...,     0,     0,     0],
        ...,
        [    2,  6359,  4073,  ...,     0,     0,     0],
        [    2,  6389,  3240,  ...,     0,     0,     0],
        [    2, 14984,  4172,  ...,     0,     0,     0]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0')}


In [None]:
def inference(model, test_loader, device):
    """Predict class indices for every batch of ``test_loader``.

    Args:
        model: Network returning (type, polarity, tense, certainty) logits.
        test_loader: Loader yielding dicts with "input_ids" and "attention_mask".
        device: Device the model is moved to before predicting.

    Returns:
        Four lists of predicted class indices, in the order
        (type, polarity, tense, certainty).
    """
    model.to(device)
    model.eval()

    # One accumulator per head, in the same order the model returns its logits.
    collected = [[], [], [], []]

    with torch.no_grad():
        for batch in tqdm(test_loader):
            head_logits = model(batch["input_ids"], batch["attention_mask"])
            for bucket, logits in zip(collected, head_logits):
                bucket.extend(logits.argmax(1).detach().cpu().numpy().tolist())

    type_preds, polarity_preds, tense_preds, certainty_preds = collected
    return type_preds, polarity_preds, tense_preds, certainty_preds

In [None]:
# Predict the four label indices for the test set with the model returned as "best" by training.
type_preds, polarity_preds, tense_preds, certainty_preds = inference(infer_best_model, test_loader, device)

  0%|          | 0/73 [00:00<?, ?it/s]

In [None]:
# Map predicted class indices back to the original label strings.
# NOTE: this overwrites the *_preds names, so the cell must not be re-run without re-running inference.
type_preds, polarity_preds, tense_preds, certainty_preds = (
    encoder.inverse_transform(encoded)
    for encoder, encoded in (
        (type_le, type_preds),
        (polarity_le, polarity_preds),
        (tense_le, tense_preds),
        (certainty_le, certainty_preds),
    )
)

In [None]:
# Combine the four decoded labels of each sentence into one "type-polarity-tense-certainty" string.
predictions = [
    '-'.join(parts)
    for parts in zip(type_preds, polarity_preds, tense_preds, certainty_preds)
]

In [None]:
# Show a short preview and the total count instead of dumping every prediction into the cell output.
print(predictions[:10])
print(f"total predictions : {len(predictions)}")

['사실형-긍정-현재-확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '사실형-부정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-미래-불확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-부정-현재-확실', '추론형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '사실형-긍정-현재-확실', '사실형-긍정-현재-확실', '추론형-긍정-과거-확실', '사실형-긍정-현재-확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '대화형-부정-현재-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '추론형-긍정-현재-확실', '사실형-긍정-과거-확실', '추론형-긍정-현재-불확실', '사실형-긍정-미래-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-불확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '대화형-긍정-과거-확실', '추론형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-미래-확실', '사실형-긍정-과거-확실', '사실형-부정-현재-확실', '추론형-긍정-미래-확실', '추론형-긍정-미래-불확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '추론형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '추론형-긍정-현재-불확실', '추론형-긍정-현재-확실', '추론형-긍정-현재-확실', '사실형-긍정-현재-확실', '사실형-긍정-과거-확실', '사실형-긍정-과거-확실', '사

In [None]:
# Fill the sample submission's "label" column with the predictions and write the result to Drive.
sample_submission = pd.read_csv(
    "/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/dataset/sample_submission_original.csv"
).assign(label=predictions)

sample_submission.to_csv(
    "/content/drive/MyDrive/GITHUB/Dacon_JminJ_Sentence_Category_Classification/dataset/sample_submission_KoElectraBase_1e-05_FocalLoss_use_weight_loss_ver2_98bat_18ep.csv",
    index=False,
)