In [1]:
import os
import math
import random
import pandas as pd
import regex as re
import numpy as np
from typing import Optional, Sequence

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, f1_score

from tqdm import tqdm
import torch
from torch import nn
from torch import Tensor
from torch.nn import functional as F
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import DataLoader, Dataset, random_split
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer, EarlyStoppingCallback, AutoModel, AutoConfig

import gc
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
# Load the competition data. The training frame drops its 'ID' column;
# the test frame is kept exactly as read.
train = pd.read_csv('train.csv').drop(columns=['ID'])
test = pd.read_csv('test.csv')

In [3]:
train

Unnamed: 0,문장,유형,극성,시제,확실성,label
0,0.75%포인트 금리 인상은 1994년 이후 28년 만에 처음이다.,사실형,긍정,현재,확실,사실형-긍정-현재-확실
1,이어 ＂앞으로 전문가들과 함께 4주 단위로 상황을 재평가할 예정＂이라며 ＂그 이전이...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
2,정부가 고유가 대응을 위해 7월부터 연말까지 유류세 인하 폭을 30%에서 37%까지...,사실형,긍정,미래,확실,사실형-긍정-미래-확실
3,"서울시는 올해 3월 즉시 견인 유예시간 60분을 제공하겠다고 밝혔지만, 하루 만에 ...",사실형,긍정,과거,확실,사실형-긍정-과거-확실
4,익사한 자는 사다리에 태워 거꾸로 놓고 소금으로 코를 막아 가득 채운다.,사실형,긍정,현재,확실,사실형-긍정-현재-확실
...,...,...,...,...,...,...
16536,"＇신동덤＇은 ＇신비한 동물사전＇과 ＇해리 포터＇ 시리즈를 잇는 마법 어드벤처물로, ...",사실형,긍정,과거,확실,사실형-긍정-과거-확실
16537,"수족냉증은 어릴 때부터 심했으며 관절은 어디 한 곳이 아니고 목, 어깨, 팔꿈치, ...",사실형,긍정,과거,확실,사실형-긍정-과거-확실
16538,김금희 소설가는 ＂계약서 조정이 그리 어려운가 작가를 격려한다면서 그런 문구 하나 ...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
16539,1만명이 넘는 방문자수를 기록한 이번 전시회는 총 77개 작품을 넥슨 사옥을 그대로...,사실형,긍정,과거,불확실,사실형-긍정-과거-불확실


In [4]:
train.유형.unique(), train.극성.unique(), train.시제.unique(), train.확실성.unique()

(array(['사실형', '추론형', '예측형', '대화형'], dtype=object),
 array(['긍정', '부정', '미정'], dtype=object),
 array(['현재', '과거', '미래'], dtype=object),
 array(['확실', '불확실'], dtype=object))

In [5]:
def clean_text(text):
    """Normalize one sentence for tokenization.

    Keeps only spaces, ASCII letters, digits and Hangul syllables, then
    collapses every run of spaces into a single space.

    Args:
        text (str): raw sentence.

    Returns:
        str: cleaned sentence.
    """
    text = re.sub("[^ A-Za-z0-9가-힣]", "", text)
    # " +" (one or more spaces) is the intended pattern. The previous
    # "[ +]" was a character class matching a single space or a literal
    # '+', so it never collapsed anything.
    return re.sub(" +", " ", text)


# Apply the same cleaning to both splits so train and test share one format.
train['문장'] = train['문장'].apply(clean_text)
test['문장'] = test['문장'].apply(clean_text)

In [6]:
train['문장']

0                       075포인트 금리 인상은 1994년 이후 28년 만에 처음이다
1        이어 앞으로 전문가들과 함께 4주 단위로 상황을 재평가할 예정이라며 그 이전이라도 ...
2        정부가 고유가 대응을 위해 7월부터 연말까지 유류세 인하 폭을 30에서 37까지 확대한다
3        서울시는 올해 3월 즉시 견인 유예시간 60분을 제공하겠다고 밝혔지만 하루 만에 차...
4                  익사한 자는 사다리에 태워 거꾸로 놓고 소금으로 코를 막아 가득 채운다
                               ...                        
16536    신동덤은 신비한 동물사전과 해리 포터 시리즈를 잇는 마법 어드벤처물로 전편에 이어 ...
16537    수족냉증은 어릴 때부터 심했으며 관절은 어디 한 곳이 아니고 목 어깨 팔꿈치 등 허...
16538    김금희 소설가는 계약서 조정이 그리 어려운가 작가를 격려한다면서 그런 문구 하나 고...
16539    1만명이 넘는 방문자수를 기록한 이번 전시회는 총 77개 작품을 넥슨 사옥을 그대로...
16540                                           목민심서의 내용이다
Name: 문장, Length: 16541, dtype: object

# Text augmentation (random word swap)

In [7]:
# train, X_val, _, _ = train_test_split(train, train.label, test_size=0.1, random_state=42)

In [8]:
# https://github.com/catSirup/KorEDA/blob/master/eda.py
def swap_word(new_words):
    """Swap two distinct, randomly chosen positions of ``new_words`` in place.

    Args:
        new_words (list): tokens to permute. The list is modified in place.

    Returns:
        list: the same list object. It is returned unchanged when it holds
        fewer than two items, or when no distinct second index was drawn
        within the retry budget.
    """
    # A swap needs two positions. Without this guard an empty list makes
    # random.randint(0, -1) raise ValueError, and a one-item list can only
    # exhaust the retry budget below.
    if len(new_words) < 2:
        return new_words

    last = len(new_words) - 1
    random_idx_1 = random.randint(0, last)
    random_idx_2 = random_idx_1
    counter = 0

    # Redraw the second index until it differs from the first; give up
    # after a few attempts and leave the list untouched.
    while random_idx_2 == random_idx_1:
        random_idx_2 = random.randint(0, last)
        counter += 1
        if counter > 3:
            return new_words

    new_words[random_idx_1], new_words[random_idx_2] = new_words[random_idx_2], new_words[random_idx_1]
    return new_words

def random_swap(words, n):
    """Return a copy of ``words`` after ``n`` random swap attempts.

    The input list is copied first, so the caller's list is left as is.
    Each attempt is delegated to ``swap_word``.
    """
    shuffled = words.copy()
    for _attempt in range(n):
        shuffled = swap_word(shuffled)
    return shuffled

def text_aug(sentence, alpha_rs = 0.1, num_aug=3):
    """Create random-swap augmentations of a sentence.

    Args:
        sentence (str): space-separated input sentence.
        alpha_rs (float): fraction of the word count used as the number of
            swap attempts per augmentation (at least one attempt).
        num_aug (int | float): when >= 1, the number of augmented sentences
            to return (truncated to an int). When < 1, a single candidate
            is generated and kept with probability ``num_aug``.

    Returns:
        list[str]: augmented sentences. Sentences with fewer than two words
        cannot be permuted and are returned re-joined but otherwise
        unchanged (an empty sentence yields empty strings).
    """
    words = [word for word in sentence.split(' ') if word != ""]
    num_words = len(words)

    # Always build at least one candidate so the fractional ``num_aug``
    # branch below is reachable and never divides by zero; ``range`` also
    # needs an int.
    num_candidates = max(1, int(num_aug))
    n_rs = max(1, int(alpha_rs * num_words))

    augmented_sentences = []
    for _ in range(num_candidates):
        # With zero or one word there is nothing to swap; skipping the call
        # also avoids sampling an index from an empty list.
        a_words = random_swap(words, n_rs) if num_words >= 2 else words
        augmented_sentences.append(" ".join(a_words))

    random.shuffle(augmented_sentences)

    if num_aug >= 1:
        return augmented_sentences[:num_candidates]

    keep_prob = num_aug / len(augmented_sentences)
    return [s for s in augmented_sentences if random.uniform(0, 1) < keep_prob]

# Seed Python's RNG so the random-swap augmentation is the same on every
# "Restart & Run All" (42 is the value the notebook uses for `seed` later).
random.seed(42)
# One list of augmented variants per training sentence (3 with the defaults).
aug = train['문장'].apply(text_aug)

In [9]:
# Build three copies of the training frame; copy k has its sentence column
# replaced by the k-th augmented variant of every row. Labels are untouched.
tmp1, tmp2, tmp3 = [
    train.assign(**{'문장': [variants[k] for variants in aug]})
    for k in range(3)
]

In [10]:
# Stack the original rows with the three augmented copies, drop rows that
# are exact duplicates (a swap may leave a sentence unchanged), shuffle and
# renumber. The shuffle is seeded because KFold later maps positional
# indices onto this row order; an unseeded shuffle changes fold membership
# on every run. 42 matches the `seed` hyperparameter defined further down.
train = (
    pd.concat([train, tmp1, tmp2, tmp3])
    .drop_duplicates(keep='first')
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
train

Unnamed: 0,문장,유형,극성,시제,확실성,label
0,이 교수가 한국푸드테크협의회 창립을 위해 있는 동분서주하고 것도 바로 이 때문이다,추론형,긍정,현재,확실,추론형-긍정-현재-확실
1,사고 충격으로 차량 파편이 차로도 차로로도 튀면서 서울 방향 일부 맞은편 통행이 일...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
2,기술을 텍스트 마이닝과 머신러닝과 같은 SAP는 활용해 모든 의사결정 프로세스에서 ...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
3,상속 재산 나니 이처럼 강제로 배정하는 유류분을 정해놓고 일부를 또 다른 문제가 생겼다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
4,코로나19 이후 은행 디지털 전환이 가속화하며 금융업의 지점 수도 빠르게 줄고 있다,사실형,긍정,현재,확실,사실형-긍정-현재-확실
...,...,...,...,...,...,...
64677,코로나19 등으로 인해 미국 경제가 나빠지면 예상입니다 금리를 내릴 거라는 연준이,대화형,긍정,미래,불확실,대화형-긍정-미래-불확실
64678,지난 블록딜 20192020년에도 쇼크가 있었다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
64679,손석희 앵커가 진행했고 이창현 국민대 교수와 정준희 한양대 겸임교수도 토론자로 참여했다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
64680,에릭 쉬어마이어 미래 창업자 겸 대표는 개막식에서 갈라버스는 갈라게임즈의 커뮤니티에...,사실형,긍정,과거,확실,사실형-긍정-과거-확실


In [11]:
train['문장'].str.len().max(), test['문장'].str.len().max()

(496, 378)

# Dataset

In [12]:
# Notebook-wide globals used by the model, trainer and fold cells below.
# Training target device; hard-coded to CUDA (no CPU fallback).
device = torch.device("cuda")
# Checkpoint identifier shared by the tokenizer, the config and the encoder.
model_path = "monologg/kobigbird-bert-base"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Longest training sentence measured in characters; the fold loop passes
# this value to the tokenizer as `max_length`.
# NOTE(review): max_length counts tokens, not characters — presumably used
# as a loose upper bound; confirm it never truncates.
length = train['문장'].str.len().max()

In [13]:
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset over tokenizer encodings with optional labels.

    Args:
        encodings: mapping of feature name -> per-sample sequences; must
            contain "input_ids", which defines the dataset length.
        labels: optional mapping with the keys 'type', 'polarity', 'tense'
            and 'certainty', each indexable by sample position.
    """

    # Order of the four targets inside the "labels" tuple of every item.
    _LABEL_KEYS = ('type', 'polarity', 'tense', 'certainty')

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        """Return the feature tensors of sample ``idx`` (plus "labels" if set)."""
        item = {}
        for key, val in self.encodings.items():
            item[key] = torch.tensor(val[idx])

        # No labels supplied (e.g. inference): features only.
        if not self.labels:
            return item

        item["labels"] = tuple(
            torch.tensor(self.labels[name][idx]) for name in self._LABEL_KEYS
        )
        return item

# HuggingFace Phase

## config

In [14]:
# Load the checkpoint's configuration; CustomModel reads this global.
config=AutoConfig.from_pretrained(model_path)
# Overwrites the recorded model name in the config with a custom tag.
config._name_or_path = 'kr.kim'
# Show the encoder depth the checkpoint ships with.
print(config.num_hidden_layers)
# Experiment left disabled: changing the number of encoder layers.
# config.num_hidden_layers = 17
config

12


BigBirdConfig {
  "_name_or_path": "kr.kim",
  "architectures": [
    "BigBirdForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "attention_type": "block_sparse",
  "block_size": 64,
  "bos_token_id": 5,
  "classifier_dropout": null,
  "eos_token_id": 6,
  "gradient_checkpointing": false,
  "hidden_act": "gelu_new",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 4096,
  "model_type": "big_bird",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_random_blocks": 3,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "rescale_embeddings": false,
  "sep_token_id": 3,
  "tokenizer_class": "BertTokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.25.1",
  "type_vocab_size": 2,
  "use_bias": true,
  "use_cache": true,
  "vocab_size": 32500
}

## custom model

In [15]:
class CustomModel(nn.Module):
    """Shared encoder with four independent classification heads.

    The encoder is loaded from the notebook-level ``model_path`` and
    ``config``. Each head is ``Dropout(0.2) -> Linear`` applied to the
    hidden state at sequence position 0, predicting sentence type
    (4 classes), polarity (3), tense (3) and certainty (2).
    """

    def __init__(self):
        super().__init__()
        # For the KoBigBird checkpoint, switch the shared config to full
        # attention. This mutates the notebook-level `config` object.
        # NOTE(review): presumably because the inputs are short sentences —
        # confirm the motivation.
        if model_path == 'monologg/kobigbird-bert-base':
            config.attention_type = "original_full"
        self.base_model = AutoModel.from_pretrained(model_path, config=config)
        # Head input width, read from the last encoder layer's output projection.
        self.out = self.base_model.encoder.layer[-1].output.dense.out_features

        # Heads are created in this fixed order (type, polarity, tense, certainty).
        self.type_classifier = self._head(self.out, 4)
        self.polarity_classifier = self._head(self.out, 3)
        self.tense_classifier = self._head(self.out, 3)
        self.certainty_classifier = self._head(self.out, 2)

    @staticmethod
    def _head(in_features, num_classes):
        """Build one Dropout + Linear classification head."""
        return nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=in_features, out_features=num_classes),
        )

    def forward(self, input_ids, attention_mask, labels=None, token_type_ids=None):
        """Return ``(type, polarity, tense, certainty)`` logits.

        ``labels`` and ``token_type_ids`` are accepted but not used here.
        """
        hidden = self.base_model(input_ids=input_ids, attention_mask=attention_mask)[0]
        # First-position vector of every sequence, shaped (-1, self.out).
        first_token = hidden[:, 0, :].view(-1, self.out)
        # Classify type, polarity, tense and certainty separately.
        return (
            self.type_classifier(first_token),
            self.polarity_classifier(first_token),
            self.tense_classifier(first_token),
            self.certainty_classifier(first_token),
        )

## Training hyperparameters

In [16]:
# Trainer arguments
lr = 1e-4
stop = 3
epoch = 1000
batch = 16
seed = 42

## loss

In [17]:
class FocalLoss(nn.Module):
    """ Focal Loss, as described in https://arxiv.org/abs/1708.02002.
    It is essentially an enhancement to cross entropy loss and is
    useful for classification tasks when there is a large class imbalance.
    x is expected to contain raw, unnormalized scores for each class.
    y is expected to contain class labels.
    Shape:
        - x: (batch_size, C) or (batch_size, C, d1, d2, ..., dK), K > 0.
        - y: (batch_size,) or (batch_size, d1, d2, ..., dK), K > 0.
    """

    def __init__(self,
                 alpha: Optional[Tensor] = None,
                 gamma: float = 0.,
                 reduction: str = 'mean',
                 ignore_index: int = -100):
        """Constructor.
        Args:
            alpha (Tensor, optional): Weights for each class. Defaults to None.
            gamma (float, optional): A constant, as described in the paper.
                Defaults to 0.
            reduction (str, optional): 'mean', 'sum' or 'none'.
                Defaults to 'mean'.
            ignore_index (int, optional): class label to ignore.
                Defaults to -100.
        Raises:
            ValueError: if ``reduction`` is not one of the accepted values.
        """
        if reduction not in ('mean', 'sum', 'none'):
            raise ValueError(
                'Reduction must be one of: "mean", "sum", "none".')

        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_index = ignore_index
        self.reduction = reduction

        # Per-sample weighted NLL; the focal factor and the reduction are
        # applied in forward().
        self.nll_loss = nn.NLLLoss(
            weight=alpha, reduction='none', ignore_index=ignore_index)

    def __repr__(self):
        arg_keys = ['alpha', 'gamma', 'ignore_index', 'reduction']
        arg_vals = [self.__dict__[k] for k in arg_keys]
        arg_strs = [f'{k}={v!r}' for k, v in zip(arg_keys, arg_vals)]
        arg_str = ', '.join(arg_strs)
        return f'{type(self).__name__}({arg_str})'

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        """Compute the focal loss of logits ``x`` against class labels ``y``.

        Returns:
            Tensor: a scalar for 'mean'/'sum', per-element losses for 'none'.
            When every label equals ``ignore_index`` a scalar zero is
            returned.
        """
        if x.ndim > 2:
            # (N, C, d1, d2, ..., dK) --> (N * d1 * ... * dK, C)
            c = x.shape[1]
            x = x.permute(0, *range(2, x.ndim), 1).reshape(-1, c)
            # (N, d1, d2, ..., dK) --> (N * d1 * ... * dK,)
            y = y.view(-1)

        unignored_mask = y != self.ignore_index
        y = y[unignored_mask]
        x = x[unignored_mask]
        if len(y) == 0:
            # Sum of the empty selection: a zero on x's device and dtype
            # that stays attached to the autograd graph, so callers can
            # still call backward(). A fresh torch.tensor(0.) would live on
            # the CPU and carry no gradient.
            return x.sum()

        # compute weighted cross entropy term: -alpha * log(pt)
        # (alpha is already part of self.nll_loss)
        log_p = F.log_softmax(x, dim=-1)
        ce = self.nll_loss(log_p, y)

        # get true class column from each row (index built on x's device)
        all_rows = torch.arange(len(x), device=x.device)
        log_pt = log_p[all_rows, y]

        # compute focal term: (1 - pt)^gamma
        pt = log_pt.exp()
        focal_term = (1 - pt)**self.gamma

        # the full loss: -alpha * ((1 - pt)^gamma) * log(pt)
        loss = focal_term * ce

        if self.reduction == 'mean':
            loss = loss.mean()
        elif self.reduction == 'sum':
            loss = loss.sum()

        return loss


def focal_loss(alpha: Optional[Sequence] = None,
               gamma: float = 0.,
               reduction: str = 'mean',
               ignore_index: int = -100,
               device='cpu',
               dtype=torch.float32) -> FocalLoss:
    """Build a FocalLoss, converting the class weights to a tensor first.

    Args:
        alpha (Sequence, optional): per-class weights. Anything that is not
            already a Tensor is wrapped with ``torch.tensor``; the result is
            moved to ``device`` and cast to ``dtype``. Defaults to None.
        gamma (float, optional): focusing constant from the paper.
            Defaults to 0.
        reduction (str, optional): 'mean', 'sum' or 'none'.
            Defaults to 'mean'.
        ignore_index (int, optional): class label to ignore.
            Defaults to -100.
        device (str, optional): device for the weights. Defaults to 'cpu'.
        dtype (torch.dtype, optional): dtype for the weights.
            Defaults to torch.float32.
    Returns:
        A FocalLoss object
    """
    class_weights = alpha
    if class_weights is not None:
        as_tensor = (class_weights if isinstance(class_weights, Tensor)
                     else torch.tensor(class_weights))
        class_weights = as_tensor.to(device=device, dtype=dtype)

    return FocalLoss(alpha=class_weights,
                     gamma=gamma,
                     reduction=reduction,
                     ignore_index=ignore_index)

class ASLSingleLabel(nn.Module):
    '''
    Asymmetric loss for single-label classification problems.

    The log-probability of every class is scaled by an asymmetric focusing
    weight (``gamma_pos`` for the target class, ``gamma_neg`` for the
    others) before a label-smoothed cross entropy is taken.

    Args:
        gamma_pos: focusing exponent applied to the target class.
        gamma_neg: focusing exponent applied to the non-target classes.
        eps: label-smoothing factor; 0 disables smoothing.
        reduction: 'mean' averages and 'sum' adds the per-sample losses;
            any other value returns the per-sample losses unreduced.
    '''
    def __init__(self, gamma_pos=0, gamma_neg=4, eps: float = 0.1, reduction='mean'):
        super(ASLSingleLabel, self).__init__()

        self.eps = eps
        self.logsoftmax = nn.LogSoftmax(dim=-1)
        # Target encoding of the most recent forward() call.
        self.targets_classes = []
        self.gamma_pos = gamma_pos
        self.gamma_neg = gamma_neg
        self.reduction = reduction

    def forward(self, inputs, target):
        '''
        "input" dimensions: - (batch_size,number_classes)
        "target" dimensions: - (batch_size)
        '''
        num_classes = inputs.size()[-1]
        log_preds = self.logsoftmax(inputs)
        # One-hot encoding of the targets.
        self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1)

        # ASL weights: the base is (1 - p) for the target class and p for
        # every other class, raised to the matching gamma.
        targets = self.targets_classes
        anti_targets = 1 - targets
        xs_pos = torch.exp(log_preds)
        xs_neg = 1 - xs_pos
        xs_pos = xs_pos * targets
        xs_neg = xs_neg * anti_targets
        asymmetric_w = torch.pow(1 - xs_pos - xs_neg,
                                 self.gamma_pos * targets + self.gamma_neg * anti_targets)
        log_preds = log_preds * asymmetric_w

        if self.eps > 0:  # label smoothing
            self.targets_classes = self.targets_classes.mul(1 - self.eps).add(self.eps / num_classes)

        # loss calculation
        loss = - self.targets_classes.mul(log_preds)

        loss = loss.sum(dim=-1)
        if self.reduction == 'mean':
            loss = loss.mean()
        elif self.reduction == 'sum':
            # Previously 'sum' fell through and silently returned the
            # unreduced per-sample vector.
            loss = loss.sum()

        return loss
    
def compute_metrics(pred):
    """Average the weighted F1 of the four classification heads.

    Args:
        pred: evaluation result exposing ``label_ids`` (indexed as
            ``[:, i]`` for head ``i``) and ``predictions`` (indexed as
            ``[i]`` for head ``i``; CustomTrainer emits argmax class ids).

    Returns:
        dict: ``{'f1-sum': value}``. Despite the key name, the value is the
        mean of the four per-head weighted F1 scores; the key is kept
        because it is the metric name that gets reported.
    """
    labels = pred.label_ids
    preds = pred.predictions
    # One weighted F1 per head, in the order type, polarity, tense, certainty.
    f1 = [
        f1_score(y_true=labels[::, i], y_pred=preds[i], average='weighted')
        for i in range(4)
    ]
    return {
        'f1-sum': sum(f1)/4
    }

## scheduler

In [18]:
class CosineAnnealingWarmUpRestarts(_LRScheduler):
    """Cosine-annealing schedule with a linear warm-up and restarts.

    Inside one cycle the learning rate climbs linearly from the optimizer's
    base lr to ``eta_max`` during the first ``T_up`` steps, then follows a
    half cosine from ``eta_max`` back down to the base lr. When a cycle
    ends, the step counter restarts, the non-warm-up part of the cycle
    length is multiplied by ``T_mult`` and the peak becomes
    ``eta_max * gamma ** cycle``.

    Args:
        optimizer: wrapped optimizer; its param-group lrs act as the floor.
        T_0 (int): length of the first cycle, in scheduler steps (> 0).
        T_mult (int): cycle-length multiplier applied after each restart (>= 1).
        eta_max (float): peak learning rate of the first cycle.
        T_up (int): number of warm-up steps per cycle (>= 0).
        gamma (float): factor applied to the peak at every restart.
        last_epoch (int): index of the last step; -1 starts from scratch.

    Raises:
        ValueError: if T_0, T_mult or T_up fail the checks above.
    """
    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError("Expected positive integer T_up, but got {}".format(T_up))
        self.T_0 = T_0
        self.T_mult = T_mult
        self.base_eta_max = eta_max  # undecayed peak, reference for the gamma decay
        self.eta_max = eta_max       # peak of the current cycle
        self.T_up = T_up
        self.T_i = T_0               # length of the current cycle
        self.gamma = gamma
        self.cycle = 0               # number of completed restarts
        self.T_cur = last_epoch      # position inside the current cycle
        # State above must exist before the base initializer runs.
        super(CosineAnnealingWarmUpRestarts, self).__init__(optimizer, last_epoch)
    
    def get_lr(self):
        """Return one learning rate per param group for the current position."""
        if self.T_cur == -1:
            # Not stepped yet: keep the optimizer's own learning rates.
            return self.base_lrs
        elif self.T_cur < self.T_up:
            # Warm-up: linear ramp from base_lr to eta_max.
            return [(self.eta_max - base_lr)*self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs]
        else:
            # Annealing: half cosine from eta_max down to base_lr over the
            # remaining (T_i - T_up) steps of the cycle.
            return [base_lr + (self.eta_max - base_lr) * (1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule and write the new lrs into the optimizer.

        Args:
            epoch: when None, move one step forward; otherwise jump to the
                given absolute position and recompute cycle, T_cur and T_i.
        """
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur = self.T_cur + 1
            if self.T_cur >= self.T_i:
                # Cycle finished: restart the counter and stretch only the
                # annealing part of the next cycle.
                self.cycle += 1
                self.T_cur = self.T_cur - self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        else:
            if epoch >= self.T_0:
                if self.T_mult == 1:
                    # Constant cycle length: position and cycle index follow
                    # from modulo / integer division.
                    self.T_cur = epoch % self.T_0
                    self.cycle = epoch // self.T_0
                else:
                    # Geometric cycle lengths: solve for the cycle index n.
                    n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
                    self.cycle = n
                    self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
                    self.T_i = self.T_0 * self.T_mult ** (n)
            else:
                # Still inside the first cycle.
                self.T_i = self.T_0
                self.T_cur = epoch
                
        # Decay the peak once per completed cycle.
        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

# optimizer = torch.optim.Adam(model.parameters(), lr = 0)
# scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=150, T_mult=1, eta_max=0.1,  T_up=10, gamma=0.5)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=1, 
#                                                                  T_mult=2, eta_min=1e-6)

## trainer

In [19]:
# Define trainer
# Define trainer
class CustomTrainer(Trainer):
    """Trainer whose loss is the sum of four per-head ASL losses.

    The wrapped model must return the tuple
    ``(type_logit, polarity_logit, tense_logit, certainty_logit)`` and every
    batch must carry a ``labels`` entry indexable as ``labels[:, k]`` for
    head ``k`` (0 type, 1 polarity, 2 tense, 3 certainty).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Build the loss modules once instead of on every compute_loss
        # call, and place them on the device chosen by the training
        # arguments rather than a notebook-level global.
        self._head_criteria = {
            'type': ASLSingleLabel().to(self.args.device),
            'polarity': ASLSingleLabel().to(self.args.device),
            'tense': ASLSingleLabel().to(self.args.device),
            'certainty': ASLSingleLabel().to(self.args.device),
        }

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run the model and return the summed loss of the four heads.

        Args:
            model: module returning the four logit tensors.
            inputs (dict): batch; its "labels" entry is removed and the rest
                is forwarded to the model as keyword arguments.
            return_outputs (bool): also return the predictions.
            num_items_in_batch: accepted so Trainer versions that pass this
                argument keep working; not used.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is set.
            ``outputs`` is ``(None, type_ids, polarity_ids, tense_ids,
            certainty_ids)`` with argmax class ids; the leading None is a
            placeholder (presumably for the slot the Trainer's prediction
            step discards — confirm against the installed version).
        """
        # forward pass
        labels = inputs.pop("labels").to(torch.int64)

        type_logit, polarity_logit, tense_logit, certainty_logit = model(**inputs)

        criterion = self._head_criteria
        loss = criterion['type'](type_logit, labels[::, 0]) + \
                    criterion['polarity'](polarity_logit, labels[::, 1]) + \
                    criterion['tense'](tense_logit, labels[::, 2]) + \
                    criterion['certainty'](certainty_logit, labels[::, 3])

        outputs = None, \
                    torch.argmax(type_logit, dim = 1), \
                    torch.argmax(polarity_logit, dim = 1),\
                    torch.argmax(tense_logit, dim = 1),\
                    torch.argmax(certainty_logit, dim = 1)
        return (loss, outputs) if return_outputs else loss

# Fold

In [20]:
# One encoder per target column, fitted on the full training frame so every
# fold shares the same class-to-integer mapping.
유형 = LabelEncoder().fit(train['유형'])      # sentence type
극성 = LabelEncoder().fit(train['극성'])      # polarity
시제 = LabelEncoder().fit(train['시제'])      # tense
확실성 = LabelEncoder().fit(train['확실성'])  # certainty

def encoding(X_train, X_val):
    """Integer-encode the four target columns of a train/validation pair.

    Uses the notebook-level fitted encoders. The input frames are only read:
    earlier this function wrote the encoded values back into the caller's
    frames, which were slices of ``train`` (SettingWithCopy) and made a
    second call on the same frames fail on the already-encoded integers.

    Args:
        X_train (DataFrame): training fold with the raw label columns.
        X_val (DataFrame): validation fold with the raw label columns.

    Returns:
        tuple[dict, dict]: ``(train_labels, val_labels)``; each maps
        'type', 'polarity', 'tense' and 'certainty' to an array of class ids.
    """
    # (output key, source column, fitted encoder) for every target.
    targets = (
        ('type', '유형', 유형),
        ('polarity', '극성', 극성),
        ('tense', '시제', 시제),
        ('certainty', '확실성', 확실성),
    )

    train_labels = {key: encoder.transform(X_train[column]) for key, column, encoder in targets}
    val_labels = {key: encoder.transform(X_val[column]) for key, column, encoder in targets}
    return train_labels, val_labels

In [21]:
# Reload the configuration so every run of this cell starts from the
# checkpoint defaults (CustomModel modifies this global when it is built).
config=AutoConfig.from_pretrained(model_path)
config._name_or_path = 'kr.kim'
print(f'hidden_layers : {config.num_hidden_layers}')
config.num_hidden_layers = 12
print(f'now_hidden_layers : {config.num_hidden_layers}')

# NOTE(review): `train` already contains the swap-augmented copies built
# earlier, so variants of one source sentence can land in both the training
# and the validation part of a fold; the validation scores are likely
# optimistic. Splitting before augmenting (or grouping by source sentence)
# would avoid this.
kf = KFold(n_splits=5, random_state=seed, shuffle=True)
for i, (train_index, test_index) in enumerate(kf.split(train)):
    print(f'Round {i}')
    # KFold yields positional indices (`test_index` is the validation
    # part), so select with .iloc; copy so downstream code never works on a
    # view of `train`.
    X_train, X_val = train.iloc[train_index].copy(), train.iloc[test_index].copy()
    train_labels, val_labels = encoding(X_train, X_val)
    token_train = tokenizer(X_train.문장.tolist(), padding=True, truncation=True, max_length=length)
    token_val = tokenizer(X_val.문장.tolist(), padding=True, truncation=True, max_length=length)
    train_dataset, val_dataset = CustomDataset(token_train, train_labels), CustomDataset(token_val, val_labels)
    model = CustomModel()
    model.to(device)
    args = TrainingArguments(run_name = f'fold_{i}',                                # run name
                             output_dir= f"fold_{i}",                               # checkpoint directory
                             evaluation_strategy="steps",                           # evaluate on a step schedule
                             eval_steps=100,                                        # evaluate every 100 steps
                             save_steps=100,                                        # save a checkpoint every 100 steps
                             save_total_limit = 2,                                  # number of checkpoints to keep
                             logging_steps=100,                                     # log the training loss every 100 steps
                             per_device_train_batch_size=batch,                     # training samples per device per step
                             per_device_eval_batch_size=batch,                      # evaluation samples per device per step
                             gradient_accumulation_steps=16,                        # accumulate gradients over 16 batches per update
                             num_train_epochs=epoch,                                # upper bound on the number of epochs
                             learning_rate=lr,                                      # learning rate
                             seed=seed,                                             # seed
                             load_best_model_at_end=True,                           # reload the best checkpoint when training ends
                             fp16=True,
                             do_train=True,
                             do_eval=True,
                             # metric_for_best_model
                             # greater_is_better = True,
    )
    trainer = CustomTrainer(model=model,
                            args=args,                                                        # args
                            train_dataset=train_dataset,                                      # training data
                            eval_dataset=val_dataset,                                         # validation data
                            compute_metrics=compute_metrics,                                  # evaluation metric
                            callbacks=[EarlyStoppingCallback(early_stopping_patience=stop)],) # callback
    trainer.train()
    # Release the fold's model before the next one is built.
    del model
    del trainer
    gc.collect() # free Python-side objects
    torch.cuda.empty_cache() # release cached GPU memory

hidden_layers : 12
now_hidden_layers : 12
Round 0


Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 51745
  Num Epochs 

Step,Training Loss,Validation Loss,F1-sum
100,1.1362,0.721278,0.926444
200,0.6864,0.568114,0.945651
300,0.4922,0.426769,0.960439
400,0.3548,0.286595,0.975543
500,0.2134,0.204295,0.98234
600,0.1736,0.172119,0.985814
700,0.1032,0.12276,0.990843
800,0.0919,0.093342,0.993308
900,0.069,0.105542,0.992091
1000,0.0725,0.067124,0.995235


***** Running Evaluation *****
  Num examples = 12937
  Batch size = 16
Saving model checkpoint to fold_0/checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_0/checkpoint-1400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12937
  Batch size = 16
Saving model checkpoint to fold_0/checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_0/checkpoint-1700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12937
  Batch size = 16
Saving model checkpoint to fold_0/checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_0/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12937
  Batch size = 16
Saving model checkpoint to fold_0/checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dic

Round 1


loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/pytorch_model.bin
Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (ini

Step,Training Loss,Validation Loss,F1-sum
100,1.0983,0.754025,0.926232
200,0.7066,0.607893,0.937193
300,0.5458,0.496999,0.954038
400,0.4272,0.376867,0.965319
500,0.2673,0.265384,0.976727
600,0.2042,0.184631,0.985249
700,0.1295,0.140802,0.988996
800,0.1096,0.117293,0.991648
900,0.0761,0.098238,0.992935
1000,0.0779,0.090223,0.993148


***** Running Evaluation *****
  Num examples = 12937
  Batch size = 16
Saving model checkpoint to fold_1/checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_1/checkpoint-2000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12937
  Batch size = 16
Saving model checkpoint to fold_1/checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_1/checkpoint-2300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12937
  Batch size = 16
Saving model checkpoint to fold_1/checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_1/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12937
  Batch size = 16
Saving model checkpoint to fold_1/checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dic

Round 2


loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/pytorch_model.bin
Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (ini

Step,Training Loss,Validation Loss,F1-sum
100,1.1623,0.732084,0.926359
200,0.6947,0.562514,0.943489
300,0.4918,0.444685,0.958245
400,0.3615,0.292196,0.974436
500,0.2076,0.230282,0.981486
600,0.1668,0.166107,0.987031
700,0.1157,0.137551,0.988904
800,0.1025,0.114381,0.991552
900,0.0735,0.097787,0.993116
1000,0.0744,0.095191,0.993188


***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_2/checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_2/checkpoint-1200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_2/checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_2/checkpoint-1500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_2/checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_2/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_2/checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dic

Round 3


loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/pytorch_model.bin
Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (ini

Step,Training Loss,Validation Loss,F1-sum
100,1.0934,0.724629,0.923564
200,0.6735,0.582788,0.942185
300,0.4847,0.449138,0.959389
400,0.3658,0.306664,0.972896
500,0.2138,0.237742,0.980092
600,0.1732,0.174725,0.986177
700,0.1139,0.158647,0.987749
800,0.0999,0.131041,0.990297
900,0.0702,0.109578,0.99203
1000,0.0693,0.111931,0.991613


***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_3/checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_3/checkpoint-1700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_3/checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_3/checkpoint-1800] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_3/checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_3/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_3/checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dic

Round 4


loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/pytorch_model.bin
Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (ini

Step,Training Loss,Validation Loss,F1-sum
100,1.094,0.741077,0.92824
200,0.6789,0.583059,0.942165
300,0.4662,0.445183,0.959274
400,0.3737,0.328783,0.970844
500,0.2273,0.248431,0.979765
600,0.1716,0.164955,0.987238
700,0.1134,0.166926,0.9866
800,0.1083,0.115492,0.991637
900,0.0783,0.108671,0.992168
1000,0.0701,0.1319,0.990097


***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_4/checkpoint-100
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_4/checkpoint-200
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_4/checkpoint-300
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_4/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving model checkpoint to fold_4/checkpoint-400
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [fold_4/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 12936
  Batch size = 16
Saving

# Predict

In [22]:
def recent_file(path):
    """Return the path of the most recently created entry inside ``path``.

    Every name returned by ``os.listdir(path)`` (files and sub-directories
    alike) is ranked by ``os.path.getctime`` and the one with the largest
    timestamp is selected.

    Note:
        ``os.path.getctime`` is the creation time on Windows but the time of
        the last metadata change on Unix.

    Args:
        path: Directory to search.

    Returns:
        The string ``f"{path}/{name}"`` for the selected entry.

    Raises:
        FileNotFoundError: If ``path`` contains no entries (``os.listdir``
            itself raises the same error when ``path`` does not exist).
    """
    entry_names = os.listdir(f"{path}")
    if not entry_names:
        # Fail with an explicit message instead of an IndexError on an empty list.
        raise FileNotFoundError(f"No files or directories found in '{path}'")
    # max() keeps the first of several equally recent entries, which matches
    # what a stable descending sort followed by [0] would return.
    newest_name = max(entry_names, key=lambda f_name: os.path.getctime(f"{path}/{f_name}"))
    return f"{path}/{newest_name}"

In [23]:
# Free Python-side and CUDA-cached memory before building the inference models.
gc.collect()
torch.cuda.empty_cache()

# Tokenize the test sentences once; the same dataset is reused for every fold.
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenized = tokenizer(test.문장.tolist(), padding=True, truncation=True, max_length=length, return_tensors="pt")
test_dataset = CustomDataset(tokenized, None)  # second argument is None: no labels are passed for the test set
test_args = TrainingArguments(
    output_dir = './',
    do_train = False,
    do_predict = True,
    per_device_eval_batch_size = 512,
    dataloader_drop_last = False
)

# Count the consecutive fold_0, fold_1, ... directories in the working directory.
n_folds = 0
while os.path.isdir(f'fold_{n_folds}'):
    n_folds += 1
if n_folds == 0:
    # Without this guard the cell would finish with an empty result list and
    # the averaging step later on would fail with a division by zero.
    raise RuntimeError("No 'fold_0' directory found; there are no fold checkpoints to predict with")

# One prediction result per fold model, in fold order.
test_results = []
for i in range(n_folds):
    print(f'Round {i}')
    model = CustomModel().to(device)
    # NOTE(review): recent_file() selects the most recently created entry of
    # the fold directory, which is not necessarily the best-scoring checkpoint
    # - confirm this is the intended choice.
    checkpoint_dir = recent_file(f'fold_{i}')
    # Load onto the CPU first so the weights can be restored regardless of the
    # device they were saved from; load_state_dict copies them into the model.
    state_dict = torch.load(f"{checkpoint_dir}/pytorch_model.bin", map_location="cpu")
    model.load_state_dict(state_dict)
    del state_dict
    trainer = CustomTrainer(
                  model = model,
                  args = test_args,
                  compute_metrics = compute_metrics)
    test_results.append(trainer.predict(test_dataset))
    # Drop the references first; collecting before the del would leave this
    # fold's model alive until the next iteration.
    del model
    del trainer
    gc.collect()
    torch.cuda.empty_cache()

loading file vocab.txt from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/vocab.txt
loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/tokenizer_config.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get 

Round 0


Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of BigBirdModel were initialized from the model checkpoint at monologg/kobigbird-bert-b

loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/pytorch_model.bin


Round 1


Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of BigBirdModel were initialized from the model checkpoint at monologg/kobigbird-bert-b

loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/pytorch_model.bin


Round 2


Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of BigBirdModel were initialized from the model checkpoint at monologg/kobigbird-bert-b

loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/pytorch_model.bin


Round 3


Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of BigBirdModel were initialized from the model checkpoint at monologg/kobigbird-bert-b

loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/pytorch_model.bin


Round 4


Some weights of the model checkpoint at monologg/kobigbird-bert-base were not used when initializing BigBirdModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of BigBirdModel were initialized from the model checkpoint at monologg/kobigbird-bert-b

In [24]:
# Ensemble the fold models: for each of the four classification heads, average
# the per-fold prediction scores, take the highest-scoring class of every row
# and map the class index back to its original string label.
# The dict order must match the order of the heads in `predictions`
# (0: 유형, 1: 극성, 2: 시제, 3: 확실성).
label_encoders = {'유형': 유형, '극성': 극성, '시제': 시제, '확실성': 확실성}
for head_idx, (column, encoder) in enumerate(label_encoders.items()):
    mean_scores = sum(result.predictions[head_idx] for result in test_results) / len(test_results)
    # assumes each head's scores are shaped (n_samples, n_classes) - TODO confirm
    predicted_classes = np.argmax(mean_scores, axis=1)
    # A single vectorised inverse_transform replaces one call per row.
    test[column] = encoder.inverse_transform(predicted_classes)

In [25]:
test

Unnamed: 0,ID,문장,유형,극성,시제,확실성
0,TEST_0000,장욱진의 가족은 허물 없는 가족애를 처음 공개되는 정약용의 정효자전과 정부인전은 강...,사실형,긍정,현재,확실
1,TEST_0001,조지 W 부시 버락 오바마 전 대통령도 전쟁 위험 때문에 버린 카드다,사실형,긍정,현재,확실
2,TEST_0002,지난해 1분기 128억원이었던 영업이익이 올해 1분기 505억원으로 급증했다,사실형,긍정,과거,확실
3,TEST_0003,수상 작가와 맺으려던 계약서 내용 가운데 일부가 독소 조항으로 해석돼 수정을 요청받...,사실형,긍정,과거,확실
4,TEST_0004,결국 최근 KDB산업은행은 대규모 손실 위기에 닥친 에어부산에 140억원 금융지원을...,사실형,긍정,과거,확실
...,...,...,...,...,...,...
7085,TEST_7085,2020 세계국가편람 모바일 앱은 세계 216개국의 국가개황과 주요 경제지표 사회개...,사실형,긍정,현재,확실
7086,TEST_7086,탈세계화 징후들이 반갑지 않은 이유다,추론형,긍정,현재,확실
7087,TEST_7087,틱톡은 6월 인터넷 안전의 달을 맞아 올바른 개인정보 보호 관리 방법 앱 내 유용한...,사실형,긍정,현재,확실
7088,TEST_7088,만약 3개월 간 채굴자들의 투표를 거쳐 23 이상의 해시파워가 채굴세 도입에 찬성한...,추론형,긍정,미래,확실


In [26]:
# Combine the four predicted attributes into one '-'-separated label,
# in the order 유형-극성-시제-확실성.
test['label'] = test['유형'].str.cat([test['극성'], test['시제'], test['확실성']], sep='-')
test

Unnamed: 0,ID,문장,유형,극성,시제,확실성,label
0,TEST_0000,장욱진의 가족은 허물 없는 가족애를 처음 공개되는 정약용의 정효자전과 정부인전은 강...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
1,TEST_0001,조지 W 부시 버락 오바마 전 대통령도 전쟁 위험 때문에 버린 카드다,사실형,긍정,현재,확실,사실형-긍정-현재-확실
2,TEST_0002,지난해 1분기 128억원이었던 영업이익이 올해 1분기 505억원으로 급증했다,사실형,긍정,과거,확실,사실형-긍정-과거-확실
3,TEST_0003,수상 작가와 맺으려던 계약서 내용 가운데 일부가 독소 조항으로 해석돼 수정을 요청받...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
4,TEST_0004,결국 최근 KDB산업은행은 대규모 손실 위기에 닥친 에어부산에 140억원 금융지원을...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
...,...,...,...,...,...,...,...
7085,TEST_7085,2020 세계국가편람 모바일 앱은 세계 216개국의 국가개황과 주요 경제지표 사회개...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
7086,TEST_7086,탈세계화 징후들이 반갑지 않은 이유다,추론형,긍정,현재,확실,추론형-긍정-현재-확실
7087,TEST_7087,틱톡은 6월 인터넷 안전의 달을 맞아 올바른 개인정보 보호 관리 방법 앱 내 유용한...,사실형,긍정,현재,확실,사실형-긍정-현재-확실
7088,TEST_7088,만약 3개월 간 채굴자들의 투표를 거쳐 23 이상의 해시파워가 채굴세 도입에 찬성한...,추론형,긍정,미래,확실,추론형-긍정-미래-확실


In [27]:
# NOTE(review): absolute, machine-specific path, whereas train.csv and test.csv
# are read relative to the working directory - consider doing the same here.
sub = pd.read_csv('/home/ubuntu/competition/dacon/문장분류대회/sample_submission.csv')

# A plain Series assignment aligns on index labels, so a row-count or index
# mismatch would silently write NaN labels. Check the length and assign by
# position instead.
if len(sub) != len(test):
    raise ValueError(
        f"sample_submission has {len(sub)} rows but test has {len(test)} predictions"
    )
sub['label'] = test['label'].to_numpy()

# Use the first unused 제출<N>.csv name so earlier submissions are not overwritten.
submission_idx = 0
while os.path.exists(f'제출{submission_idx}.csv'):
    submission_idx += 1
sub.to_csv(f'제출{submission_idx}.csv', index=False, mode='w')