### Setup

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 35.2 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 62.7 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 77.4 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [2]:
import pandas as pd
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
# from transformers import AutoTokenizer, ElectraForSequenceClassification, AdamW
from transformers import AutoModel, AutoTokenizer, AdamW
# from transformers import BertForSequenceClassification, BertTokenizerFast, AdamW
from tqdm.notebook import tqdm
import numpy as np
from torch import nn
from sklearn.metrics import f1_score
import random
import os
from sklearn.model_selection import train_test_split

In [3]:
# Run configuration: RNG seed first, then the optimisation hyperparameters.
seed = 0
epochs, batch_size = 100, 50
learning_rate = 1e-6

In [4]:
# Upper bound on the gradient norm, consumed by clip_grad_norm_ during training.
max_grad_norm = 1

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [5]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and every CUDA device),
    exports ``PYTHONHASHSEED`` and configures cuDNN for reproducible kernels.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers all visible GPUs; silently does nothing when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Auto-tuning may select a different convolution algorithm on each run,
    # which defeats the determinism requested on the line above.
    torch.backends.cudnn.benchmark = False

seed_everything(seed) # Fix the random seed

### Dataset 만들어서 불러오기 

In [6]:
# Mount Google Drive so the dataset, log and checkpoint paths under /content/drive resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
class Dataset(torch.utils.data.Dataset):
    """Sentence dataset that tokenizes each example on access.

    The base class is referenced by its qualified name so that re-running this
    cell does not make the class inherit from its own previous definition.

    Args:
        dataset: DataFrame with a "문장" (sentence) column, or a path to a CSV
            file holding such a table. When ``train`` is true it must also
            provide the integer-encoded label columns "유형", "극성", "시제"
            and "확실성".
        train: When true, ``__getitem__`` also returns the four labels.
        max_length: Token length every example is truncated or padded to.
    """

    def __init__(self, dataset, train, max_length=256):
        # Accept a CSV path as a convenience; everything below needs a table.
        if isinstance(dataset, (str, os.PathLike)):
            dataset = pd.read_csv(dataset)
        self.dataset = dataset
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained("jinmang2/kpfbert")
        self.sentences = self.dataset["문장"].tolist()
        self.train = train
        if self.train:
            self.type_labels = [np.int32(i) for i in self.dataset["유형"]]
            self.polarity_labels = [np.int32(i) for i in self.dataset["극성"]]
            self.tense_labels = [np.int32(i) for i in self.dataset["시제"]]
            self.certainty_labels = [np.int32(i) for i in self.dataset["확실성"]]

    def __len__(self):
        """Return the number of rows in the underlying table."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Tokenize sentence ``idx``.

        Returns:
            ``(input_ids, attention_mask)`` when ``train`` is false, otherwise
            those two followed by the type, polarity, tense and certainty
            labels of the row.
        """
        inputs = self.tokenizer(
            self.sentences[idx],
            return_tensors='pt',
            truncation=True,
            max_length=self.max_length,
            # Replaces the deprecated pad_to_max_length=True.
            padding='max_length',
            add_special_tokens=True,
        )
        # return_tensors='pt' adds a leading batch axis; take its only entry.
        input_ids = inputs['input_ids'][0]
        attention_mask = inputs['attention_mask'][0]
        if self.train:
            return (
                input_ids,
                attention_mask,
                self.type_labels[idx],
                self.polarity_labels[idx],
                self.tense_labels[idx],
                self.certainty_labels[idx],
            )
        return input_ids, attention_mask

In [8]:
df = pd.read_csv('/content/drive/MyDrive/dacon/dataset/train_aug.csv')

# Integer id assigned to every class name of the four label columns.
label_dict = {
    "유형": {"사실형": 0, "추론형": 1, "대화형": 2, "예측형": 3},
    "극성": {"긍정": 0, "부정": 1, "미정": 2},
    "시제": {"과거": 0, "현재": 1, "미래": 2},
    "확실성": {"확실": 0, "불확실": 1},
}

# Encode each label column in one vectorised pass. Row-by-row chained
# assignment (df[label].iloc[i] = ...) is slow and may write to a temporary
# copy instead of the frame.
for label, mapping in label_dict.items():
    encoded = df[label].map(mapping)
    unknown = df.loc[encoded.isna(), label].unique().tolist()
    if unknown:
        raise ValueError(f"Unexpected values in column {label!r}: {unknown}")
    df[label] = encoded.astype('int64')

In [9]:
# Hold out 20% for validation, stratified on the sentence-type column ("유형").
# Only the frame itself is split; no second array is passed just to be discarded.
train, val = train_test_split(df, test_size=0.2, random_state=seed, stratify=df["유형"])

In [10]:
# Both splits carry labels, so both are built in training mode.
train_dataset = Dataset(dataset=train, train=True)
val_dataset = Dataset(dataset=val, train=True)

Downloading:   0%|          | 0.00/335 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/276k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

### Create Model

In [11]:
# electra = ElectraForSequenceClassification.from_pretrained("monologg/koelectra-small-v2-discriminator")
# Load the pretrained kpfbert encoder; it is wrapped by Classifier further down.
model = AutoModel.from_pretrained("jinmang2/kpfbert")

# Try a single forward pass
# text, attention_mask, y1, y2, y3, y4 = train_dataset[0]
# model(text.unsqueeze(0).to(device), attention_mask=attention_mask.unsqueeze(0).to(device))

Downloading:   0%|          | 0.00/622 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/454M [00:00<?, ?B/s]

Some weights of BertModel were not initialized from the model checkpoint at jinmang2/kpfbert and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# model.load_state_dict(torch.load("model.pt"))

In [13]:
# model.classifier = torch.nn.Sequential(*(list(model.classifier.children())[:-1]))

In [14]:
# Move the model to the target device; as the cell's last expression this also displays its layers.
model.to(device)

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(36440, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [15]:
# text, attention_mask, y1, y2, y3, y4 = train_dataset[0]
# electra(text.unsqueeze(0).to(device), attention_mask=attention_mask.unsqueeze(0).to(device))

In [16]:
class Classifier(nn.Module):
    """Shared encoder followed by four linear classification heads.

    Args:
        model: Encoder called as ``model(input_ids=..., attention_mask=...)``.
            Its first output is indexed with ``[:, 0]`` (presumably the
            representation of the leading [CLS] token — confirm for other
            encoders).
        hidden_size: Width of the encoder output. Defaults to
            ``model.config.hidden_size`` when the encoder exposes it and to
            768 otherwise.
    """

    def __init__(self, model, hidden_size=None):
        super().__init__()
        self.model = model

        if hidden_size is None:
            hidden_size = getattr(getattr(model, 'config', None), 'hidden_size', 768)

        # Attribute names are kept stable so existing state_dict checkpoints load.
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.do = nn.Dropout(p=0.1, inplace=False)
        self.type_classifier = nn.Linear(hidden_size, 4)
        self.polarity_classifier = nn.Linear(hidden_size, 3)
        self.tense_classifier = nn.Linear(hidden_size, 3)
        self.certainty_classifier = nn.Linear(hidden_size, 2)

    def forward(self, input_ids, attention_mask):
        """Return raw logits for the type, polarity, tense and certainty tasks.

        Args:
            input_ids: Token ids of the batch.
            attention_mask: Mask telling the encoder which tokens to attend to.

        Returns:
            Tuple ``(type, polarity, tense, certainty)`` of logits with 4, 3,
            3 and 2 classes. No softmax is applied here.
        """
        model_out = self.model(input_ids=input_ids, attention_mask=attention_mask)[0][:, 0]

        # Projection and dropout only; no activation sits between fc1 and the heads.
        x = self.fc1(model_out)
        x = self.do(x)

        type_output = self.type_classifier(x)
        polarity_output = self.polarity_classifier(x)
        tense_output = self.tense_classifier(x)
        certainty_output = self.certainty_classifier(x)

        return type_output, polarity_output, tense_output, certainty_output

In [17]:
# Wrap the encoder exactly once: re-running this cell must not nest a
# Classifier inside another Classifier.
if not isinstance(model, Classifier):
    model = Classifier(model)

In [18]:
# Move the wrapped model (including the new heads) to the target device and display its layers.
model.to(device)

Classifier(
  (model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(36440, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)


In [19]:
class FocalLoss(nn.Module):
    """Binary focal loss computed from logits.

    Each element's binary cross-entropy is scaled by a class-balance factor
    (``alpha`` for positives, ``1 - alpha`` for negatives) and by the focal
    term ``|target - p| ** gamma``; the weighted losses are then averaged.

    Args:
        gamma: Exponent of the focal term.
        alpha: Weight of the positive class.
    """

    def __init__(self, gamma=2, alpha=0.25):
        super(FocalLoss, self).__init__()
        # Keep per-element losses: with the default mean reduction the factors
        # below would only rescale one already-averaged scalar.
        self.loss_fn = nn.BCEWithLogitsLoss(reduction='none')
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, pred, true):
        """Return the mean focal loss.

        Args:
            pred: Logits.
            true: Binary targets with the same shape as ``pred``.
        """
        true = true.float()
        bceloss = self.loss_fn(pred, true)  # already non-negative (-log p_t)
        pred_prob = torch.sigmoid(pred)  # p; p_t is p for positives and 1 - p for negatives
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)  # class balance
        modulating_factor = torch.abs(true - pred_prob) ** self.gamma  # focal term (1 - p_t) ** gamma
        loss = alpha_factor * modulating_factor * bceloss

        return loss.mean()

### Learn

In [20]:
optimizer = AdamW(model.parameters(), lr=learning_rate)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Sample order does not matter for evaluation, so keep the validation pass in a
# fixed order instead of drawing a fresh permutation every epoch.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)



In [21]:
def validation(model, val_loader, criterion, device):
    """Evaluate the four-task classifier on the validation loader.

    Args:
        model: Called as ``model(input_ids, attention_mask)``; returns the
            type, polarity, tense and certainty logits.
        val_loader: Yields batches of six items: input ids, attention masks
            and the four label tensors.
        criterion: Dict of loss functions keyed by 'type', 'polarity',
            'tense' and 'certainty'.
        device: Device the batches are moved to.

    Returns:
        Tuple of the mean per-batch loss followed by the weighted F1 score of
        the type, polarity, tense and certainty tasks.
    """
    model.eval()

    batch_losses = []
    predicted = {'type': [], 'polarity': [], 'tense': [], 'certainty': []}
    expected = {'type': [], 'polarity': [], 'tense': [], 'certainty': []}

    with torch.no_grad():
        for ids, masks, y_type, y_polarity, y_tense, y_certainty in tqdm(val_loader):
            targets = {
                'type': y_type.long().to(device),
                'polarity': y_polarity.long().to(device),
                'tense': y_tense.long().to(device),
                'certainty': y_certainty.long().to(device),
            }
            out_type, out_polarity, out_tense, out_certainty = model(ids.to(device), masks.to(device))
            logits = {
                'type': out_type,
                'polarity': out_polarity,
                'tense': out_tense,
                'certainty': out_certainty,
            }

            # Equal-weight sum of the four task losses, accumulated in task order.
            loss = 0.25 * criterion['type'](logits['type'], targets['type'])
            loss = loss + 0.25 * criterion['polarity'](logits['polarity'], targets['polarity'])
            loss = loss + 0.25 * criterion['tense'](logits['tense'], targets['tense'])
            loss = loss + 0.25 * criterion['certainty'](logits['certainty'], targets['certainty'])
            batch_losses.append(loss.item())

            for task in predicted:
                predicted[task].extend(logits[task].argmax(1).detach().cpu().numpy().tolist())
                expected[task].extend(targets[task].detach().cpu().numpy().tolist())

    scores = [
        f1_score(expected[task], predicted[task], average='weighted')
        for task in ('type', 'polarity', 'tense', 'certainty')
    ]

    return (np.mean(batch_losses), *scores)

In [22]:
def train(model, optimizer, train_loader, val_loader, device):
    """Fine-tune the four-task classifier, validating and checkpointing every epoch.

    Runs for the module-level ``epochs`` and clips gradients to the
    module-level ``max_grad_norm``. After each epoch the accumulated metrics
    are written to ``log.csv`` and the model's ``state_dict`` is saved under a
    zero-padded epoch number.

    Args:
        model: Called as ``model(input_ids, attention_mask)``; returns the
            type, polarity, tense and certainty logits.
        optimizer: Optimizer over the model's parameters.
        train_loader: Yields batches of six items: input ids, attention masks
            and the four label tensors.
        val_loader: Loader handed to ``validation`` after each epoch.
        device: Device the model inputs and labels are moved to.

    Returns:
        The same ``model`` object, trained in place, so that
        ``model = train(...)`` keeps a usable reference.
    """
    log_path = '/content/drive/MyDrive/dacon/log/log.csv'
    checkpoint_dir = '/content/drive/MyDrive/dacon/saved_models'
    # Fail before training rather than after the first epoch if a folder is missing.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    os.makedirs(checkpoint_dir, exist_ok=True)

    log_df = {"train_losses":[], "val_losses":[], "type_f1s":[], "polarity_f1s":[], "tense_f1s":[], "certainty_f1s":[]}

    criterion = {
        'type' : nn.CrossEntropyLoss().to(device),
        'polarity' : nn.CrossEntropyLoss().to(device),
        'tense' : nn.CrossEntropyLoss().to(device),
        'certainty' : nn.CrossEntropyLoss().to(device)
    }

    for e in range(epochs):
        # validation() switches to eval mode, so re-enable training mode once per epoch.
        model.train()
        train_loss = []
        for input_ids_batch, attention_masks_batch, type_label, polarity_label, tense_label, certainty_label in tqdm(train_loader):
            optimizer.zero_grad()
            type_label = type_label.long().to(device)
            polarity_label = polarity_label.long().to(device)
            tense_label = tense_label.long().to(device)
            certainty_label = certainty_label.long().to(device)
            type_logit, polarity_logit, tense_logit, certainty_logit = model(input_ids_batch.to(device), attention_masks_batch.to(device))

            # Equal-weight sum of the four task losses.
            loss = 0.25 * criterion['type'](type_logit, type_label) + \
                        0.25 * criterion['polarity'](polarity_logit, polarity_label) + \
                        0.25 * criterion['tense'](tense_logit, tense_label) + \
                        0.25 * criterion['certainty'](certainty_logit, certainty_label)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            train_loss.append(loss.item())

        val_loss, type_f1, polarity_f1, tense_f1, certainty_f1 = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{e+1}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] 유형 F1 : [{type_f1:.5f}] 극성 F1 : [{polarity_f1:.5f}] 시제 F1 : [{tense_f1:.5f}] 확실성 F1 : [{certainty_f1:.5f}]')

        log_df["val_losses"].append(val_loss)
        log_df["train_losses"].append(np.mean(train_loss))
        log_df["type_f1s"].append(type_f1)
        log_df["polarity_f1s"].append(polarity_f1)
        log_df["tense_f1s"].append(tense_f1)
        log_df["certainty_f1s"].append(certainty_f1)

        # Rewrite the full log every epoch so an interrupted run keeps its history.
        pd.DataFrame(log_df).to_csv(log_path)
        torch.save(model.state_dict(), f"{checkpoint_dir}/model_state_dict{e + 1:04d}.pth")

    return model

In [23]:
# The model is updated in place, so the return value is not needed. Rebinding
# `model` to it would replace the network with None whenever train() returns
# nothing. The trailing semicolon suppresses the cell's output repr.
train(model, optimizer, train_loader, val_loader, device);

  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.76271] Val Loss : [0.55233] 유형 F1 : [0.68684] 극성 F1 : [0.86945] 시제 F1 : [0.60652] 확실성 F1 : [0.85315]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.46718] Val Loss : [0.38947] 유형 F1 : [0.78876] 극성 F1 : [0.88001] 시제 F1 : [0.84041] 확실성 F1 : [0.89219]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.38022] Val Loss : [0.35362] 유형 F1 : [0.80061] 극성 F1 : [0.90708] 시제 F1 : [0.85719] 확실성 F1 : [0.89407]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.34377] Val Loss : [0.32078] 유형 F1 : [0.82208] 극성 F1 : [0.93578] 시제 F1 : [0.86210] 확실성 F1 : [0.89725]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.32049] Val Loss : [0.30878] 유형 F1 : [0.82856] 극성 F1 : [0.94834] 시제 F1 : [0.86395] 확실성 F1 : [0.89872]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.30504] Val Loss : [0.30050] 유형 F1 : [0.83257] 극성 F1 : [0.95014] 시제 F1 : [0.86648] 확실성 F1 : [0.89979]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.29408] Val Loss : [0.28977] 유형 F1 : [0.84209] 극성 F1 : [0.95461] 시제 F1 : [0.87058] 확실성 F1 : [0.90163]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.28403] Val Loss : [0.28482] 유형 F1 : [0.84391] 극성 F1 : [0.95451] 시제 F1 : [0.87383] 확실성 F1 : [0.90125]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.27572] Val Loss : [0.28160] 유형 F1 : [0.84480] 극성 F1 : [0.95698] 시제 F1 : [0.87778] 확실성 F1 : [0.90568]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.26815] Val Loss : [0.27535] 유형 F1 : [0.84747] 극성 F1 : [0.95824] 시제 F1 : [0.88177] 확실성 F1 : [0.90365]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [11] Train Loss : [0.26150] Val Loss : [0.27249] 유형 F1 : [0.85008] 극성 F1 : [0.95988] 시제 F1 : [0.87901] 확실성 F1 : [0.90674]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [12] Train Loss : [0.25569] Val Loss : [0.26896] 유형 F1 : [0.85154] 극성 F1 : [0.96219] 시제 F1 : [0.88595] 확실성 F1 : [0.90641]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [13] Train Loss : [0.24908] Val Loss : [0.26706] 유형 F1 : [0.85256] 극성 F1 : [0.96292] 시제 F1 : [0.88452] 확실성 F1 : [0.90934]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [14] Train Loss : [0.24263] Val Loss : [0.26359] 유형 F1 : [0.85505] 극성 F1 : [0.96469] 시제 F1 : [0.88611] 확실성 F1 : [0.91018]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [15] Train Loss : [0.23838] Val Loss : [0.26125] 유형 F1 : [0.85495] 극성 F1 : [0.96438] 시제 F1 : [0.88762] 확실성 F1 : [0.90906]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [16] Train Loss : [0.23412] Val Loss : [0.26005] 유형 F1 : [0.85680] 극성 F1 : [0.96475] 시제 F1 : [0.88876] 확실성 F1 : [0.90948]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [17] Train Loss : [0.22868] Val Loss : [0.25864] 유형 F1 : [0.85700] 극성 F1 : [0.96700] 시제 F1 : [0.89026] 확실성 F1 : [0.91139]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [18] Train Loss : [0.22292] Val Loss : [0.25657] 유형 F1 : [0.85893] 극성 F1 : [0.96679] 시제 F1 : [0.89046] 확실성 F1 : [0.91192]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [19] Train Loss : [0.21752] Val Loss : [0.25400] 유형 F1 : [0.85978] 극성 F1 : [0.96795] 시제 F1 : [0.89135] 확실성 F1 : [0.91184]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [20] Train Loss : [0.21264] Val Loss : [0.25563] 유형 F1 : [0.85910] 극성 F1 : [0.96609] 시제 F1 : [0.89112] 확실성 F1 : [0.91399]


  0%|          | 0/382 [00:00<?, ?it/s]



  0%|          | 0/96 [00:00<?, ?it/s]

Epoch : [21] Train Loss : [0.20821] Val Loss : [0.25621] 유형 F1 : [0.85942] 극성 F1 : [0.96823] 시제 F1 : [0.89134] 확실성 F1 : [0.91460]


  0%|          | 0/382 [00:00<?, ?it/s]



KeyboardInterrupt: ignored

### Inference

In [24]:
# Epoch 19 has the lowest validation loss in the training log above.
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
pretrained_dict = torch.load(r'/content/drive/MyDrive/dacon/saved_models/model_state_dict0019.pth', map_location=device)
model.load_state_dict(pretrained_dict)
infer_model = model

In [30]:
# Competition test set. With train=False the dataset yields only input ids and attention masks.
testset = pd.read_csv('/content/drive/MyDrive/dacon/dataset/test.csv')

data_test = Dataset(dataset=testset, train=False)
test_dataloader = DataLoader(dataset=data_test, batch_size=batch_size)

In [31]:
# Number of test examples; the prediction lists produced below should match it.
len(data_test)

7090

In [32]:
def inference(infer_model, test_dataloader, device):
    """Predict class ids for all four tasks over a data loader.

    Args:
        infer_model: Called as ``infer_model(input_ids, attention_mask)``;
            returns the type, polarity, tense and certainty logits.
        test_dataloader: Yields batches whose first two items are the input
            ids and attention masks; any further items are ignored.
        device: Device the model and the inputs are moved to.

    Returns:
        Four lists of predicted class ids, in the order type, polarity,
        tense, certainty.
    """
    infer_model.to(device)
    infer_model.eval()

    collected = ([], [], [], [])

    with torch.no_grad():
        for batch in tqdm(test_dataloader):
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            type_logit, polarity_logit, tense_logit, certainty_logit = infer_model(input_ids, attention_mask)

            per_task_logits = (type_logit, polarity_logit, tense_logit, certainty_logit)
            for bucket, logits in zip(collected, per_task_logits):
                bucket.extend(logits.argmax(1).detach().cpu().numpy().tolist())

    type_preds, polarity_preds, tense_preds, certainty_preds = collected
    return type_preds, polarity_preds, tense_preds, certainty_preds

In [33]:
# Predict class ids for the four tasks on the test loader.
type_preds, polarity_preds, tense_preds, certainty_preds = inference(infer_model, test_dataloader, device)

  0%|          | 0/142 [00:00<?, ?it/s]

In [34]:
# Class names per label column; the list position is the class id predicted by the model.
label_dict = dict(
    [
        ("유형", ["사실형", "추론형", "대화형", "예측형"]),
        ("극성", ["긍정", "부정", "미정"]),
        ("시제", ["과거", "현재", "미래"]),
        ("확실성", ["확실", "불확실"]),
    ]
)

In [35]:
# Sanity check: number of type predictions.
len(type_preds)

7090

In [36]:
# Replace every predicted class id with its class name, one label column at a time.
type_preds = list(map(label_dict["유형"].__getitem__, type_preds))
polarity_preds = list(map(label_dict["극성"].__getitem__, polarity_preds))
tense_preds = list(map(label_dict["시제"].__getitem__, tense_preds))
certainty_preds = list(map(label_dict["확실성"].__getitem__, certainty_preds))

In [37]:
# Combine the four class names of each example into one "type-polarity-tense-certainty" label.
predictions = [
    '-'.join(parts)
    for parts in zip(type_preds, polarity_preds, tense_preds, certainty_preds)
]

In [38]:
# print(len(val["문장"]), len(polarity_preds))
# pd.DataFrame({"sentence":val["문장"], "유형":[label_dict["유형"][x] for x in val["유형"]], "극성":[label_dict["극성"][x] for x in val["극성"]], "시제":[label_dict["시제"][x] for x in val["시제"]], "확실성":[label_dict["확실성"][x] for x in val["확실성"]], "type_preds":type_preds, "polarity_preds":polarity_preds, "tense_preds":tense_preds, "certainty_preds":certainty_preds, "predictions":predictions}).to_csv("./prediction.csv")

In [39]:
# Sanity check: number of combined labels.
len(predictions)

7090

### Submission

In [40]:
# Start from the sample submission and fill its 'label' column with the combined predictions.
submit = pd.read_csv('/content/drive/MyDrive/dacon/result/sample_submission.csv')
submit['label'] = predictions

In [41]:
# Preview the first rows of the submission.
submit.head()

Unnamed: 0,ID,label
0,TEST_0000,사실형-긍정-현재-확실
1,TEST_0001,사실형-긍정-현재-확실
2,TEST_0002,사실형-긍정-과거-확실
3,TEST_0003,사실형-긍정-과거-확실
4,TEST_0004,사실형-긍정-과거-확실


In [42]:
# Write the submission file without the DataFrame index column.
submit.to_csv('/content/drive/MyDrive/dacon/result/submission.csv', index=False)

### Export validation predictions to CSV

In [43]:
# NOTE(review): the recorded run of this cell failed with FileNotFoundError, and the
# training log above stops at epoch 21 — confirm that an epoch-50 checkpoint exists
# (or pick a saved epoch) before relying on the cells below.
pretrained_dict = torch.load(r'/content/drive/MyDrive/dacon/saved_models/model_state_dict0050.pth', map_location=device)
model.load_state_dict(pretrained_dict)
infer_model = model

FileNotFoundError: ignored

In [None]:
# Pass the loaded table, not the path string: Dataset reads columns such as
# dataset["문장"] from its first argument.
data_test = Dataset(pd.read_csv("/content/drive/MyDrive/dacon/dataset/validation_split_by_polarity.csv"), True)
test_dataloader = DataLoader(data_test, batch_size=batch_size)

In [None]:
# NOTE(review): this re-defines `inference` with the same body as the definition in the
# Inference section above; the earlier definition could be reused instead.
def inference(infer_model, test_dataloader, device):
    """Predict class ids for the type, polarity, tense and certainty tasks.

    Moves ``infer_model`` to ``device``, puts it in eval mode and feeds it the
    first two items of every batch. Returns four lists of argmax class ids in
    the order type, polarity, tense, certainty.
    """
    infer_model.to(device)
    infer_model.eval()
    
    type_preds, polarity_preds, tense_preds, certainty_preds = [], [], [], []
    
    with torch.no_grad():
      for i in tqdm(test_dataloader):

        # i[0] holds the input ids and i[1] the attention masks; any label items are ignored.
        type_logit, polarity_logit, tense_logit, certainty_logit = infer_model(i[0].to(device), i[1].to(device))
        
        type_preds += type_logit.argmax(1).detach().cpu().numpy().tolist()
        polarity_preds += polarity_logit.argmax(1).detach().cpu().numpy().tolist()
        tense_preds += tense_logit.argmax(1).detach().cpu().numpy().tolist()
        certainty_preds += certainty_logit.argmax(1).detach().cpu().numpy().tolist()
        
    return type_preds, polarity_preds, tense_preds, certainty_preds

In [None]:
# Predict class ids for the four tasks on the current test_dataloader.
type_preds, polarity_preds, tense_preds, certainty_preds = inference(infer_model, test_dataloader, device)

  0%|          | 0/67 [00:00<?, ?it/s]



In [None]:
# Class names per label column; the list position is the class id.
label_dict = {
    "유형": ["사실형", "추론형", "대화형", "예측형"],
    "극성": ["긍정", "부정", "미정"],
    "시제": ["과거", "현재", "미래"],
    "확실성": ["확실", "불확실"],
}

In [None]:
# Sanity check: number of type predictions.
len(type_preds)

3309

In [None]:
# Replace every predicted class id with its class name, one label column at a time.
type_preds = list(map(label_dict["유형"].__getitem__, type_preds))
polarity_preds = list(map(label_dict["극성"].__getitem__, polarity_preds))
tense_preds = list(map(label_dict["시제"].__getitem__, tense_preds))
certainty_preds = list(map(label_dict["확실성"].__getitem__, certainty_preds))

In [None]:
# Combine the four class names of each example into one "type-polarity-tense-certainty" label.
predictions = [
    '-'.join(parts)
    for parts in zip(type_preds, polarity_preds, tense_preds, certainty_preds)
]

In [None]:
val = pd.read_csv("/content/drive/MyDrive/dacon/dataset/validation_split_by_polarity.csv")
print(len(val["문장"]), len(polarity_preds))

# One row per validation sentence: ground-truth class names (decoded from the
# id columns), then the per-task predictions and the combined label.
pd.DataFrame({
    "sentence": val["문장"],
    **{
        column: [label_dict[column][x] for x in val[column]]
        for column in ("유형", "극성", "시제", "확실성")
    },
    "type_preds": type_preds,
    "polarity_preds": polarity_preds,
    "tense_preds": tense_preds,
    "certainty_preds": certainty_preds,
    "predictions": predictions,
}).to_csv("/content/drive/MyDrive/dacon/result/prediction.csv")

3309 3309
