<a href="https://colab.research.google.com/github/Taewon-Park/Dacon/blob/main/%EA%B0%90%EC%A0%95_%EC%9D%B8%EC%8B%9D_(NLP).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import os
os.chdir('/content/drive/MyDrive/dacon/발화자의 감정인식/data/open')

In [None]:
!gdown https://drive.google.com/uc?=1-QNs8sk5X3u_1rK-dv5ESgQIt0ZDQSwY
!unzip -qq "./data/databffhw4rntmp"

In [10]:
# Requirements
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
import random
import os

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
from tqdm import tqdm
from transformers import BertTokenizer, RobertaModel, AutoModel
from transformers import BertModel, RobertaTokenizer, AutoTokenizer
from torch.optim import Adam

import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings(action='ignore')

In [25]:
# Train Data Load
data = pd.read_csv("train.csv")

In [26]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9989 entries, 0 to 9988
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   ID           9989 non-null   object
 1   Utterance    9989 non-null   object
 2   Speaker      9989 non-null   object
 3   Dialogue_ID  9989 non-null   int64 
 4   Target       9989 non-null   object
dtypes: int64(1), object(4)
memory usage: 390.3+ KB


In [28]:
data = data[:5000]

In [29]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   ID           5000 non-null   object
 1   Utterance    5000 non-null   object
 2   Speaker      5000 non-null   object
 3   Dialogue_ID  5000 non-null   int64 
 4   Target       5000 non-null   object
dtypes: int64(1), object(4)
memory usage: 195.4+ KB


In [57]:
# Preprocessing
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
os.environ["TOKENIZERS_PARALLELISM"] = "false"

CFG = {
  'EPOCHS' : 50,
  'LEARNING_RATE' : 1e-5,
  'BATCH_SIZE' : 2,
  'SEED' : 42
}


def seed_everything(seed):
  random.seed(seed)
  os.environ['PYTHONHASHSEED'] = str(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = True


def Encoding(data):
  le = LabelEncoder()
  le = le.fit(data['Target'])
  data['Target'] = le.transform(data['Target'])

  return data['Target']

def Class_Weights(data):
  class_counts = data['Target'].value_counts()
  class_weights = 1./class_counts
  class_weights = class_weights/class_weights.min()
  class_weights = class_weights.to_dict()
  class_weights = {k : v for k, v in sorted(class_weights.items(), key=lambda item : item[0])}
  class_weights = list(class_weights.values())
  class_weights = torch.FloatTensor(class_weights).to(device)

  return class_weights

def Strat_Split(nsplit, data):

  folds = StratifiedKFold(n_splits=nsplit, shuffle=True, random_state=CFG['SEED'])
  data['fold'] = -1
  for i in range(nsplit):
    trn_idx, val_idx = list(folds.split(data, data['Target']))[i]
    valid = data.iloc[val_idx]
    data.loc[data[data.ID.isin(valid.ID) == True].index.to_list(), 'fold'] = i

  data.to_csv('train_fold.csv', index=False)
  fold = pd.read_csv("train_fold.csv")

  le = LabelEncoder()
  le = le.fit(fold['Target'])
  fold['Target'] = le.transform(fold['Target'])

  strat_train = fold[fold['fold'] != 1].reset_index(drop=True)
  strat_valid = fold[fold['fold'] == 1].reset_index(drop=True)

  return strat_train, strat_valid, le


def Tokenizer_Define():
  bert = AutoModel.from_pretrained("tae898/emoberta-large").to(device)
  bert_pool = bert.pooler
  bert.pooler = torch.nn.Identity()

  return bert, bert_pool


class CustomDataset(torch.utils.data.Dataset):
  def __init__(self, data, mode = "train"):
    self.dataset = data
    self.mode = mode
    self.feature = []

    tokenizer = AutoTokenizer.from_pretrained("tae898/emoberta-large")

    for text in tqdm(data['Utterance']):
      inputs = tokenizer(text, padding='max_length', max_length=328, truncation=True, return_tensors="pt")
      input_ids = inputs['input_ids'][0][None].to(device)
      attention_mask = inputs['attention_mask'][0][None].to(device)

      _, pooled_output = bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=False)
      self.feature.append(pooled_output.detach()[0])

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    if self.mode == "train":
      return self.feature[idx], self.dataset['Target'][idx]
    else:
      return self.feature[idx]

In [50]:
# Run Preprocessing
seed_everything(CFG['SEED'])

Encoding(data)
class_weights = Class_Weights(data)
strat_train, strat_valid, le = Strat_Split(35, data)

bert, bert_pool = Tokenizer_Define()

Some weights of RobertaModel were not initialized from the model checkpoint at tae898/emoberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [35]:
# Stratified Dataset Check
print("train set : ", len(strat_train))
print("test set : ", len(strat_valid))

train set :  4857
test set :  143


In [41]:
# Make CustomDataset & DataLoader
train_ds = CustomDataset(strat_train, mode = "train")
valid_ds = CustomDataset(strat_valid, mode = "train")
train_dataloader = torch.utils.data.DataLoader(train_ds, batch_size= CFG['BATCH_SIZE'], shuffle=True)
val_dataloader = torch.utils.data.DataLoader(valid_ds, batch_size= CFG['BATCH_SIZE'], shuffle=True)

100%|██████████| 4857/4857 [06:23<00:00, 12.67it/s]
100%|██████████| 143/143 [00:10<00:00, 13.15it/s]


In [55]:
# Modeling
class BaseModel(nn.Module):
  def __init__(self, dropout=0.5, num_classes=len(le.classes_)):
    super(BaseModel, self).__init__()
    self.bert_pool = bert_pool
    self.dropout = nn.Dropout(dropout)
    self.linear = nn.Sequential(
      nn.Linear(1024, 512),
      nn.ReLU(),
      nn.Linear(512, num_classes),
    )

  def forward(self, pooled_output):
    pooled_output = self.bert_pool(pooled_output)
    dropout_output = self.dropout(pooled_output)
    linear_output = self.linear(pooled_output)

    return linear_output


def train(model, optimizer, train_loader, test_loader, scheduler, class_weights, device):
  model.to(device)
  criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)

  best_score = 0
  best_model = "None"
  for epoch_num in range(CFG["EPOCHS"]):
    model.train()
    train_loss = []
    for pooled_output, train_label in tqdm(train_loader):
      optimizer.zero_grad()
      pooled_output = pooled_output.to(device)
      train_label = train_label.to(device)

      output = model(pooled_output).to(device)
      batch_loss = criterion(output, train_label.long())

      batch_loss.backward()
      optimizer.step()

      train_loss.append(batch_loss.item())

    val_loss, val_score = validation(model, criterion, test_loader, device)
    print(f'Epoch [{epoch_num}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score : .5f}]')
    if scheduler is not None:
      scheduler.step(val_score)

    best_model = model
    best_score = val_score

  return best_model


def competition_metric(true, pred):
  return f1_score(true, pred, average="macro")


def validation(model, criterion, test_loader, device):
  model.eval()

  val_loss = []
  model_preds = []
  true_labels = []
  with torch.no_grad():
    for pooled_output, valid_label in tqdm(test_loader):
      valid_label = valid_label.to(device)
      pooled_output = pooled_output.to(device)

      output = model(pooled_output).to(device)

      batch_loss = criterion(output, valid_label.long())
      val_loss.append(batch_loss.item())

      model_preds += output.argmax(1).detach().cpu().numpy().tolist()
      true_labels += valid_label.detach().cpu().numpy().tolist()
      val_f1 = competition_metric(true_labels, model_preds)

  return val_loss, val_f1

In [58]:
# Run
model = BaseModel()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, verbose=True, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
infer_model = train(model, optimizer, train_dataloader, val_dataloader, scheduler, class_weights, device)

100%|██████████| 2429/2429 [00:07<00:00, 304.74it/s]
100%|██████████| 72/72 [00:00<00:00, 456.23it/s]


Epoch [0], Train Loss : [0.91105] Val Loss : [0.99467] Val F1 Score : [ 0.61517]


100%|██████████| 2429/2429 [00:07<00:00, 322.07it/s]
100%|██████████| 72/72 [00:00<00:00, 159.74it/s]


Epoch [1], Train Loss : [0.74238] Val Loss : [0.85119] Val F1 Score : [ 0.65599]


100%|██████████| 2429/2429 [00:10<00:00, 239.82it/s]
100%|██████████| 72/72 [00:00<00:00, 303.28it/s]


Epoch [2], Train Loss : [0.70954] Val Loss : [0.88904] Val F1 Score : [ 0.61767]


100%|██████████| 2429/2429 [00:05<00:00, 411.98it/s]
100%|██████████| 72/72 [00:00<00:00, 350.08it/s]


Epoch [3], Train Loss : [0.68084] Val Loss : [0.94985] Val F1 Score : [ 0.61752]


100%|██████████| 2429/2429 [00:05<00:00, 405.40it/s]
100%|██████████| 72/72 [00:00<00:00, 463.01it/s]


Epoch [4], Train Loss : [0.66953] Val Loss : [0.88827] Val F1 Score : [ 0.63861]
Epoch 00005: reducing learning rate of group 0 to 5.0000e-06.


100%|██████████| 2429/2429 [00:05<00:00, 433.99it/s]
100%|██████████| 72/72 [00:00<00:00, 316.30it/s]


Epoch [5], Train Loss : [0.64093] Val Loss : [0.91349] Val F1 Score : [ 0.62272]


100%|██████████| 2429/2429 [00:11<00:00, 209.06it/s]
100%|██████████| 72/72 [00:00<00:00, 316.20it/s]


Epoch [6], Train Loss : [0.63210] Val Loss : [0.88437] Val F1 Score : [ 0.64420]


100%|██████████| 2429/2429 [00:09<00:00, 259.49it/s]
100%|██████████| 72/72 [00:00<00:00, 143.30it/s]


Epoch [7], Train Loss : [0.62035] Val Loss : [0.91609] Val F1 Score : [ 0.65938]


100%|██████████| 2429/2429 [00:09<00:00, 268.19it/s]
100%|██████████| 72/72 [00:00<00:00, 322.33it/s]


Epoch [8], Train Loss : [0.60353] Val Loss : [0.91702] Val F1 Score : [ 0.64168]


100%|██████████| 2429/2429 [00:11<00:00, 216.32it/s]
100%|██████████| 72/72 [00:00<00:00, 180.38it/s]


Epoch [9], Train Loss : [0.60144] Val Loss : [0.90097] Val F1 Score : [ 0.58640]


100%|██████████| 2429/2429 [00:09<00:00, 250.02it/s]
100%|██████████| 72/72 [00:00<00:00, 331.05it/s]


Epoch [10], Train Loss : [0.59207] Val Loss : [0.86793] Val F1 Score : [ 0.61846]
Epoch 00011: reducing learning rate of group 0 to 2.5000e-06.


100%|██████████| 2429/2429 [00:08<00:00, 271.54it/s]
100%|██████████| 72/72 [00:00<00:00, 120.80it/s]


Epoch [11], Train Loss : [0.56905] Val Loss : [0.88913] Val F1 Score : [ 0.62569]


100%|██████████| 2429/2429 [00:08<00:00, 275.41it/s]
100%|██████████| 72/72 [00:00<00:00, 316.92it/s]


Epoch [12], Train Loss : [0.56701] Val Loss : [0.93903] Val F1 Score : [ 0.62569]


100%|██████████| 2429/2429 [00:09<00:00, 264.75it/s]
100%|██████████| 72/72 [00:00<00:00, 169.12it/s]


Epoch [13], Train Loss : [0.56289] Val Loss : [0.95303] Val F1 Score : [ 0.66021]


100%|██████████| 2429/2429 [00:09<00:00, 259.59it/s]
100%|██████████| 72/72 [00:00<00:00, 202.02it/s]


Epoch [14], Train Loss : [0.56128] Val Loss : [0.90480] Val F1 Score : [ 0.62899]


100%|██████████| 2429/2429 [00:11<00:00, 203.67it/s]
100%|██████████| 72/72 [00:00<00:00, 322.00it/s]


Epoch [15], Train Loss : [0.54613] Val Loss : [0.95218] Val F1 Score : [ 0.67526]


100%|██████████| 2429/2429 [00:07<00:00, 329.68it/s]
100%|██████████| 72/72 [00:00<00:00, 311.10it/s]


Epoch [16], Train Loss : [0.54628] Val Loss : [0.95845] Val F1 Score : [ 0.59725]


100%|██████████| 2429/2429 [00:12<00:00, 198.45it/s]
100%|██████████| 72/72 [00:00<00:00, 184.87it/s]


Epoch [17], Train Loss : [0.53263] Val Loss : [0.95033] Val F1 Score : [ 0.59202]


100%|██████████| 2429/2429 [00:09<00:00, 247.57it/s]
100%|██████████| 72/72 [00:00<00:00, 339.61it/s]


Epoch [18], Train Loss : [0.54043] Val Loss : [0.84679] Val F1 Score : [ 0.58872]
Epoch 00019: reducing learning rate of group 0 to 1.2500e-06.


100%|██████████| 2429/2429 [00:05<00:00, 419.80it/s]
100%|██████████| 72/72 [00:00<00:00, 456.08it/s]


Epoch [19], Train Loss : [0.52697] Val Loss : [0.90363] Val F1 Score : [ 0.58872]


100%|██████████| 2429/2429 [00:06<00:00, 368.33it/s]
100%|██████████| 72/72 [00:01<00:00, 68.75it/s]


Epoch [20], Train Loss : [0.52275] Val Loss : [0.90925] Val F1 Score : [ 0.58872]


100%|██████████| 2429/2429 [00:08<00:00, 283.93it/s]
100%|██████████| 72/72 [00:00<00:00, 318.48it/s]


Epoch [21], Train Loss : [0.52295] Val Loss : [0.97620] Val F1 Score : [ 0.58872]
Epoch 00022: reducing learning rate of group 0 to 6.2500e-07.


100%|██████████| 2429/2429 [00:06<00:00, 375.57it/s]
100%|██████████| 72/72 [00:00<00:00, 489.12it/s]


Epoch [22], Train Loss : [0.51306] Val Loss : [0.91967] Val F1 Score : [ 0.58872]


100%|██████████| 2429/2429 [00:07<00:00, 315.52it/s]
100%|██████████| 72/72 [00:00<00:00, 278.10it/s]


Epoch [23], Train Loss : [0.51281] Val Loss : [0.98925] Val F1 Score : [ 0.62379]


100%|██████████| 2429/2429 [00:08<00:00, 298.78it/s]
100%|██████████| 72/72 [00:00<00:00, 312.59it/s]


Epoch [24], Train Loss : [0.51131] Val Loss : [0.98219] Val F1 Score : [ 0.62775]
Epoch 00025: reducing learning rate of group 0 to 3.1250e-07.


100%|██████████| 2429/2429 [00:08<00:00, 286.35it/s]
100%|██████████| 72/72 [00:00<00:00, 461.76it/s]


Epoch [25], Train Loss : [0.51211] Val Loss : [0.95027] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:07<00:00, 330.01it/s]
100%|██████████| 72/72 [00:00<00:00, 322.50it/s]


Epoch [26], Train Loss : [0.51501] Val Loss : [0.92880] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:08<00:00, 271.18it/s]
100%|██████████| 72/72 [00:00<00:00, 314.04it/s]


Epoch [27], Train Loss : [0.51385] Val Loss : [1.01221] Val F1 Score : [ 0.62775]
Epoch 00028: reducing learning rate of group 0 to 1.5625e-07.


100%|██████████| 2429/2429 [00:06<00:00, 364.03it/s]
100%|██████████| 72/72 [00:00<00:00, 305.29it/s]


Epoch [28], Train Loss : [0.50565] Val Loss : [0.96253] Val F1 Score : [ 0.58872]


100%|██████████| 2429/2429 [00:12<00:00, 201.32it/s]
100%|██████████| 72/72 [00:00<00:00, 324.50it/s]


Epoch [29], Train Loss : [0.50540] Val Loss : [0.85654] Val F1 Score : [ 0.58872]


100%|██████████| 2429/2429 [00:06<00:00, 382.15it/s]
100%|██████████| 72/72 [00:00<00:00, 311.26it/s]


Epoch [30], Train Loss : [0.51490] Val Loss : [1.00427] Val F1 Score : [ 0.58872]
Epoch 00031: reducing learning rate of group 0 to 7.8125e-08.


100%|██████████| 2429/2429 [00:06<00:00, 372.36it/s]
100%|██████████| 72/72 [00:00<00:00, 350.46it/s]


Epoch [31], Train Loss : [0.51636] Val Loss : [0.89492] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:05<00:00, 435.08it/s]
100%|██████████| 72/72 [00:00<00:00, 457.30it/s]


Epoch [32], Train Loss : [0.51801] Val Loss : [0.96422] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:05<00:00, 451.33it/s]
100%|██████████| 72/72 [00:00<00:00, 470.63it/s]


Epoch [33], Train Loss : [0.50706] Val Loss : [0.85359] Val F1 Score : [ 0.62775]
Epoch 00034: reducing learning rate of group 0 to 3.9063e-08.


100%|██████████| 2429/2429 [00:06<00:00, 384.62it/s]
100%|██████████| 72/72 [00:00<00:00, 297.65it/s]


Epoch [34], Train Loss : [0.50382] Val Loss : [0.91670] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:05<00:00, 450.69it/s]
100%|██████████| 72/72 [00:00<00:00, 469.53it/s]


Epoch [35], Train Loss : [0.51199] Val Loss : [0.89988] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:05<00:00, 432.63it/s]
100%|██████████| 72/72 [00:00<00:00, 338.89it/s]


Epoch [36], Train Loss : [0.51014] Val Loss : [0.96095] Val F1 Score : [ 0.62775]
Epoch 00037: reducing learning rate of group 0 to 1.9531e-08.


100%|██████████| 2429/2429 [00:06<00:00, 395.76it/s]
100%|██████████| 72/72 [00:00<00:00, 499.71it/s]


Epoch [37], Train Loss : [0.50259] Val Loss : [0.92565] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:07<00:00, 319.67it/s]
100%|██████████| 72/72 [00:00<00:00, 328.15it/s]


Epoch [38], Train Loss : [0.51392] Val Loss : [0.93822] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:09<00:00, 253.19it/s]
100%|██████████| 72/72 [00:00<00:00, 222.42it/s]


Epoch [39], Train Loss : [0.51109] Val Loss : [0.92674] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:10<00:00, 224.92it/s]
100%|██████████| 72/72 [00:01<00:00, 62.50it/s]


Epoch [40], Train Loss : [0.51090] Val Loss : [0.94170] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:09<00:00, 245.82it/s]
100%|██████████| 72/72 [00:00<00:00, 296.27it/s]


Epoch [41], Train Loss : [0.50604] Val Loss : [0.98902] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:07<00:00, 309.46it/s]
100%|██████████| 72/72 [00:00<00:00, 192.27it/s]


Epoch [42], Train Loss : [0.50729] Val Loss : [0.86990] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:07<00:00, 312.38it/s]
100%|██████████| 72/72 [00:00<00:00, 297.26it/s]


Epoch [43], Train Loss : [0.50713] Val Loss : [0.95339] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:07<00:00, 322.73it/s]
100%|██████████| 72/72 [00:00<00:00, 107.41it/s]


Epoch [44], Train Loss : [0.50623] Val Loss : [0.95333] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:11<00:00, 203.86it/s]
100%|██████████| 72/72 [00:00<00:00, 150.62it/s]


Epoch [45], Train Loss : [0.50956] Val Loss : [0.89178] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:09<00:00, 253.41it/s]
100%|██████████| 72/72 [00:00<00:00, 242.27it/s]


Epoch [46], Train Loss : [0.50860] Val Loss : [0.96644] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:06<00:00, 352.54it/s]
100%|██████████| 72/72 [00:00<00:00, 312.49it/s]


Epoch [47], Train Loss : [0.50745] Val Loss : [0.87630] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:09<00:00, 259.73it/s]
100%|██████████| 72/72 [00:00<00:00, 248.61it/s]


Epoch [48], Train Loss : [0.50846] Val Loss : [0.95146] Val F1 Score : [ 0.62775]


100%|██████████| 2429/2429 [00:07<00:00, 318.69it/s]
100%|██████████| 72/72 [00:00<00:00, 312.53it/s]

Epoch [49], Train Loss : [0.50583] Val Loss : [0.94158] Val F1 Score : [ 0.62775]





In [70]:
torch.save(model.state_dict(), 'model.pt')

In [71]:
torch.save(infer_model.state_dict(), 'infer_model.pt')

In [67]:
torch.cuda.empty_cache()

In [64]:
# Test Data Load
test = pd.read_csv("test.csv")
test = test[:1000]

test_ds = CustomDataset(test, mode = "test")
test_dataloader = torch.utils.data.DataLoader(test_ds, batch_size = CFG['BATCH_SIZE'], shuffle=False)

 37%|███▋      | 366/1000 [00:28<00:50, 12.66it/s]


OutOfMemoryError: ignored

In [None]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()

    test_predict = []
    for pooled_output in tqdm(test_loader):
        pooled_output = pooled_output.to(device)
        y_pred = model(pooled_output)
        test_predict += y_pred.argmax(1).detach().cpu().numpy().tolist()
    print('Done.')

    return test_predict

In [None]:
preds = inference(infer_model, test_dataloader, device)
preds = le.inverse_transfrom(preds)

submit = pd.read_csv('sample_submission.csv')
submit['Target'] = preds

# submit에서 Target이 0이면 anger로 변환
submit['Target'] = submit['Target'].apply(lambda x: 'anger' if x == 0 else x)
submit['Target'] = submit['Target'].apply(lambda x: 'disgust' if x == 1 else x)
submit['Target'] = submit['Target'].apply(lambda x: 'fear' if x == 2 else x)
submit['Target'] = submit['Target'].apply(lambda x: 'joy' if x == 3 else x)
submit['Target'] = submit['Target'].apply(lambda x: 'neutral' if x == 4 else x)
submit['Target'] = submit['Target'].apply(lambda x: 'sadness' if x == 5 else x)
submit['Target'] = submit['Target'].apply(lambda x: 'surprise' if x == 6 else x)

submit.to_csv('submit.csv', index=False)