In [None]:
# !pip install konlpy
# !bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)
# !pip install soynlp
# !pip install git+https://github.com/haven-jeon/PyKoSpacing.git

In [None]:
import pandas as pd
import os
import json
import numpy as np
import shutil
import math

from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit

from datetime import datetime, timezone, timedelta
import random
from tqdm import tqdm

from attrdict import AttrDict
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import _LRScheduler
from torch.nn.modules.loss import _Loss

from transformers import logging, get_linear_schedule_with_warmup

from transformers import ( 
    BertConfig,
    ElectraConfig
)

from transformers import (
    BertTokenizer,  
    AutoTokenizer,
    ElectraTokenizer,
    AlbertTokenizer,
    RobertaTokenizer
)

from transformers import (
    BertModel,
    AutoModelForSequenceClassification, 
    ElectraForSequenceClassification,
    BertForSequenceClassification,
    AlbertForSequenceClassification,
    RobertaForSequenceClassification
)

# preprocessing
import re
from soynlp.normalizer import *
from pykospacing import Spacing
from konlpy.tag import *

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU generator plus
    the current and all CUDA generators), then puts cuDNN in deterministic
    mode with benchmarking switched off.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current CUDA device, every CUDA device (multi-GPU).
    for seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

    # cuDNN flags.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [None]:
# hugging face models
# preprocess
# input
# dropout
# learning rate
# epoch

# Preprocessing

In [None]:
def preprocessing(x): # konlpy
    """Return the result of ``processor.morphs(x)``.

    ``processor`` is a module-level object that must exist before this
    function is called; it is not created here.
    NOTE(review): presumably a KoNLPy tagger such as ``Mecab()`` -- confirm.
    """
    morphemes = processor.morphs(x)
    return morphemes

def clean_punc(x): # normalize
    """Replace typographic quotes and the heart symbol, character by character.

    Curly double/single quotes become their straight ASCII forms and the
    heart symbol becomes the word " 사랑 " (padded with spaces). Every other
    character is copied through unchanged.

    Args:
        x: Input string.

    Returns:
        The normalised string.
    """
    replacements = {"“": '"', "‘": "'", '”':'"', "’":"'", "♥":" 사랑 "}
    return ''.join(replacements.get(char, char) for char in x)

def remove_bracket(x): # strip meaningless bracketed tags
    """Remove short ``[...]`` tags and a few fixed ``(...)`` editorial tags.

    Two passes are applied:

    1. A square-bracket tag whose content is at most 9 characters is removed.
       The content may not contain ``[``, ``]`` or a newline, so two separate
       tags can no longer be merged into one match that also eats the text
       between them.
    2. The exact tags ``(종합)``, ``(인터뷰)`` and ``(전문)`` are removed. This is
       an alternation; the previous character class matched any parenthesised
       mix of those characters (for example ``(전문인)``).

    Args:
        x: Input string.

    Returns:
        The string with the matching tags deleted.
    """
    square_tag = re.compile(r"\[[^\[\]\n]{0,9}\]")
    without_square = square_tag.sub('', x)
    editorial_tag = re.compile(r"\((?:종합|인터뷰|전문)\)")
    return editorial_tag.sub('', without_square)

def make_space(x):
    """Run ``x`` through the module-level ``spacing`` callable and return the result.

    ``spacing`` must be defined before this function is called. An earlier
    variant only re-spaced strings longer than 20 characters that contained
    fewer than 5 blanks; it is no longer active and every input is passed on.
    """
    respaced = spacing(x)
    return respaced

def remove_repeat(x):
    """Blank out emoticon noise, then shorten repeated characters.

    The previous pattern wrapped everything in one ``[...]`` character class,
    so every individual space, ``+``, ``(``, ``)``, ``^``, ``.``, ``{``, ``}``
    and the digit ``2`` was replaced by a blank. The pattern is now an
    alternation of the tokens that class spelled out: a run of one token
    becomes a single blank, and each pair of dots becomes a single blank.

    Args:
        x: Input string.

    Returns:
        The result of ``repeat_normalize(cleaned, num_repeats=3)``.
    """
    noise_tokens = ("⁉️", "❎", "☝", "( ° ͜ʖ͡°)╭∩╮", "ᆢ", "(^„^)", "♨")
    # re.escape keeps the parentheses and carets inside the emoticons literal.
    alternatives = [f"(?:{re.escape(token)})+" for token in noise_tokens]
    alternatives.append(r"(?:\.){2}")
    p = re.compile("|".join(alternatives))
    x = p.sub(' ', x)
    return repeat_normalize(x, num_repeats=3)

def remove_doubledot(x):
    """Return ``x`` with every 'ᆢ' character deleted."""
    pieces = x.split('ᆢ')
    return ''.join(pieces)

def remove_space(x):
    """Collapse every run of whitespace in ``x`` into a single blank.

    The previous pattern ``[\\s+]`` was a character class: it replaced each
    whitespace character separately (so runs were never collapsed) and also
    replaced literal ``+`` characters. ``\\s+`` matches whole runs of
    whitespace only.

    Args:
        x: Input string.

    Returns:
        The string with whitespace runs (spaces, tabs, newlines) collapsed.
    """
    return re.sub(r"\s+", " ", x)

# Dataset

In [None]:
class MyDataset(Dataset):
    """Dataset yielding tokenized (title, comment) pairs and their labels.

    Args:
        dataset: 2-D array indexed as ``dataset[:, 0]`` (title),
            ``dataset[:, 1]`` (comment) and ``dataset[:, 2]`` (label).
        tokenizer: Callable invoked as ``tokenizer(title, comment, **kwargs)``
            that returns a mapping with ``input_ids``, ``attention_mask`` and
            ``token_type_ids`` tensors.
        max_len: Padded/truncated sequence length. ``None`` falls back to
            ``DEFAULT_MAX_LEN`` (128, the value that used to be hard-coded).
    """

    DEFAULT_MAX_LEN = 128

    def __init__(self, dataset, tokenizer, max_len):
        self.title = dataset[:,0]
        self.comment = dataset[:,1]
        self.labels = dataset[:,2]
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Number of samples."""
        return len(self.title)

    def __getitem__(self, idx):
        """Tokenize sample ``idx``.

        Returns:
            ``(data, label)`` where ``data`` maps ``input_ids``,
            ``attention_mask`` and ``token_type_ids`` to int64 tensors exactly
            as returned by the tokenizer with ``return_tensors="pt"``.
        """
        # Honour the configured length instead of always using 128.
        max_length = self.max_len if self.max_len is not None else self.DEFAULT_MAX_LEN

        tokenized_text = self.tokenizer(self.title[idx], self.comment[idx],
                             padding='max_length',
                             max_length=max_length,
                             truncation=True,
                             return_token_type_ids=True,
                             return_attention_mask=True,
                             return_tensors="pt",
                             is_split_into_words=True)

        data = {
            key: tokenized_text[key].long()
            for key in ('input_ids', 'attention_mask', 'token_type_ids')
        }

        return data, self.labels[idx]


In [None]:
def split_stratified_shuffle_split(df, fold, n_split, seed):
    """Return the train/validation arrays of one stratified shuffle split.

    ``n_split`` independent 80/20 splits stratified on ``df['label']`` are
    generated with ``random_state=seed``; the ``fold``-th one (0-based) is
    returned.

    Args:
        df: DataFrame with ``title``, ``comment`` and ``label`` columns.
        fold: Index of the split to return, ``0 <= fold < n_split``.
        n_split: Number of splits to generate.
        seed: Random state passed to ``StratifiedShuffleSplit``.

    Returns:
        ``(train_data, valid_data)``: arrays whose columns are
        ``title, comment, label``.

    Raises:
        ValueError: If ``fold`` is outside ``[0, n_split)``. This used to
            surface as an ``UnboundLocalError`` at the return statement.
    """
    if not 0 <= fold < n_split:
        raise ValueError(f"fold must satisfy 0 <= fold < n_split ({n_split}), got {fold}")

    columns = ['title', 'comment', 'label']
    splitter = StratifiedShuffleSplit(n_splits=n_split, train_size=0.8, test_size=0.2, random_state=seed)
    values = df[columns].values
    for idx, (train_idx, valid_idx) in enumerate(splitter.split(df[columns], df['label'])):
        if idx == fold:
            # Stop as soon as the requested split is found.
            return values[train_idx], values[valid_idx]

    raise ValueError(f"split {fold} was not produced by the splitter")

# Model

In [None]:
class MyModel(nn.Module):
    """Sequence classifier wrapping the pretrained model selected in ``CFG``.

    The backbone is ``BASE_MODELS[CFG['architecture']]`` loaded from
    ``CFG['pretrained_model']`` with ``CFG['num_classes']`` labels and
    ``output_hidden_states=True``.

    Args:
        hidden_size: Fallback hidden size, used only when the backbone config
            has no ``hidden_size`` attribute.
        num_classes: Output size of the optional concatenation head.
            ``None`` falls back to ``CFG['num_classes']``.
        selected_layers: When true, classify from the first-token vectors of
            the last four hidden states concatenated together instead of
            using the backbone's own logits.
        params: Unused; kept for interface compatibility.
    """

    def __init__(self, hidden_size = 768, num_classes=None, selected_layers=False, params=None):
        super(MyModel, self).__init__()

        self.model = BASE_MODELS[CFG['architecture']].from_pretrained(CFG['pretrained_model'],
                                                         num_labels = CFG['num_classes'],
                                                         output_attentions = False, # Whether the model returns attentions weights.
                                                         output_hidden_states = True # Whether the model returns all hidden-states.
                                                        )
        self.softmax = nn.Softmax(dim=1)
        self.selected_layers = selected_layers
        self.num_classes = num_classes if num_classes is not None else CFG['num_classes']

        self.dropout = nn.Dropout(0.1)

        if self.selected_layers:
            # The head is created once so its weights are trained, moved with
            # .to(device) and saved in state_dict. It used to be rebuilt with
            # random weights on every forward pass. It is only registered when
            # requested, so checkpoints saved with selected_layers=False keep
            # exactly the same state_dict keys.
            backbone_hidden = getattr(self.model.config, 'hidden_size', hidden_size)
            self.concat_classifier = nn.Linear(backbone_hidden * 4, self.num_classes)


    def forward(self, token_ids, attention_mask, segment_ids):
        """Run the backbone and return ``(logits, probabilities)``.

        Args:
            token_ids: Passed to the backbone as ``input_ids``.
            attention_mask: Cast to float and moved to ``token_ids.device``.
            segment_ids: Cast to long and passed as ``token_type_ids``.

        Returns:
            Tuple of logits and their softmax over ``dim=1``.
        """
        outputs = self.model(input_ids = token_ids,
                             token_type_ids = segment_ids.long(),
                             attention_mask = attention_mask.float().to(token_ids.device))

        if self.selected_layers:
            hidden_states = outputs.hidden_states
            # Concatenate the last four layers along the feature axis and keep
            # the vector at sequence position 0.
            pooled_output = torch.cat([hidden_states[i] for i in (-4, -3, -2, -1)], dim=-1)
            pooled_output = pooled_output[:, 0, :]
            pooled_output = self.dropout(pooled_output)
            logits = self.concat_classifier(pooled_output)
        else:
            logits = outputs.logits

        prob = self.softmax(logits)
        return logits, prob

# EarlyStopper

In [None]:
class EarlyStopper():
    """Track the best validation score and signal when to save or stop.

    Attributes:
        patience: Number of consecutive non-improving checks tolerated.
        patience_counter: Current count of consecutive non-improving checks.
        max_score: Best score seen so far (``0.`` before the first check).
        stop: Set to ``True`` once ``patience_counter`` reaches ``patience``.
        save_model: ``True`` exactly when the latest check set a new best.
    """

    def __init__(self, patience: int)-> None:
        self.patience = patience
        self.patience_counter = 0
        self.max_score = 0.
        self.stop = False
        self.save_model = False
        # Separate flag so that a genuine score of 0.0 is not mistaken for
        # "no score recorded yet".
        self._has_baseline = False

    def check_early_stopping(self, score: float)-> None:
        """Update the tracker with the latest validation ``score``.

        The first score is always treated as a new best, so the first epoch
        is checkpointed too. Any later score that is not strictly greater
        than ``max_score`` increments the patience counter.
        """
        if not self._has_baseline or score > self.max_score:
            previous = self.max_score
            self._has_baseline = True
            self.patience_counter = 0
            self.max_score = score
            self.save_model = True
            # Report the old best before it is overwritten.
            print(f"Validation score increased {previous} -> {score}")
        else:
            self.patience_counter += 1
            if self.patience_counter >= self.patience:
                self.stop = True
            self.save_model = False
            print(f"Early stopping counter {self.patience_counter}/{self.patience}")

# Loss

In [None]:
class F1_Loss(nn.Module):
    """Differentiable (soft) macro-F1 loss computed from logits.

    Args:
        epsilon: Small constant added to denominators and used to clamp the
            per-class F1 into ``[epsilon, 1 - epsilon]``.
    """

    def __init__(self, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, y_pred, y_true,):
        """Return ``1 - mean(per-class soft F1)``.

        Args:
            y_pred: 2-D logits; the class count is taken from its second
                dimension (previously hard-coded to 2).
            y_true: 1-D integer class indices.
        """
        assert y_pred.ndim == 2
        assert y_true.ndim == 1
        num_classes = y_pred.shape[1]
        y_true = F.one_hot(y_true, num_classes).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        # Soft confusion counts per class, summed over the batch dimension.
        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()

In [None]:
class MyLoss(_Loss):
    """Training criterion; currently plain cross-entropy.

    An ``F1_Loss`` instance is also created and stored, but ``forward`` does
    not use it at the moment.
    """

    def __init__(self):
        super().__init__()
        # A class-weighted variant was tried before:
        # weight=torch.tensor([1-(3646/(4721+3646)), 1-(4721/(4721+3646))]
        self.lossCE = nn.CrossEntropyLoss()
        self.lossF1 = F1_Loss()

    def forward(self, preds, trg):
        """Return the cross-entropy between ``preds`` and ``trg``."""
        # Disabled alternative: (self.lossCE(preds, trg) + self.lossF1(preds, trg)) / 2
        cross_entropy = self.lossCE(preds, trg)
        return cross_entropy

# Scheduler

In [None]:
class CosineAnnealingWarmupRestarts(_LRScheduler):
    """Cosine-annealing learning-rate schedule with linear warmup and restarts.

    Within a cycle the learning rate rises linearly from ``min_lr`` to the
    cycle's ``max_lr`` during the first ``warmup_steps`` steps, then follows a
    half cosine back down to ``min_lr``. When a cycle ends a new one starts;
    its length is scaled by ``cycle_mult`` (the warmup length is kept) and its
    peak is ``max_lr * gamma ** cycle``.

    The constructor overwrites the ``lr`` of every optimizer param group with
    ``min_lr``.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        first_cycle_steps (int): First cycle step size.
        cycle_mult(float): Cycle steps magnification. Default: 1.
        max_lr(float): First cycle's max learning rate. Default: 0.1.
        min_lr(float): Min learning rate. Default: 0.001.
        warmup_steps(int): Linear warmup step size. Must be smaller than
            ``first_cycle_steps``. Default: 0.
        gamma(float): Decrease rate of max learning rate by cycle. Default: 1.
        last_epoch (int): The index of last epoch. Default: -1.
    """

    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 first_cycle_steps: int,
                 cycle_mult: float = 1.,
                 max_lr: float = 0.1,
                 min_lr: float = 0.001,
                 warmup_steps: int = 0,
                 gamma: float = 1.,
                 last_epoch: int = -1
                 ):
        # The cosine phase divides by (cycle length - warmup), so warmup must be shorter.
        assert warmup_steps < first_cycle_steps

        self.first_cycle_steps = first_cycle_steps  # first cycle step size
        self.cycle_mult = cycle_mult  # cycle steps magnification
        self.base_max_lr = max_lr  # first max learning rate
        self.max_lr = max_lr  # max learning rate in the current cycle
        self.min_lr = min_lr  # min learning rate
        self.warmup_steps = warmup_steps  # warmup step size
        self.gamma = gamma  # decrease rate of max learning rate by cycle

        self.cur_cycle_steps = first_cycle_steps  # first cycle step size
        self.cycle = 0  # cycle count
        self.step_in_cycle = last_epoch  # step size of the current cycle

        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)

        # Runs after the base-class constructor: resets base_lrs and every
        # param group's lr to min_lr.
        self.init_lr()

    def init_lr(self):
        """Set every param group's ``lr`` to ``min_lr`` and rebuild ``base_lrs``."""
        self.base_lrs = []
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.min_lr
            self.base_lrs.append(self.min_lr)

    def get_lr(self):
        """Return one learning rate per param group for the current position in the cycle."""
        if self.step_in_cycle == -1:
            # No step taken yet.
            return self.base_lrs
        elif self.step_in_cycle < self.warmup_steps:
            # Linear warmup from base_lr towards max_lr.
            return [(self.max_lr - base_lr) * self.step_in_cycle / self.warmup_steps + base_lr for base_lr in
                    self.base_lrs]
        else:
            # Half-cosine decay from max_lr back to base_lr over the rest of the cycle.
            return [base_lr + (self.max_lr - base_lr) \
                    * (1 + math.cos(math.pi * (self.step_in_cycle - self.warmup_steps) \
                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule and write the new learning rates to the optimizer.

        Args:
            epoch: When ``None`` the schedule moves forward by one step.
                Otherwise the cycle index and the position inside the cycle
                are recomputed from this absolute step number.
        """
        if epoch is None:
            epoch = self.last_epoch + 1
            self.step_in_cycle = self.step_in_cycle + 1
            if self.step_in_cycle >= self.cur_cycle_steps:
                # Cycle finished: start the next one and rescale its
                # non-warmup part by cycle_mult.
                self.cycle += 1
                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
                self.cur_cycle_steps = int(
                    (self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
        else:
            if epoch >= self.first_cycle_steps:
                if self.cycle_mult == 1.:
                    # All cycles have equal length.
                    self.step_in_cycle = epoch % self.first_cycle_steps
                    self.cycle = epoch // self.first_cycle_steps
                else:
                    # Cycle lengths form a geometric series; n is the index of
                    # the cycle that contains `epoch`.
                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
                    self.cycle = n
                    self.step_in_cycle = epoch - int(
                        self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
                    # NOTE(review): no int() here, so this is a float when cycle_mult is a float.
                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
            else:
                self.cur_cycle_steps = self.first_cycle_steps
                self.step_in_cycle = epoch

        # The peak learning rate shrinks by gamma for every completed cycle.
        self.max_lr = self.base_max_lr * (self.gamma ** self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

# Train Function

In [None]:
def train_one_epoch(model, criterion, train_loader, optimizer1, scheduler1, device): # optimizer2, scheduler2, 
    """Train ``model`` for one pass over ``train_loader``.

    The optimizer and the scheduler are both stepped once per batch.

    Args:
        model: Called as ``model(input_ids, attention_mask, token_type_ids)``;
            element 0 of its return value is used as the logits.
        criterion: Loss called as ``criterion(logits, targets)``.
        train_loader: Iterable of ``(src, trg)`` batches where ``src`` has the
            keys ``input_ids``, ``attention_mask`` and ``token_type_ids``.
        optimizer1: Optimizer stepped after every batch.
        scheduler1: LR scheduler stepped after every batch.
        device: Device the batch tensors are moved to.

    Returns:
        ``(train_losses, train_acc, train_score)``: the sum of batch losses,
        the number of correct predictions, and the F1 score over the epoch.
    """
    model.train()

    train_losses = 0.
    train_acc = 0.

    preds = []
    targets = []

    for src, trg in tqdm(train_loader):

        # All three inputs get the same squeeze(1) as input_ids, so the
        # attention mask is no longer passed on with an extra dimension.
        mask = src['attention_mask'].squeeze(1).to(device)
        input_id = src['input_ids'].squeeze(1).to(device)
        segment_ids = src['token_type_ids'].squeeze(1).to(device)
        trg = trg.long().to(device)

        logits = model(input_id, mask, segment_ids)[0]
        # Take the class count from the logits rather than hard-coding 2.
        loss = criterion(logits.view(-1, logits.size(-1)), trg.view(-1))
        train_losses += loss.item()

        batch_preds = logits.argmax(dim=-1)
        train_acc += (batch_preds == trg).sum().item()

        preds.extend(batch_preds.detach().cpu().numpy())
        targets.extend(trg.detach().cpu().numpy())

        optimizer1.zero_grad()
        loss.backward()
        optimizer1.step()
        scheduler1.step()

    train_score = f1_score(targets, preds)

    print("recall", recall_score(targets, preds), "precision", precision_score(targets, preds))
    print(accuracy_score(targets, preds))
    return train_losses, train_acc, train_score

In [None]:
def valid_one_epoch(model, criterion, valid_loader, device):
    """Evaluate ``model`` on ``valid_loader`` without gradient tracking.

    Args:
        model: Called as ``model(input_ids, attention_mask, token_type_ids)``;
            element 0 of its return value is used as the logits.
        criterion: Loss called as ``criterion(logits, targets)``.
        valid_loader: Iterable of ``(src, trg)`` batches where ``src`` has the
            keys ``input_ids``, ``attention_mask`` and ``token_type_ids``.
        device: Device the batch tensors are moved to.

    Returns:
        ``(valid_losses, valid_acc, valid_score, preds)``: the sum of batch
        losses, the number of correct predictions, the F1 score, and the list
        of predicted class indices in loader order.
    """
    model.eval()

    valid_losses = 0.
    valid_acc = 0.

    preds = []
    targets = []

    with torch.no_grad():
        for src, trg in tqdm(valid_loader):
            # All three inputs get the same squeeze(1) as input_ids.
            mask = src['attention_mask'].squeeze(1).to(device)
            input_id = src['input_ids'].squeeze(1).to(device)
            segment_ids = src['token_type_ids'].squeeze(1).to(device)
            trg = trg.long().to(device)

            logits = model(input_id, mask, segment_ids)[0]
            # Take the class count from the logits rather than hard-coding 2.
            loss = criterion(logits.view(-1, logits.size(-1)), trg.view(-1))
            valid_losses += loss.item()

            batch_preds = logits.argmax(dim=-1)
            valid_acc += (batch_preds == trg).sum().item()

            preds.extend(batch_preds.detach().cpu().numpy())
            targets.extend(trg.detach().cpu().numpy())

    # scikit-learn metrics take (y_true, y_pred); the arguments were swapped here.
    valid_score = f1_score(targets, preds)
    print("recall", recall_score(targets, preds), "precision", precision_score(targets, preds))
    print(accuracy_score(targets, preds))
    # The full target/prediction lists are no longer printed every epoch;
    # the predictions are still returned to the caller.
    return valid_losses, valid_acc, valid_score, preds

# Train

In [None]:
# Experiment configuration read by the cells below.
CFG = {
    # Project root; the train and test CSV files are read from below it.
    'root_dir': '/content/drive/Othercomputers/내 컴퓨터/workspace/nlp_classification_',
    "result_dir": '/content/drive/Othercomputers/내 컴퓨터/workspace/nlp_classification_/result',
    # Number of labels passed to the pretrained classification model.
    'num_classes': 2,
    # Passed to MyDataset as `max_len`.
    "max_seq_len": None,
    # Upper bound on training epochs; early stopping may end the run sooner.
    "epochs": 40,
    "seed": 2022,
    "batch_size": 16,
    # Used as the scheduler's peak learning rate (`max_lr`).
    "lr": 1e-5,
    "warmup_proportion": 0.1,
    # Early-stopping patience, counted in validation checks.
    "patience": 10,
    # Hugging Face checkpoint name, shared by the tokenizer and the model.
    "pretrained_model": "beomi/KcELECTRA-base", # "beomi/KcELECTRA-base",  
    # Key into BASE_MODELS.
    "architecture": "ElectraForSequenceClassification", # "ElectraForSequenceClassification",
    # Key into TOKENIZER_CLASSES.
    "tokenizer_class": "ElectraTokenizer", # "ElectraTokenizer",
    # Part of the checkpoint file name.
    'experiment_name': 'hate_pre+',
    # Number of stratified shuffle splits and the 0-based index of the one used.
    'n_split': 5,
    'fold': 4
}

In [None]:
# Restrict CUDA to GPU 0. This has to happen before the first CUDA query
# (torch.cuda.device_count / is_available below); it used to be set afterwards.
# As a result the device count printed below only reflects visible devices.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

logging.set_verbosity_error()
seed_everything(CFG['seed'])

# Lookup tables that resolve the class names stored in CFG.
TOKENIZER_CLASSES = {
    "BertTokenizer": BertTokenizer,
    "AutoTokenizer": AutoTokenizer,
    "ElectraTokenizer": ElectraTokenizer,
    "AlbertTokenizer": AlbertTokenizer,
    "RobertaTokenizer": RobertaTokenizer
}
BASE_MODELS = {
    "BertForSequenceClassification": BertForSequenceClassification,
    "AutoModelForSequenceClassification": AutoModelForSequenceClassification,
    "ElectraForSequenceClassification": ElectraForSequenceClassification,
    "AlbertForSequenceClassification": AlbertForSequenceClassification,
    "RobertaForSequenceClassification": RobertaForSequenceClassification
}

TOKENIZER = TOKENIZER_CLASSES[CFG['tokenizer_class']].from_pretrained(CFG['pretrained_model'])
# NOTE(review): this registers an empty string as a new token, and the model's
# embedding matrix is never resized to match. Presumably a special character
# was lost from this literal -- confirm what was meant to be added.
TOKENIZER.add_tokens(['', ])

print("number of GPUs: ", torch.cuda.device_count())
use_cuda = torch.cuda.is_available()
print("Does GPU exist? : ", use_cuda)
device = torch.device("cuda" if use_cuda else "cpu")

DEBUG = True

train_df = pd.read_csv(CFG['root_dir'] + '/train/train.csv', encoding = 'UTF-8-SIG')

spacing = Spacing()

# Cleaning applied to both text columns: remove_repeat first, then remove_space.
# remove_doubledot (title, comment), clean_punc (title), remove_bracket (title)
# and the Mecab-based `preprocessing` step are disabled for training.
# NOTE(review): the test set further down still goes through remove_doubledot,
# clean_punc and remove_bracket -- confirm the mismatch is intended.
for text_column in ('title', 'comment'):
    train_df[text_column] = train_df[text_column].map(remove_repeat).map(remove_space)

# Binary target: 'none' -> 0, 'hate' -> 1.
train_df['label'] = train_df['hate'].map({'none':0, 'hate':1})

train_data, valid_data = split_stratified_shuffle_split(
    train_df, CFG['fold'], CFG['n_split'], CFG['seed'])

train_dataset = MyDataset(train_data, TOKENIZER, CFG['max_seq_len'])
valid_dataset = MyDataset(valid_data, TOKENIZER, CFG['max_seq_len'])

# Only the training loader shuffles; validation keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=CFG['batch_size'], shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=CFG['batch_size'], shuffle=False)

early_stopper = EarlyStopper(patience=CFG['patience'])
model = MyModel(selected_layers=False).to(device)

criterion = MyLoss().to(device)

# The scheduler is stepped once per batch, so the schedule length must be
# counted in loader batches. len(train_loader) includes the final partial
# batch; `len(train_dataset) // batch_size` did not, which made the cycle end
# (and restart) shortly before the last epoch finished.
steps_per_epoch = len(train_loader)
cosine_annealing_scheduler_arg1 = dict(
    first_cycle_steps=steps_per_epoch * CFG['epochs'],
    cycle_mult=1.0,
    max_lr=CFG['lr'],
    min_lr=1e-08,
    warmup_steps=steps_per_epoch * 3,  # linear warmup over the first 3 epochs
    gamma=0.9
)
# Same settings, kept for the optional second optimizer/scheduler below.
cosine_annealing_scheduler_arg2 = dict(cosine_annealing_scheduler_arg1)

# lr=0.000 is a placeholder: the scheduler overwrites it with min_lr at
# construction and drives it from then on.
optimizer1 = optim.AdamW(model.parameters(), lr=0.000, weight_decay=0) # .model.electra.
scheduler1 = CosineAnnealingWarmupRestarts(optimizer1, **cosine_annealing_scheduler_arg1)
# optimizer2 = optim.AdamW(model.model.classifier.parameters(), lr=0.000, weight_decay=0)
# scheduler2 = CosineAnnealingWarmupRestarts(optimizer2, **cosine_annealing_scheduler_arg2)

In [None]:
# Checkpoint locations. The "best" file is overwritten whenever the validation
# score improves, then renamed to include the final best score.
checkpoint_dir = "/content/drive/MyDrive/Colab Notebooks" + "/pth"
best_checkpoint_path = checkpoint_dir + f"/{CFG['fold']}_best_{CFG['experiment_name']}.pth"
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_saved = False  # becomes True once this run has written a checkpoint

print('Start Training!')
for i in range(CFG['epochs']):
    print("Epoch :", i)
    train_losses, train_match, train_score = train_one_epoch(
        model, criterion, train_loader, optimizer1, scheduler1, device) # optimizer2, scheduler2, 

    valid_losses, valid_match, valid_score, preds = valid_one_epoch(
        model, criterion, valid_loader, device)

    # Losses are averaged per batch, accuracy per sample.
    print(f"Train loss, acc, score: {train_losses / len(train_loader):.3f}, {train_match / len(train_loader.dataset):.3f}, {train_score:.3f}")
    print(f"Valid loss, acc, score: {valid_losses / len(valid_loader):.3f}, {valid_match / len(valid_loader.dataset):.3f}, {valid_score:.3f}")
    early_stopper.check_early_stopping(valid_score)

    print("learning rate :", scheduler1.get_lr())

    if early_stopper.save_model:
        dic = {
            'model':model.state_dict(),
            'optimizer':optimizer1.state_dict(),
            'scheduler':scheduler1.state_dict(),
        }
        torch.save(dic, best_checkpoint_path)
        checkpoint_saved = True
        print('save_model')

    if early_stopper.stop:
        break

# Rename only a checkpoint written by this run. An unconditional rename
# crashed when nothing had been saved, or renamed a stale file from an
# earlier run.
if checkpoint_saved:
    os.replace(
        best_checkpoint_path,
        checkpoint_dir + f"/{CFG['fold']}_{early_stopper.max_score:.3f}_{CFG['experiment_name']}.pth"
        )
else:
    print("No checkpoint was saved during this run; nothing to rename.")

In [None]:
# Reload a saved checkpoint and re-run validation.
# map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
# NOTE(review): this path (fold 0, experiment "hate") does not match the file
# written by the training cell for the current CFG -- confirm which checkpoint
# is meant to be inspected.
model.load_state_dict(
    torch.load('/content/drive/MyDrive/Colab Notebooks/pth/0_best_hate.pth', map_location=device)['model'])
valid_losses, valid_acc, valid_score, preds = valid_one_epoch(model, criterion, valid_loader, device)
preds

In [None]:
# Put the validation rows and the model's predictions side by side for inspection.
tmp = pd.DataFrame(valid_data).assign(pred=preds)
tmp

# Inference

In [None]:
# 테스트 데이터셋 불러오기
def test(model, test_loader):
    model.eval()

    outputs = None

    with torch.no_grad():
        for test_input, _ in test_loader:

            mask = test_input['attention_mask'].to(device)
            input_id = test_input['input_ids'].squeeze(1).to(device)
            segment_ids = test_input['token_type_ids'].squeeze(1).to(device)

            output = model(input_id, mask, segment_ids)
            if outputs is None:
                outputs = output[0].detach().cpu().numpy()
            else:
                outputs = np.concatenate((outputs, output[0].detach().cpu().numpy()), axis=0)
    return outputs

In [None]:
# One checkpoint per fold; their logits are summed in the next cell.
weights = [
           '/content/drive/MyDrive/Colab Notebooks/pth/0_840_hate_pre+.pth',
           '/content/drive/MyDrive/Colab Notebooks/pth/1_848_hate_pre+.pth',
           '/content/drive/MyDrive/Colab Notebooks/pth/2_841_hate_pre+.pth',
           '/content/drive/MyDrive/Colab Notebooks/pth/3_0.847_hate_pre+.pth',
           '/content/drive/MyDrive/Colab Notebooks/pth/4_0.830_hate_pre+.pth'
]
# Same encoding as the training CSV, so a byte-order mark cannot end up in the
# first column name.
test_df = pd.read_csv(CFG['root_dir'] + '/test/test.csv', encoding='UTF-8-SIG')
test_df['label'] = 0  # placeholder label so MyDataset can index column 2

# NOTE(review): the training cell only applies remove_repeat and remove_space;
# remove_doubledot, clean_punc and remove_bracket are applied here only --
# confirm the mismatch is intended.
test_df['title'] = test_df['title'].map(remove_repeat)
test_df['comment'] = test_df['comment'].map(remove_repeat)
test_df['title'] = test_df['title'].map(remove_doubledot)
test_df['comment'] = test_df['comment'].map(remove_doubledot)
test_df['title'] = test_df['title'].map(clean_punc)
test_df['title'] = test_df['title'].map(remove_bracket)
test_df['title'] = test_df['title'].map(remove_space)
test_df['comment'] = test_df['comment'].map(remove_space)

test_dataset = MyDataset(test_df[['title', 'comment', 'label']].values, TOKENIZER, CFG['max_seq_len'])
test_loader = DataLoader(test_dataset, batch_size=CFG['batch_size'])

In [None]:
# Ensemble: sum the raw logits of every fold checkpoint, then take the argmax.
preds = None
for weight in weights:
    model = MyModel().to(device)
    # map_location lets GPU-saved checkpoints load on a CPU-only runtime.
    checkpoint = torch.load(weight, map_location=device)
    model.load_state_dict(checkpoint['model'])

    fold_logits = test(model, test_loader)
    preds = fold_logits if preds is None else preds + fold_logits

outputs = preds.argmax(axis=-1)


In [None]:
# Load the CSV that the next cell fills in and writes back out.
# NOTE(review): presumably a submission template with a `bias` column -- confirm.
tmp = pd.read_csv('/content/drive/Othercomputers/내 컴퓨터/workspace/nlp_classification_/tmp.csv')
tmp

In [None]:
# Store the ensemble's class indices, then turn the index columns into label names.
tmp['hate'] = pd.Series(outputs, index=tmp.index).map({0: 'none', 1: 'hate'})
tmp['bias'] = tmp['bias'].map({0: 'none', 1: 'others', 2: 'gender'})

# Write the finished table without the index column.
tmp.to_csv('/content/drive/Othercomputers/내 컴퓨터/workspace/nlp_classification_/v02.csv', index=False)

In [None]:
# df = pd.read_csv('./train.csv')
# df['comment_title'] = df['comment'] + ' ' + df['title']
# loss_fn = nn.CrossEntropyLoss()