In [1]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from CustomDataCollatorForSequenceClassification import CustomDataCollatorForSequenceClassification
from torch.optim import AdamW
from datasets import Dataset
import pandas as pd
import numpy as np
from WordPieceTokenizer import WordPieceTokenizer as Tokenizer
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
from CustomBertSequenceClassification import CustomBertSequenceClassification
from CustomBert import CustomBertConfig
import CustomBert
from collections import Counter
import os
from Model import LSTM
from Model import Transformer, PositionalEncoding
from sklearn.metrics import f1_score
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.tensorboard import SummaryWriter
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

dataFilePath = 'datasets/'
saveFilePath = 'saves/'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = Tokenizer(f'{saveFilePath}vocab.txt',do_lower_case=False,strip_accents=False,clean_text=True)
VOCAB_SIZE = tokenizer.get_vocab_size()
MAX_SEQUENCE_LENGTH = 128
BATCH_SIZE = 64

In [2]:
df = pd.read_csv(f'{dataFilePath}sentiment_train.csv',index_col=0)
df.head()

Unnamed: 0,발화,감정,str_len,attention_mask,token_type_ids
0,2 2346 2340 2003 10573 1077 2712 30987 1 3 0 0...,불안,24,1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
1,2 2085 163 2423 16734 2153 1 3 0 0 0 0 0 0 0 0...,불안,12,1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
2,2 2105 4407 1265 18055 1088 5069 1365 1 3 0 0 ...,불안,14,1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
3,2 10073 19740 2668 17073 2565 3 0 0 0 0 0 0 0 ...,불안,13,1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
4,2 2233 2870 10316 1076 1078 2580 3 0 0 0 0 0 0...,불안,11,1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...


In [3]:
df.loc[(df['감정'] == '불안'),'감정'] = 0
df.loc[(df['감정'] == '당황'),'감정'] = 1
df.loc[(df['감정'] == '분노'),'감정'] = 2
df.loc[(df['감정'] == '슬픔'),'감정'] = 3
df.loc[(df['감정'] == '중립'),'감정'] = 4
df.loc[(df['감정'] == '행복'),'감정'] = 5
df.loc[(df['감정'] == '혐오'),'감정'] = 6

In [4]:
def prepare_classification_dataset(data_frame, tokenizer):
    processed_tokens = []
    processed_attentions = []
    processed_token_type_ids = []

    for i in tqdm(range(len(data_frame)), desc="데이터 파싱 중"):
        token_str = data_frame.iloc[i, 0]
        attention_str = data_frame.iloc[i, 3]
        token_type_ids_str = data_frame.iloc[i, 4]

        processed_tokens.append([int(t) for t in token_str.split(" ")])
        processed_attentions.append([int(a) for a in attention_str.split(" ")])
        processed_token_type_ids.append([int(t) for t in token_type_ids_str.split(" ")])

    dataset_dict = {
        "input_ids": processed_tokens,
        "attention_mask": processed_attentions,
        "token_type_ids": processed_token_type_ids,
        "labels": data_frame["감정"].values.tolist()
    }
        
    return dataset_dict
def tensor_dataset(dataset_dict):
    input_ids = torch.tensor(dataset_dict["input_ids"],dtype=torch.long)
    attention_mask = torch.tensor(dataset_dict["attention_mask"],dtype=torch.long)
    labels = torch.tensor(dataset_dict["labels"],dtype=torch.long)
    tensorDataset = TensorDataset(input_ids,attention_mask,labels)
    return tensorDataset

In [5]:
train_df, val_df = train_test_split(df,train_size=0.8,test_size=0.2)

print(f"학습 세트의 크기: {len(train_df)} 행")
print(f"검증 세트의 크기: {len(val_df)} 행")

train_datasets_dict = prepare_classification_dataset(train_df,tokenizer)
train_datasets = tensor_dataset(train_datasets_dict)
print(len(train_datasets))
val_datasets_dict = prepare_classification_dataset(val_df,tokenizer)
val_datasets = tensor_dataset(val_datasets_dict)
print(len(val_datasets))

학습 세트의 크기: 110931 행
검증 세트의 크기: 27733 행


데이터 파싱 중:   0%|          | 0/110931 [00:00<?, ?it/s]

110931


데이터 파싱 중:   0%|          | 0/27733 [00:00<?, ?it/s]

27733


In [6]:
train_loader = DataLoader(
    train_datasets,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0
)
print(len(train_loader))
val_loader = DataLoader(
    val_datasets,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0
)
print(len(val_loader))

1734
434


## LSTM

In [7]:
# def process_dataframe(data_frame, device,batch_size,shuffle=False):
#     tensor_x_list = []
#     attentions = []
#     token_type_ids_ = []
#     for i in tqdm(range(len(data_frame))):
#         token = data_frame.iloc[i,0]
#         token = token.split(" ")
#         token_list = []
#         for t in token:
#             token_list.append(int(t))
#         tensor_x_list.append(token_list)
        
#         attention = data_frame.iloc[i,3]
#         attention = attention.split(" ")
#         attention_list = []
#         for a in attention:
#             attention_list.append(int(a))
#         attentions.append(attention_list)

#         token_type_ids = data_frame.iloc[i,4]
#         token_type_ids = token_type_ids.split(" ")
#         token_type_ids_list = []
#         for t in token_type_ids:
#             token_type_ids_list.append(int(t))
#         token_type_ids_.append(attention_list)
        
#     tensor_x = torch.tensor(tensor_x_list, dtype=torch.long, device=device)
#     tensor_attention = torch.tensor(attentions, dtype=torch.long, device=device)
#     tensor_token_type_ids = torch.tensor(token_type_ids_, dtype=torch.long, device=device)
#     tensor_t = torch.tensor(data_frame["감정"].values.tolist(), dtype=torch.long, device=device)

#     dataset = TensorDataset(tensor_x,tensor_attention,tensor_t,tensor_token_type_ids)
#     loader = DataLoader(dataset,batch_size=batch_size,shuffle=shuffle,drop_last=True)
#     return loader
    
#     dataset = {"input_ids" : tensor_x, "attention_mask":tensor_attention,"token_type_ids":tensor_token_type_ids,"labels":tensor_t}
    
    

#     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [8]:
def LSTM_Train(epoch,device,train_loader,val_loader,NN,loss_function,optimizer):
    acc = 0
    prev_acc = 0
    cnt = 0
    for e in range(epoch):
        NN.to(device)
        loss_sum = 0
        NN.train()
        for x, attention,t in train_loader:
            y = NN(x,attention)
            loss = loss_function(y,t)
            loss_sum += loss.item()
    
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        loss_sum /= len(train_loader)
    
        NN.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for x, attention, t in val_loader:
                x = x.to(device)
                attention = attention.to(device)
                t = t.to(device)
    
                y = NN(x, attention)
                correct += (y.argmax(dim=-1) == t).sum().item()
                total += len(x)
        acc = correct / total
    
        if acc <= prev_acc:
            cnt += 1
        else :
            torch.save(NN.state_dict(), "Sentiment.pt")
            cnt = 0
            prev_acc = acc
        
        print(f"epoch  {e+1}\t\tloss {loss_sum:.12f}\tacc {acc:.4f}\tcnt {cnt}")
        
        if cnt >= 5:
            print("train halted")
            break
            
    print("---------- 학습 종료 ----------")

In [9]:
# NN = LSTM(vocab_size=vocab_size,embedding_dim=embedding_dim,hidden_dim=64,output_dim=7,n_layers=4,bidirectional=True,dropout_p=0.1)
# NN.to(device)
# loss_function = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(NN.parameters(),lr=0.001)
# epoch = 500
# LSTM_Train(epoch,device,train_loader,val_loader,NN,loss_function,optimizer)

## Transformer

In [10]:
class GradualWarmupScheduler(_LRScheduler):
    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        self.multiplier = multiplier
        self.total_epoch = total_epoch
        self.after_scheduler = after_scheduler
        self.finished = False
        super().__init__(optimizer)

    def get_lr(self):
        if self.last_epoch > self.total_epoch:
            if self.after_scheduler:
                if not self.finished:
                    self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
                    self.finished = True
                return self.after_scheduler.get_last_lr()
            return [base_lr * self.multiplier for base_lr in self.base_lrs]

        return [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if self.finished and self.after_scheduler:
            return self.after_scheduler.step(epoch)
        else:
            return super(GradualWarmupScheduler, self).step(epoch)

def Transformer_Train(epoch, device, train_loader, val_loader, NN, loss_function, optimizer, scheduler,
                      warmup_epochs=5, f1_average_mode='weighted', log_dir="runs/sentiment_experiment", save_path="saves/models/Sentiment.pt",multiplier = 1.0):
    
    writer = SummaryWriter(log_dir) # TensorBoard writer 초기화

    combined_scheduler = GradualWarmupScheduler(optimizer, multiplier=multiplier, total_epoch=warmup_epochs, after_scheduler=scheduler)
    
    best_f1 = 0.0
    epochs_no_improve = 0
    
    for e in range(epoch):
        NN.to(device)
        
        train_loss_sum = 0
        NN.train()
        for x, attention, t in tqdm(train_loader, desc=f"Epoch {e+1} Training"):
            x = x.to(device)
            attention = attention.to(device)
            t = t.to(device)

            y = NN(x, attention)
            loss = loss_function(y, t)
            train_loss_sum += loss.item()
            
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(NN.parameters(), 1.0)
            optimizer.step()
        train_loss_sum /= len(train_loader)
        
        NN.eval()
        val_correct = 0
        val_total = 0
        val_all_preds = []
        val_all_targets = []
        with torch.no_grad():
            for x, attention, t in tqdm(val_loader, desc=f"Epoch {e+1} Validation", leave=False):
                x = x.to(device)
                attention = attention.to(device)
                t = t.to(device)
                
                y = NN(x, attention)
                
                preds = y.argmax(dim=-1)
                val_correct += (preds == t).sum().item()
                val_total += len(x)

                val_all_preds.extend(preds.cpu().numpy())
                val_all_targets.extend(t.cpu().numpy())
        
        val_acc = val_correct / val_total
        
        if len(np.unique(val_all_targets)) > 1:
            val_f1 = f1_score(val_all_targets, val_all_preds, average=f1_average_mode)
        else:
            val_f1 = 1.0 if (len(val_all_targets) > 0 and np.all(np.array(val_all_preds) == np.array(val_all_targets))) else 0.0

        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(NN.state_dict(), save_path)
            print(f"모델 저장 완료 (Best F1: {best_f1:.4f}).")
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # 스케줄러 스텝은 WarmupScheduler를 통해 호출
        combined_scheduler.step(val_f1) 

        # TensorBoard에 로깅
        writer.add_scalar('Loss/train', train_loss_sum, e)
        writer.add_scalar('Metrics/val_accuracy', val_acc, e)
        writer.add_scalar('Metrics/val_f1_score', val_f1, e)
        writer.add_scalar('LearningRate', optimizer.param_groups[0]['lr'], e) # 현재 학습률 로깅
        
        print(f"Epoch {e+1}\tTrain Loss: {train_loss_sum:.6f}\tVal Acc: {val_acc:.4f}\tVal F1 ({f1_average_mode}): {val_f1:.4f}\tNo Improve Epochs: {epochs_no_improve}")
        
        if epochs_no_improve >= 10:
            print("조기 종료: 검증 F1 점수 개선 없음.")
            break
            
    writer.close() # 학습 종료 후 TensorBoard writer 닫기
    print("---------- 학습 종료 ----------")

In [11]:
SAVE_PATH = "saves/models/Sentiment_v4.pt"
NN = Transformer(vocab_size=VOCAB_SIZE,embedding_dim=128,hidden_dim=32,output_dim=7,n_layers=2,n_heads=16,dropout_p=0.1,max_len=128,pad_token_id=0)

all_train_labels = train_datasets_dict["labels"]
num_classes = NN.output_dim

label_counts = np.bincount(all_train_labels, minlength=num_classes)
class_counts = torch.tensor(label_counts, dtype=torch.float)
class_counts = torch.where(class_counts == 0, torch.tensor(1.0), class_counts)
print(f"클래스별 샘플 수: {class_counts.tolist()}")

class_weights = class_counts.sum() / class_counts
class_weights = class_weights.to(device)
print(f"계산된 클래스 가중치: {class_weights.tolist()}")

loss_function = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.AdamW(NN.parameters(), lr=5e-5) # Warm-up과 함께 쓸 초기 LR을 여기서 설정
scheduler_plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.5, patience=3)
epoch = 500

# f1_average_mode를 'weighted', 'macro', 'micro' 등으로 변경 가능
# warmup_epochs를 0으로 설정하면 Warm-up 없이 바로 scheduler_plateau가 적용됨
Transformer_Train(epoch, device, train_loader, val_loader, NN, loss_function, optimizer, scheduler_plateau,
                  warmup_epochs=5, # 5 에포크 동안 Warm-up
                  f1_average_mode='weighted',
                  log_dir="runs/sentiment_experiment_v4",
                  save_path=SAVE_PATH,
                  multiplier=5.0)

클래스별 샘플 수: [11703.0, 10593.0, 14507.0, 19764.0, 38841.0, 11043.0, 4480.0]
계산된 클래스 가중치: [9.478851318359375, 10.4721040725708, 7.646722316741943, 5.612781047821045, 2.8560283184051514, 10.045368194580078, 24.761383056640625]


Epoch 1 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 1 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

  output = torch._nested_tensor_from_mask(


모델 저장 완료 (Best F1: 0.3543).
Epoch 1	Train Loss: 1.797641	Val Acc: 0.3753	Val F1 (weighted): 0.3543	No Improve Epochs: 0




Epoch 2 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 2 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.4092).
Epoch 2	Train Loss: 1.685890	Val Acc: 0.4070	Val F1 (weighted): 0.4092	No Improve Epochs: 0




Epoch 3 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 3 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.4347).
Epoch 3	Train Loss: 1.588381	Val Acc: 0.4293	Val F1 (weighted): 0.4347	No Improve Epochs: 0




Epoch 4 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 4 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.4483).
Epoch 4	Train Loss: 1.514411	Val Acc: 0.4392	Val F1 (weighted): 0.4483	No Improve Epochs: 0




Epoch 5 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 5 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.4729).
Epoch 5	Train Loss: 1.467205	Val Acc: 0.4698	Val F1 (weighted): 0.4729	No Improve Epochs: 0




Epoch 6 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 6 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.4760).
Epoch 6	Train Loss: 1.428439	Val Acc: 0.4637	Val F1 (weighted): 0.4760	No Improve Epochs: 0




Epoch 7 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 7 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.4958).
Epoch 7	Train Loss: 1.395608	Val Acc: 0.4967	Val F1 (weighted): 0.4958	No Improve Epochs: 0




Epoch 8 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 8 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.5075).
Epoch 8	Train Loss: 1.367409	Val Acc: 0.5038	Val F1 (weighted): 0.5075	No Improve Epochs: 0




Epoch 9 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 9 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.5109).
Epoch 9	Train Loss: 1.338979	Val Acc: 0.5029	Val F1 (weighted): 0.5109	No Improve Epochs: 0




Epoch 10 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 10 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.5129).
Epoch 10	Train Loss: 1.314346	Val Acc: 0.5108	Val F1 (weighted): 0.5129	No Improve Epochs: 0




Epoch 11 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 11 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.5265).
Epoch 11	Train Loss: 1.290066	Val Acc: 0.5207	Val F1 (weighted): 0.5265	No Improve Epochs: 0




Epoch 12 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 12 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.5294).
Epoch 12	Train Loss: 1.270868	Val Acc: 0.5257	Val F1 (weighted): 0.5294	No Improve Epochs: 0




Epoch 13 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 13 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.5353).
Epoch 13	Train Loss: 1.249163	Val Acc: 0.5302	Val F1 (weighted): 0.5353	No Improve Epochs: 0




Epoch 14 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 14 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.5395).
Epoch 14	Train Loss: 1.230707	Val Acc: 0.5341	Val F1 (weighted): 0.5395	No Improve Epochs: 0




Epoch 15 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

Epoch 15 Validation:   0%|          | 0/434 [00:00<?, ?it/s]

모델 저장 완료 (Best F1: 0.5415).
Epoch 15	Train Loss: 1.211586	Val Acc: 0.5338	Val F1 (weighted): 0.5415	No Improve Epochs: 0




Epoch 16 Training:   0%|          | 0/1734 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# model = torch.load("Sentiment.pt",weights_only=False)
# torch.save(model,f"{saveFilePath}train_15.pt")

# Transfer Model

PRETRAIN_MODEL_SAVE_PATH = "saves/Pretrain.pt"
MODEL_SAVE_PATH = "Sentiment.pt"
HIDDEN_SIZE = 768
NUM_HIDDEN_LAYERS = 12
NUM_ATTENTION_HEADS = 12
INTERMEDIATE_SIZE = 3072
TYPE_VOCAB_SIZE = 2
DROPOUT_PROB = 0.1

config = CustomBertConfig(
    VOCAB_SIZE=VOCAB_SIZE,
    HIDDEN_SIZE=HIDDEN_SIZE,
    NUM_HIDDEN_LAYERS=NUM_HIDDEN_LAYERS,
    NUM_ATTENTION_HEADS=NUM_ATTENTION_HEADS,
    INTERMEDIATE_SIZE=INTERMEDIATE_SIZE,
    MAX_SEQUENCE_LENGTH=MAX_SEQUENCE_LENGTH,
    TYPE_VOCAB_SIZE=TYPE_VOCAB_SIZE,
    DROPOUT_PROB=DROPOUT_PROB
)

model = CustomBertSequenceClassification(config,PRETRAIN_MODEL_SAVE_PATH,7)

if os.path.exists(MODEL_SAVE_PATH):
    print("모델 가중치 로드 중...")
    # 먼저 CPU에 로드한 후 모델에 로드합니다.
    loaded_state_dict = torch.load(MODEL_SAVE_PATH, map_location='cpu')
    model.load_state_dict(loaded_state_dict)
    print("모델 가중치 로드 완료.")
else:
    print("새로운 모델 초기화 완료. 저장된 가중치를 찾을 수 없습니다.")

model.to(device)

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Custom Bert 모델 초기화 완료. 총 학습 가능 파라미터 수 : {num_params}')
print(f'모델이 담긴 장치 : {device}')

EPOCHS = 3
LEARNING_RATE = 5e-8
WEIGHT_DECAY = 0.1
optimizer = AdamW(model.parameters(),lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)

train_losses = []
acc = 0
prev_acc = 0
cnt = 0

print(f"\n<--- 학습 시작 ---> ({EPOCHS} 에폭)")

for e in range(EPOCHS):
    loss_sum = 0
    progress_bar = tqdm(train_loader,desc=f"Train Epoch {e+1}")
    model.train()
    for step, batch in enumerate(progress_bar):
        batch = {k: v.to(device) for k, v in batch.items()}
        
        outputs = model.forward(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            token_type_ids=batch["token_type_ids"],
            labels=batch["labels"]
        )
        loss = outputs["loss"]

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss_sum += loss.item()
        progress_bar.set_postfix({'loss':f"{(loss_sum/(step+1)):.4f}"})
        del outputs, loss
        if 'ccuda' in str(device):
            torch.cuda.empty_cache()

    avg_train_loss = loss_sum / len(train_loader)
    train_losses.append(avg_train_loss)

    print(f"Train Epoch {e+1} 완료. 평균 학습 손실 : {avg_train_loss:.4f}")

    model.eval()
    correct = 0
    total = 0
    val_progress = tqdm(val_loader, desc=f"Validation Epoch {e+1}")
    with torch.no_grad():
        for step, batch in enumerate(val_progress):
            batch = {k: v.to(device) for k, v in batch.items()}
            
            y = model.forward(input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                token_type_ids=batch["token_type_ids"]
            )
            y_logits = y["logits"]
            t = batch["labels"]
            correct += (y_logits.argmax(dim=-1) == t).sum().item()
            total += len(batch["input_ids"])
            val_progress.set_postfix({"acc" : f"{((correct/total)*100):.2f}%"})
            
    acc = correct / total
    
    print(f"Validation Epoch {e+1} 완료. 검증 정확도 : {(acc*100):.2f}%")
    
    if acc <= prev_acc:
        cnt += 1
    else :
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        cnt = 0
        prev_acc = acc

    if cnt >= 5:
        print("train halted")
        break
       
print("\n<--- 학습 완료 --->")