In [1]:
import warnings
warnings.filterwarnings("ignore", message="The PyTorch API of nested tensors is in prototype stage and will change in the near future.")
# warnings.filterwarnings("ignore", message="The epoch parameter in `scheduler.step()` was not necessary and is being deprecated where possible.")
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from CustomDataCollatorForSequenceClassification import CustomDataCollatorForSequenceClassification
from torch.optim import AdamW
from datasets import Dataset
import random
import pandas as pd
import numpy as np
from WordPieceTokenizer import WordPieceTokenizer as Tokenizer
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
from CustomBertSequenceClassification import CustomBertSequenceClassification
from CustomBert import CustomBertConfig
import CustomBert
from collections import Counter
import os
from Model import LSTM
from Model import Transformer, PositionalEncoding
from sklearn.metrics import f1_score
from torch.optim.lr_scheduler import _LRScheduler,ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter
from TrainDataset import prepare_classification_dataset, tensor_dataset
# Make CUDA kernel launches synchronous so device-side errors surface at the
# call that triggered them (easier debugging, slower execution).
os.environ.update({"CUDA_LAUNCH_BLOCKING": "1"})

# Sizes shared by the cells below.
BATCH_SIZE = 32
MAX_SEQUENCE_LENGTH = 128

# Project directories; both keep their trailing slash because paths are built
# by plain string concatenation.
dataFilePath = 'datasets/'
saveFilePath = 'saves/'
vocab_file_path = saveFilePath + 'vocab.txt'

# Use the GPU whenever PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# WordPiece tokenizer built from the saved vocabulary file; the vocabulary size
# is read back from it for the model definitions.
tokenizer = Tokenizer(
    vocab_file_path,
    do_lower_case=False,
    strip_accents=False,
    clean_text=True,
)
VOCAB_SIZE = tokenizer.get_vocab_size()

In [2]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds PyTorch (CPU, the current CUDA device and all CUDA devices), NumPy's
    legacy global generator and Python's ``random`` module with the same value,
    then disables cuDNN auto-tuning and turns on cuDNN deterministic mode.

    Args:
        seed: Integer seed handed unchanged to every generator.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark, cudnn.deterministic = False, True

# SEED = 32
# set_seed(SEED)

In [3]:
# Load the labelled sentiment corpus; the first CSV column is used as the index.
df = pd.read_csv('datasets/sentiment_train.csv', index_col=0)

# Replace each emotion name in the '감정' column by its integer class id.
# The class id is the position of the name in this tuple.
EMOTION_LABELS = ('불안', '당황', '분노', '슬픔', '중립', '행복', '혐오')
for class_id, emotion in enumerate(EMOTION_LABELS):
    df.loc[(df['감정'] == emotion), '감정'] = class_id

print(f"원본 df 크기: {len(df)}")
print(f"원본 df 감정 분포 (매핑 후): {Counter(df['감정'])}")

# Stratified 80/20 split; random_state is fixed so the split is reproducible.
train_df, val_df = train_test_split(
    df,
    train_size=0.8,
    test_size=0.2,
    stratify=df['감정'],
    random_state=42,
)

print(f"학습 데이터프레임 크기: {len(train_df)}")
print(f"검증 데이터프레임 크기: {len(val_df)}")
print(f"학습 데이터프레임 감정 분포: {Counter(train_df['감정'])}")
print(f"검증 데이터프레임 감정 분포: {Counter(val_df['감정'])}")


# Parse each split with the project helper, then wrap the result as a dataset.
print("학습 데이터 파싱 중...")
train_datasets_dict = prepare_classification_dataset(train_df)
print("검증 데이터 파싱 중...")
val_datasets_dict = prepare_classification_dataset(val_df)

train_datasets = tensor_dataset(train_datasets_dict)
val_datasets = tensor_dataset(val_datasets_dict)

print(f"학습 데이터셋 크기: {len(train_datasets)}")
print(f"검증 데이터셋 크기: {len(val_datasets)}")

# Both loaders share batch size and worker count; only the training loader shuffles.
loader_kwargs = {"batch_size": BATCH_SIZE, "num_workers": 0}

train_loader = DataLoader(train_datasets, shuffle=True, **loader_kwargs)
print(f"학습 DataLoader 배치 수: {len(train_loader)}")

val_loader = DataLoader(val_datasets, shuffle=False, **loader_kwargs)
print(f"검증 DataLoader 배치 수: {len(val_loader)}")

원본 df 크기: 138664
원본 df 감정 분포 (매핑 후): Counter({4: 48501, 3: 24748, 2: 18171, 0: 14651, 5: 13727, 1: 13224, 6: 5642})
학습 데이터프레임 크기: 110931
검증 데이터프레임 크기: 27733
학습 데이터프레임 감정 분포: Counter({4: 38801, 3: 19798, 2: 14537, 0: 11721, 5: 10981, 1: 10579, 6: 4514})
검증 데이터프레임 감정 분포: Counter({4: 9700, 3: 4950, 2: 3634, 0: 2930, 5: 2746, 1: 2645, 6: 1128})
학습 데이터 파싱 중...


데이터 파싱 중:   0%|          | 0/110931 [00:00<?, ?it/s]

검증 데이터 파싱 중...


데이터 파싱 중:   0%|          | 0/27733 [00:00<?, ?it/s]

학습 데이터셋 크기: 110931
검증 데이터셋 크기: 27733
학습 DataLoader 배치 수: 3467
검증 DataLoader 배치 수: 867


## LSTM

In [4]:
# def process_dataframe(data_frame, device,batch_size,shuffle=False):
#     tensor_x_list = []
#     attentions = []
#     token_type_ids_ = []
#     for i in tqdm(range(len(data_frame))):
#         token = data_frame.iloc[i,0]
#         token = token.split(" ")
#         token_list = []
#         for t in token:
#             token_list.append(int(t))
#         tensor_x_list.append(token_list)
        
#         attention = data_frame.iloc[i,3]
#         attention = attention.split(" ")
#         attention_list = []
#         for a in attention:
#             attention_list.append(int(a))
#         attentions.append(attention_list)

#         token_type_ids = data_frame.iloc[i,4]
#         token_type_ids = token_type_ids.split(" ")
#         token_type_ids_list = []
#         for t in token_type_ids:
#             token_type_ids_list.append(int(t))
#         token_type_ids_.append(attention_list)
        
#     tensor_x = torch.tensor(tensor_x_list, dtype=torch.long, device=device)
#     tensor_attention = torch.tensor(attentions, dtype=torch.long, device=device)
#     tensor_token_type_ids = torch.tensor(token_type_ids_, dtype=torch.long, device=device)
#     tensor_t = torch.tensor(data_frame["감정"].values.tolist(), dtype=torch.long, device=device)

#     dataset = TensorDataset(tensor_x,tensor_attention,tensor_t,tensor_token_type_ids)
#     loader = DataLoader(dataset,batch_size=batch_size,shuffle=shuffle,drop_last=True)
#     return loader
    
#     dataset = {"input_ids" : tensor_x, "attention_mask":tensor_attention,"token_type_ids":tensor_token_type_ids,"labels":tensor_t}
    
    

#     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [5]:
def LSTM_Train(epoch,device,train_loader,val_loader,NN,loss_function,optimizer,
               save_path="Sentiment.pt",patience=5):
    """Train ``NN`` with early stopping on validation accuracy.

    Every epoch performs one optimisation pass over ``train_loader`` and one
    evaluation pass over ``val_loader``. Whenever the validation accuracy is
    strictly higher than the best value seen so far, ``NN.state_dict()`` is
    written to ``save_path``. Training stops once ``patience`` consecutive
    epochs bring no improvement.

    Args:
        epoch: Maximum number of epochs to run.
        device: Device the model and every batch are moved to.
        train_loader: Sized iterable yielding ``(x, attention, t)`` batches.
        val_loader: Iterable yielding ``(x, attention, t)`` batches.
        NN: Model called as ``NN(x, attention)``; its output is passed to
            ``loss_function`` and arg-maxed over the last dimension.
        loss_function: Callable ``loss_function(y, t)`` returning a scalar loss.
        optimizer: Optimizer that updates the parameters of ``NN``.
        save_path: Where the best ``state_dict`` is saved
            (default ``"Sentiment.pt"``, the previously hard-coded path).
        patience: Number of consecutive non-improving epochs tolerated before
            stopping (default 5, the previously hard-coded value).

    Returns:
        None. Progress is printed once per epoch.
    """
    # Moving the model once is enough; Module.to() moves the parameters in place.
    NN.to(device)

    acc = 0
    prev_acc = 0
    cnt = 0
    for e in range(epoch):
        loss_sum = 0
        NN.train()
        for x, attention, t in train_loader:
            # The training loop must move its batches as well: the validation
            # loop already did, and CPU batches crash against a CUDA model.
            x = x.to(device)
            attention = attention.to(device)
            t = t.to(device)

            y = NN(x,attention)
            loss = loss_function(y,t)
            loss_sum += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        loss_sum /= max(len(train_loader), 1)

        NN.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for x, attention, t in val_loader:
                x = x.to(device)
                attention = attention.to(device)
                t = t.to(device)

                y = NN(x, attention)
                correct += (y.argmax(dim=-1) == t).sum().item()
                total += len(x)
        acc = correct / total if total else 0.0

        if acc <= prev_acc:
            cnt += 1
        else :
            # New best validation accuracy: checkpoint and reset the counter.
            torch.save(NN.state_dict(), save_path)
            cnt = 0
            prev_acc = acc

        print(f"epoch  {e+1}\t\tloss {loss_sum:.12f}\tacc {acc:.4f}\tcnt {cnt}")

        if cnt >= patience:
            print("train halted")
            break

    print("---------- 학습 종료 ----------")

In [6]:
# NN = LSTM(vocab_size=vocab_size,embedding_dim=embedding_dim,hidden_dim=64,output_dim=7,n_layers=4,bidirectional=True,dropout_p=0.1)
# NN.to(device)
# loss_function = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(NN.parameters(),lr=0.001)
# epoch = 500
# LSTM_Train(epoch,device,train_loader,val_loader,NN,loss_function,optimizer)

## Transformer

In [7]:
class GradualWarmupScheduler(_LRScheduler):
    """Linear learning-rate warm-up that hands over to another scheduler.

    While ``last_epoch <= total_epoch`` the learning rate of every parameter
    group is interpolated linearly from ``base_lr`` (at step 0) to
    ``base_lr * multiplier`` (at step ``total_epoch``). On the first step past
    ``total_epoch`` the rate is held at ``base_lr * multiplier`` and control is
    handed to ``after_scheduler``; every later ``step()`` is delegated to it.

    Note that with ``multiplier=1.0`` the interpolation is flat: the learning
    rate stays at ``base_lr`` for the whole warm-up period.

    Args:
        optimizer: Wrapped optimizer.
        multiplier: Ratio between the learning rate at the end of the warm-up
            and the optimizer's base learning rate.
        total_epoch: Number of ``step()`` calls the warm-up lasts. ``0`` (or a
            negative value) disables the warm-up.
        after_scheduler: Optional scheduler that takes over after the warm-up.
            A ``ReduceLROnPlateau`` instance receives the ``metrics`` passed to
            ``step()``.
    """

    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        # These attributes must exist before super().__init__(): the base class
        # constructor performs an initial step(), which calls get_lr().
        self.multiplier = multiplier
        self.total_epoch = total_epoch
        self.after_scheduler = after_scheduler
        self.finished = False
        super().__init__(optimizer)

    def get_lr(self):
        """Return the learning rate of each parameter group for ``last_epoch``."""
        peak_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]

        if self.last_epoch > self.total_epoch:
            if self.after_scheduler and not self.finished:
                # The follow-up scheduler continues from the end of the warm-up,
                # so its base rates are the warmed-up ones.
                self.after_scheduler.base_lrs = peak_lrs
                self.finished = True
            # Hold the warmed-up rate on the hand-over step. This does not call
            # after_scheduler.get_last_lr(), which ReduceLROnPlateau does not
            # offer on older PyTorch releases, and it avoids a dip back to
            # base_lr when multiplier != 1.
            return peak_lrs

        if self.total_epoch <= 0:
            # No warm-up requested; also avoids dividing by zero below.
            return peak_lrs

        return [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]

    def step(self, metrics=None):
        """Advance the schedule by one step.

        Args:
            metrics: Monitored value. It is only forwarded after the warm-up
                and only when ``after_scheduler`` is a ``ReduceLROnPlateau``;
                it is ignored during the warm-up and on the hand-over step.
        """
        if self.finished and self.after_scheduler:
            if isinstance(self.after_scheduler, ReduceLROnPlateau) and metrics is not None:
                self.after_scheduler.step(metrics)
            else:
                # Regular schedulers (or a missing metric) are stepped without arguments.
                self.after_scheduler.step()
            # Mirror what the delegate wrote to the optimizer so that
            # get_last_lr() on this wrapper stays accurate.
            self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
            return None
        # Warm-up phase and the hand-over step are driven by get_lr().
        return super().step()

def Transformer_Train(epoch, device, train_loader, val_loader, NN, loss_function, optimizer, scheduler,
                      warmup_epochs=5, log_dir="runs/sentiment_experiment", 
                      save_path="saves/models/Sentiment.pt", multiplier=1.0, patience=5):
    """Train ``NN`` and early-stop on the validation weighted F1 score.

    Each epoch runs one optimisation pass over ``train_loader`` (gradient norm
    clipped to 1.0) and one evaluation pass over ``val_loader``. Whenever the
    weighted F1 strictly improves, ``NN.state_dict()`` is written to
    ``save_path``. The warm-up-wrapped scheduler is then stepped with that F1,
    and loss, metrics and learning rate are logged to TensorBoard.

    Args:
        epoch: Maximum number of epochs.
        device: Device the model and every batch are moved to.
        train_loader: Sized iterable yielding ``(x, attention, t)`` batches.
        val_loader: Iterable yielding ``(x, attention, t)`` batches.
        NN: Model called as ``NN(x, attention)``; its output is arg-maxed over
            the last dimension to obtain predictions.
        loss_function: Callable ``loss_function(y, t)`` returning a scalar loss.
        optimizer: Optimizer that updates the parameters of ``NN``.
        scheduler: Scheduler that takes over after the warm-up.
        warmup_epochs: Length of the warm-up in epochs.
        log_dir: TensorBoard log directory.
        save_path: File the best ``state_dict`` is saved to.
        multiplier: Warm-up multiplier forwarded to ``GradualWarmupScheduler``.
        patience: Number of consecutive epochs without a new best weighted F1
            after which training stops.

    Returns:
        None. Progress is printed once per epoch.
    """
    # Create the checkpoint directory up front so the first torch.save() cannot
    # fail after a full epoch of training.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    writer = SummaryWriter(log_dir)
    try:
        combined_scheduler = GradualWarmupScheduler(optimizer, multiplier=multiplier, total_epoch=warmup_epochs, after_scheduler=scheduler)

        best_f1_weighted = 0.0
        epochs_no_improve = 0

        # Moving the model once is enough; Module.to() moves parameters in place.
        NN.to(device)

        for e in range(epoch):
            train_loss_sum = 0
            NN.train()
            for x, attention, t in tqdm(train_loader, desc=f"Epoch {e+1} Training"):
                x = x.to(device)
                attention = attention.to(device)
                t = t.to(device)

                y = NN(x, attention)
                loss = loss_function(y, t)
                train_loss_sum += loss.item()

                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(NN.parameters(), 1.0)
                optimizer.step()
            # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
            train_loss_sum /= max(len(train_loader), 1)

            NN.eval()
            val_correct = 0
            val_total = 0
            val_all_preds = []
            val_all_targets = []
            with torch.no_grad():
                for x, attention, t in tqdm(val_loader, desc=f"Epoch {e+1} Validation", leave=False):
                    x = x.to(device)
                    attention = attention.to(device)
                    t = t.to(device)

                    y = NN(x, attention)

                    preds = y.argmax(dim=-1)
                    val_correct += (preds == t).sum().item()
                    val_total += len(x)

                    val_all_preds.extend(preds.cpu().tolist())
                    val_all_targets.extend(t.cpu().tolist())

            val_acc = val_correct / val_total if val_total else 0.0

            if len(np.unique(val_all_targets)) > 1:
                val_f1_weighted = f1_score(val_all_targets, val_all_preds, average='weighted', zero_division=0)
                val_f1_macro = f1_score(val_all_targets, val_all_preds, average='macro', zero_division=0)
            else:
                # Fewer than two classes in the targets: score 1.0 only when
                # every prediction matches, otherwise 0.0.
                val_f1_weighted = 1.0 if (len(val_all_targets) > 0 and np.all(np.array(val_all_preds) == np.array(val_all_targets))) else 0.0
                val_f1_macro = val_f1_weighted

            if val_f1_weighted > best_f1_weighted:
                best_f1_weighted = val_f1_weighted
                torch.save(NN.state_dict(), save_path)
                print(f"모델 저장 완료 (Best Weighted F1: {best_f1_weighted:.4f}).")
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            # Scheduler step: the weighted F1 is passed as the monitored metric.
            combined_scheduler.step(val_f1_weighted)

            # TensorBoard logging.
            writer.add_scalar('Loss/train', train_loss_sum, e)
            writer.add_scalar('Metrics/val_accuracy', val_acc, e)
            writer.add_scalar('Metrics/val_f1_weighted', val_f1_weighted, e)
            writer.add_scalar('Metrics/val_f1_macro', val_f1_macro, e)
            writer.add_scalar('LearningRate', optimizer.param_groups[0]['lr'], e)

            print(f"Epoch {e+1}\tTrain Loss: {train_loss_sum:.6f}\tVal Acc: {val_acc:.4f}\t\tVal F1 (Weighted): {val_f1_weighted:.4f}\tVal F1 (Macro): {val_f1_macro:.4f}\tNo Improve Epochs: {epochs_no_improve}")

            if epochs_no_improve >= patience:
                print("조기 종료: 검증 F1 점수 개선 없음.")
                break
    finally:
        # Flush and close the event file even when training is interrupted or
        # fails part-way through.
        writer.close()
    print("---------- 학습 종료 ----------")

In [8]:
train_number = 21
save_path = f"saves/models/Sentiment_v{train_number}.pt"
NN = Transformer(vocab_size=VOCAB_SIZE,embedding_dim=128,hidden_dim=32,output_dim=7,n_layers=2,
                 n_heads=16,dropout_p=0.3,max_len=128,pad_token_id=0)
# Warm-start from the run-19 checkpoint. map_location remaps the stored tensors
# to the active device, so a checkpoint saved from a GPU run also loads on a
# CPU-only machine.
NN.load_state_dict(torch.load("saves/models/Sentiment_v19.pt", map_location=device))
all_train_labels_original = train_df['감정'].values.astype(int)

num_classes = NN.output_dim

# Per-class sample counts of the training split.
label_counts_original = np.bincount(all_train_labels_original, minlength=num_classes)
class_counts_tensor = torch.tensor(label_counts_original, dtype=torch.float)

# Treat an empty class as having one sample so the division below stays finite.
class_counts_tensor = torch.where(class_counts_tensor == 0, torch.tensor(1.0), class_counts_tensor)

print(f"학습 데이터 클래스별 샘플 수: {class_counts_tensor.tolist()}")
print(f"학습 데이터 클래스 분포: {Counter(all_train_labels_original)}")

# Inverse-frequency class weights: total sample count / class sample count.
class_weights = (class_counts_tensor.sum() / class_counts_tensor)

class_weights = class_weights.to(device)

print(f"계산된 클래스 가중치: {class_weights.tolist()}")

loss_function = nn.CrossEntropyLoss(weight=class_weights)
# loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(NN.parameters(), lr=3e-5)
# mode="max" because the monitored value is the validation weighted F1.
scheduler_plateau = ReduceLROnPlateau(optimizer, mode="max", factor=0.8, patience=5,min_lr=1e-7)
# Upper bound only; early stopping (patience=20 below) ends the run.
epoch = 10000

# Sanity check: label distribution of one training batch.
data, attention_mask, labels = next(iter(train_loader))
print(f"첫 배치 레이블 분포: {Counter(labels.cpu().numpy())}")

Transformer_Train(epoch, device, train_loader, val_loader, NN, loss_function, optimizer, scheduler_plateau,
                  warmup_epochs=3,
                  log_dir=f"runs/sentiment_experiment_v{train_number}",
                  save_path=save_path,
                  multiplier=1.0,
                  patience=20)

학습 데이터 클래스별 샘플 수: [11721.0, 10579.0, 14537.0, 19798.0, 38801.0, 10981.0, 4514.0]
학습 데이터 클래스 분포: Counter({np.int64(4): 38801, np.int64(3): 19798, np.int64(2): 14537, np.int64(0): 11721, np.int64(5): 10981, np.int64(1): 10579, np.int64(6): 4514})
계산된 클래스 가중치: [9.46429443359375, 10.485962867736816, 7.630941867828369, 5.603141784667969, 2.8589727878570557, 10.10208511352539, 24.574878692626953]
첫 배치 레이블 분포: Counter({np.int64(4): 8, np.int64(3): 8, np.int64(5): 6, np.int64(1): 4, np.int64(0): 2, np.int64(6): 2, np.int64(2): 2})


Epoch 1 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 1 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

모델 저장 완료 (Best Weighted F1: 0.6511).
Epoch 1	Train Loss: 1.059796	Val Acc: 0.6437		Val F1 (Weighted): 0.6511	Val F1 (Macro): 0.5963	No Improve Epochs: 0


Epoch 2 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 2 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

모델 저장 완료 (Best Weighted F1: 0.6521).
Epoch 2	Train Loss: 1.053838	Val Acc: 0.6459		Val F1 (Weighted): 0.6521	Val F1 (Macro): 0.6001	No Improve Epochs: 0


Epoch 3 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 3 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

모델 저장 완료 (Best Weighted F1: 0.6528).
Epoch 3	Train Loss: 1.053319	Val Acc: 0.6455		Val F1 (Weighted): 0.6528	Val F1 (Macro): 0.5991	No Improve Epochs: 0


Epoch 4 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 4 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

모델 저장 완료 (Best Weighted F1: 0.6532).
Epoch 4	Train Loss: 1.050151	Val Acc: 0.6435		Val F1 (Weighted): 0.6532	Val F1 (Macro): 0.5992	No Improve Epochs: 0


Epoch 5 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 5 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 5	Train Loss: 1.050916	Val Acc: 0.6409		Val F1 (Weighted): 0.6496	Val F1 (Macro): 0.5972	No Improve Epochs: 1


Epoch 6 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 6 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 6	Train Loss: 1.048516	Val Acc: 0.6422		Val F1 (Weighted): 0.6489	Val F1 (Macro): 0.5960	No Improve Epochs: 2


Epoch 7 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 7 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 7	Train Loss: 1.044536	Val Acc: 0.6437		Val F1 (Weighted): 0.6512	Val F1 (Macro): 0.5987	No Improve Epochs: 3


Epoch 8 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 8 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

모델 저장 완료 (Best Weighted F1: 0.6552).
Epoch 8	Train Loss: 1.047208	Val Acc: 0.6479		Val F1 (Weighted): 0.6552	Val F1 (Macro): 0.6016	No Improve Epochs: 0


Epoch 9 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 9 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 9	Train Loss: 1.039500	Val Acc: 0.6393		Val F1 (Weighted): 0.6476	Val F1 (Macro): 0.5967	No Improve Epochs: 1


Epoch 10 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 10 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 10	Train Loss: 1.039717	Val Acc: 0.6400		Val F1 (Weighted): 0.6502	Val F1 (Macro): 0.5957	No Improve Epochs: 2


Epoch 11 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 11 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 11	Train Loss: 1.038846	Val Acc: 0.6452		Val F1 (Weighted): 0.6539	Val F1 (Macro): 0.5987	No Improve Epochs: 3


Epoch 12 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 12 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 12	Train Loss: 1.036234	Val Acc: 0.6396		Val F1 (Weighted): 0.6444	Val F1 (Macro): 0.5932	No Improve Epochs: 4


Epoch 13 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 13 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 13	Train Loss: 1.037147	Val Acc: 0.6372		Val F1 (Weighted): 0.6460	Val F1 (Macro): 0.5928	No Improve Epochs: 5


Epoch 14 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 14 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 14	Train Loss: 1.032497	Val Acc: 0.6335		Val F1 (Weighted): 0.6435	Val F1 (Macro): 0.5929	No Improve Epochs: 6


Epoch 15 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 15 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 15	Train Loss: 1.029418	Val Acc: 0.6399		Val F1 (Weighted): 0.6470	Val F1 (Macro): 0.5943	No Improve Epochs: 7


Epoch 16 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 16 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 16	Train Loss: 1.028059	Val Acc: 0.6474		Val F1 (Weighted): 0.6536	Val F1 (Macro): 0.6002	No Improve Epochs: 8


Epoch 17 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 17 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 17	Train Loss: 1.028669	Val Acc: 0.6424		Val F1 (Weighted): 0.6508	Val F1 (Macro): 0.5980	No Improve Epochs: 9


Epoch 18 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 18 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 18	Train Loss: 1.026169	Val Acc: 0.6342		Val F1 (Weighted): 0.6422	Val F1 (Macro): 0.5899	No Improve Epochs: 10


Epoch 19 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 19 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 19	Train Loss: 1.027224	Val Acc: 0.6452		Val F1 (Weighted): 0.6517	Val F1 (Macro): 0.5982	No Improve Epochs: 11


Epoch 20 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 20 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 20	Train Loss: 1.021726	Val Acc: 0.6410		Val F1 (Weighted): 0.6491	Val F1 (Macro): 0.5969	No Improve Epochs: 12


Epoch 21 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 21 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 21	Train Loss: 1.021688	Val Acc: 0.6368		Val F1 (Weighted): 0.6458	Val F1 (Macro): 0.5939	No Improve Epochs: 13


Epoch 22 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 22 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 22	Train Loss: 1.023543	Val Acc: 0.6421		Val F1 (Weighted): 0.6501	Val F1 (Macro): 0.5966	No Improve Epochs: 14


Epoch 23 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 23 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 23	Train Loss: 1.019945	Val Acc: 0.6370		Val F1 (Weighted): 0.6451	Val F1 (Macro): 0.5938	No Improve Epochs: 15


Epoch 24 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 24 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 24	Train Loss: 1.019881	Val Acc: 0.6446		Val F1 (Weighted): 0.6505	Val F1 (Macro): 0.5975	No Improve Epochs: 16


Epoch 25 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 25 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 25	Train Loss: 1.017958	Val Acc: 0.6461		Val F1 (Weighted): 0.6523	Val F1 (Macro): 0.5999	No Improve Epochs: 17


Epoch 26 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 26 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 26	Train Loss: 1.013973	Val Acc: 0.6420		Val F1 (Weighted): 0.6496	Val F1 (Macro): 0.5972	No Improve Epochs: 18


Epoch 27 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 27 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 27	Train Loss: 1.013700	Val Acc: 0.6410		Val F1 (Weighted): 0.6485	Val F1 (Macro): 0.5972	No Improve Epochs: 19


Epoch 28 Training:   0%|          | 0/3467 [00:00<?, ?it/s]

Epoch 28 Validation:   0%|          | 0/867 [00:00<?, ?it/s]

Epoch 28	Train Loss: 1.013432	Val Acc: 0.6408		Val F1 (Weighted): 0.6485	Val F1 (Macro): 0.5966	No Improve Epochs: 20
조기 종료: 검증 F1 점수 개선 없음.
---------- 학습 종료 ----------


## Transfer Model

# Checkpoint locations: pretrained encoder weights and the fine-tuned classifier.
PRETRAIN_MODEL_SAVE_PATH = "saves/Pretrain.pt"
MODEL_SAVE_PATH = "Sentiment.pt"

# Encoder hyperparameters handed to CustomBertConfig.
HIDDEN_SIZE = 768
NUM_HIDDEN_LAYERS = 12
NUM_ATTENTION_HEADS = 12
INTERMEDIATE_SIZE = 3072
TYPE_VOCAB_SIZE = 2
DROPOUT_PROB = 0.1

config = CustomBertConfig(**{
    "VOCAB_SIZE": VOCAB_SIZE,
    "HIDDEN_SIZE": HIDDEN_SIZE,
    "NUM_HIDDEN_LAYERS": NUM_HIDDEN_LAYERS,
    "NUM_ATTENTION_HEADS": NUM_ATTENTION_HEADS,
    "INTERMEDIATE_SIZE": INTERMEDIATE_SIZE,
    "MAX_SEQUENCE_LENGTH": MAX_SEQUENCE_LENGTH,
    "TYPE_VOCAB_SIZE": TYPE_VOCAB_SIZE,
    "DROPOUT_PROB": DROPOUT_PROB,
})

# Third argument is 7, matching the seven emotion classes used in this notebook.
model = CustomBertSequenceClassification(config,PRETRAIN_MODEL_SAVE_PATH,7)

# Resume from an existing fine-tuned checkpoint when one is present.
if not os.path.exists(MODEL_SAVE_PATH):
    print("새로운 모델 초기화 완료. 저장된 가중치를 찾을 수 없습니다.")
else:
    print("모델 가중치 로드 중...")
    # Load onto the CPU first; the model is moved to the target device below.
    loaded_state_dict = torch.load(MODEL_SAVE_PATH, map_location='cpu')
    model.load_state_dict(loaded_state_dict)
    print("모델 가중치 로드 완료.")

model.to(device)

# Count only the parameters that will receive gradients.
num_params = 0
for param in model.parameters():
    if param.requires_grad:
        num_params += param.numel()
print(f'Custom Bert 모델 초기화 완료. 총 학습 가능 파라미터 수 : {num_params}')
print(f'모델이 담긴 장치 : {device}')

def _as_model_inputs(batch, device):
    """Return one DataLoader batch as a dict of keyword tensors on ``device``.

    Mapping batches (anything with ``.items()``) are moved to ``device`` key by
    key. The loaders built earlier in this notebook yield
    ``(input_ids, attention_mask, labels)`` tuples instead; those are converted
    to the keyword layout the model is called with below.
    """
    if hasattr(batch, "items"):
        return {k: v.to(device) for k, v in batch.items()}

    input_ids, attention_mask, labels = (v.to(device) for v in batch)
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        # NOTE(review): tuple batches carry no segment ids; all-zero ids assume
        # single-segment inputs. Confirm against CustomBertSequenceClassification.
        "token_type_ids": torch.zeros_like(input_ids),
        "labels": labels,
    }


EPOCHS = 3
LEARNING_RATE = 5e-8
WEIGHT_DECAY = 0.1
optimizer = AdamW(model.parameters(),lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)

train_losses = []
acc = 0
prev_acc = 0
cnt = 0

print(f"\n<--- 학습 시작 ---> ({EPOCHS} 에폭)")

for e in range(EPOCHS):
    loss_sum = 0
    progress_bar = tqdm(train_loader,desc=f"Train Epoch {e+1}")
    model.train()
    for step, batch in enumerate(progress_bar):
        batch = _as_model_inputs(batch, device)

        # Call the module itself rather than .forward() so registered hooks run.
        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            token_type_ids=batch["token_type_ids"],
            labels=batch["labels"]
        )
        loss = outputs["loss"]

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss_sum += loss.item()
        progress_bar.set_postfix({'loss':f"{(loss_sum/(step+1)):.4f}"})
        del outputs, loss
        # The original tested for 'ccuda', which never matched, so the cache
        # was never released.
        # NOTE(review): emptying the cache every step costs throughput; move it
        # to once per epoch if memory allows.
        if 'cuda' in str(device):
            torch.cuda.empty_cache()

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    avg_train_loss = loss_sum / max(len(train_loader), 1)
    train_losses.append(avg_train_loss)

    print(f"Train Epoch {e+1} 완료. 평균 학습 손실 : {avg_train_loss:.4f}")

    model.eval()
    correct = 0
    total = 0
    val_progress = tqdm(val_loader, desc=f"Validation Epoch {e+1}")
    with torch.no_grad():
        for step, batch in enumerate(val_progress):
            batch = _as_model_inputs(batch, device)

            # No labels are passed here; only the logits are needed.
            y = model(input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                token_type_ids=batch["token_type_ids"]
            )
            y_logits = y["logits"]
            t = batch["labels"]
            correct += (y_logits.argmax(dim=-1) == t).sum().item()
            total += len(batch["input_ids"])
            val_progress.set_postfix({"acc" : f"{((correct/total)*100):.2f}%"})

    acc = correct / total if total else 0.0

    print(f"Validation Epoch {e+1} 완료. 검증 정확도 : {(acc*100):.2f}%")

    # Keep the checkpoint with the best validation accuracy; stop after five
    # consecutive epochs without improvement.
    if acc <= prev_acc:
        cnt += 1
    else :
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        cnt = 0
        prev_acc = acc

    if cnt >= 5:
        print("train halted")
        break

print("\n<--- 학습 완료 --->")