### 1) 모델 세팅, 데이터 세팅, 함수 세팅

In [4]:
import re
import ntpath 
from BuildModel import BuildModel
from ModelType import ModelType
import torch
import torchvision.models as M

# Inclusive (low, high) score bounds paired with the emotion they encode.
# The list order defines the class index order used for training.
RANGES = [
    ((1, 50), "Happiness"),
    ((51, 100), "Surprise"),
    ((101, 150), "Neutral"),
    ((151, 200), "Fear"),
    ((201, 250), "Disgust"),
    ((251, 300), "Anger"),
    ((301, 350), "Sadness"),
]
# Class names in index order, taken from the second element of each entry.
CLASSES = [emotion for _bounds, emotion in RANGES]
# Maps each class name to its position in CLASSES.
label2idx = dict(zip(CLASSES, range(len(CLASSES))))

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# One output per entry in RANGES.
# NOTE(review): BuildModel is project code not shown here; it presumably
# replaces the classifier head and exposes .model/.preprocess/.weights - confirm.
buildModel = BuildModel(
    ModelType.CONVNEXT_LARGE,
    len(RANGES),
    M.ConvNeXt_Large_Weights.IMAGENET1K_V1,
)
model = buildModel.model
model.to(device)

def score_to_label(score:int):
    """Return the label of the first RANGES entry whose bounds contain score.

    Bounds are inclusive on both ends. When no entry matches, the result
    is None.
    """
    matches = (
        label
        for (low, high), label in RANGES
        if not (score < low or score > high)
    )
    return next(matches, None)

def parse_label_from_name(path: str) -> int:
    """Extract the integer score from a mel-image file name.

    The file stem (base name without extension) must look like
    ``<digits>-<digits>_mel``; the second group of digits is returned.
    ``ntpath`` is used so both ``/`` and ``\\`` are treated as separators.

    Args:
        path: File path or bare file name.

    Returns:
        The second number in the stem, as an int.

    Raises:
        ValueError: If the stem does not match the expected pattern.
    """
    stem = ntpath.splitext(ntpath.basename(path))[0]
    m = re.match(r'^\d+-(\d+)_mel$', stem)
    if m is None:
        # Without this guard the failure is an AttributeError on None.group,
        # which does not say which file was the problem.
        raise ValueError(
            f"Cannot parse label from {path!r}: "
            f"expected a file name like '<id>-<score>_mel.<ext>'"
        )
    return int(m.group(1))

def parse_label_idx(path: str) -> int:
    """Return the class index encoded in a mel-image file name.

    The score is parsed from the file name with parse_label_from_name,
    mapped to a label with score_to_label, and looked up in label2idx.

    Args:
        path: File path or bare file name.

    Returns:
        The index of the label in label2idx.

    Raises:
        KeyError: If the parsed score falls outside every range in RANGES.
        Errors raised by parse_label_from_name propagate unchanged.
    """
    score = parse_label_from_name(path)
    name = score_to_label(score)
    if name is None:
        # score_to_label yields None for out-of-range scores; the lookup
        # below would then fail with the uninformative "KeyError: None".
        raise KeyError(
            f"score {score} parsed from {path!r} is outside every range in RANGES"
        )
    return label2idx[name]



### 2) 이미지 전처리

In [5]:
from torchvision import transforms as T

# Mel-spectrogram: 1 channel -> 3 channels, followed by the preprocessing
# object exposed by buildModel.
# Train and test currently use the same steps (no augmentation on train).
train_tf = T.Compose([T.Grayscale(num_output_channels=3), buildModel.preprocess])
test_tf = T.Compose([T.Grayscale(num_output_channels=3), buildModel.preprocess])

### 3) 학습, 검증 데이터 분할

In [17]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
import os

# Pick the dataset directory for the current platform.
# NOTE: ROOT is left undefined when os.name is neither "nt" nor "posix".
if os.name == "nt":   # Windows
    ROOT = "C:\\PythonProject\\aug-08month_project5\\jin_sup\\mel_image"
elif os.name == "posix":  # Linux, macOS
    ROOT = "/home/wanted-1/PotenupWorkspace/aug-project5/jin_sup/mel_image"

# Every PNG below ROOT, sorted so the split is reproducible across runs.
all_full_paths = sorted(str(p) for p in Path(ROOT).rglob("*.png"))
# Class index of each file, derived from its file name.
y_all = [parse_label_idx(p) for p in all_full_paths]

# 80/20 split, stratified on the class index, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    all_full_paths,
    y_all,
    test_size=0.2,
    stratify=y_all,
    random_state=42,
)

### 4) 배치 및 전처리 적용

In [7]:
from PIL import Image
class MelSpecImageDataset(Dataset):
    """Dataset of mel-spectrogram image files labelled by their file names.

    Each item is ``(transform(image), class_index)`` where the image is
    loaded from disk as RGB and the class index comes from
    ``parse_label_idx`` applied to the file path.

    Args:
        paths: Iterable of image file paths; copied into a list.
        transform: Callable applied to the loaded PIL image.
    """

    def __init__(self, paths, transform):
        self.paths = list(paths)
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.paths)

    def __getitem__(self, i):
        """Load item ``i`` and return ``(transformed_image, class_index)``."""
        p = self.paths[i]
        y = parse_label_idx(p)
        # Image.open is lazy and keeps the file handle open. convert()
        # returns a new image, so the source file can be closed right after
        # instead of waiting for garbage collection.
        with Image.open(p) as src:
            img = src.convert("RGB")
        x = self.transform(img)
        return x, y

In [8]:
BATCH_SIZE = 32
NUM_WORKERS = 0  # 0 = load batches in the main process

X_train_ds = MelSpecImageDataset(X_train, train_tf)
X_test_ds = MelSpecImageDataset(X_test, test_tf)

# Settings shared by both loaders; only the shuffling differs.
_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
X_train_dl = DataLoader(X_train_ds, shuffle=True, **_loader_kwargs)
X_test_dl = DataLoader(X_test_ds, shuffle=False, **_loader_kwargs)

### 5) 모델 학습 및 텐서 보드

In [9]:
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
import torch.nn as nn
from torch import amp

# Cross-entropy over the class logits, optimised with AdamW.
criterion = nn.CrossEntropyLoss()
optimz = optim.AdamW(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)
import torch
# Environment check.
print(torch.cuda.is_available())  # False here means the GPU was not detected
print(torch.__version__)  # installed torch version


True
2.5.1+cu121


In [10]:
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from datetime import datetime
import os

# Train the model for EPOCHS epochs, logging the per-batch loss to
# TensorBoard, then save the trained weights under model/.
writer = SummaryWriter(log_dir="runs/convnext_experiment1")
EPOCHS = 10

step = 0  # global batch counter, used as the TensorBoard x-axis
try:
    for epoch in range(1, EPOCHS + 1):
        model.train()

        # `train` is the batch of input images, `label` the class indices.
        for train, label in tqdm(X_train_dl):
            try:
                optimz.zero_grad()
                train, label = train.to(device), label.to(device)

                logits = model(train)
                loss = criterion(logits, label)

                loss.backward()
                optimz.step()
                # Record the batch loss in TensorBoard.
                writer.add_scalar("Loss/train", loss.item(), step)
                step += 1

            except Exception as e:
                # Report which epoch failed, then let the error propagate.
                print(f"❌ Error in training loop (epoch={epoch}): {e}")
                raise
finally:
    # Flush buffered events even when training is interrupted or fails.
    writer.close()


timestamp = datetime.now().strftime("%m-%d_%H-%M-%S")
filename = f"result_{timestamp}"
# Create the output directory first so a missing folder cannot throw away
# a finished training run at save time.
os.makedirs("model", exist_ok=True)
torch.save(model.state_dict(), f"model/model_{buildModel.weights}___{timestamp}.pth")


  0%|          | 1/259 [03:27<14:53:30, 207.79s/it]


KeyboardInterrupt: 

### 6) 저장된 모델 불러오기 및 평가

In [None]:
from sklearn.metrics import classification_report, accuracy_score, f1_score
from datetime import datetime
import os
import torch


# Load a saved checkpoint into a fresh ConvNeXt-Large, run it over the test
# loader, and report F1 scores plus a per-class classification report.
y_true, y_pred = [], []


if os.name == "posix":  # Linux, macOS
    path = "/home/wanted-1/PotenupWorkspace/aug-project5/jin_sup/model/model_ConvNeXt_Small_Weights.IMAGENET1K_V1___08-22_12-57-04.pth"
elif os.name == "nt":   # Windows
    path = "C:\\PythonProject\\aug-08month_project5\\jin_sup\\model\\model_ConvNeXt_Small_Weights.IMAGENET1K_V1___08-22_12-57-04.pth"
else:
    raise RuntimeError(f"No checkpoint path configured for os.name={os.name!r}")

# The checkpoint is a plain state_dict, so weights_only=True is sufficient
# and avoids unpickling arbitrary objects.
checkpoint = torch.load(path, map_location="cpu", weights_only=True)

# NOTE(review): the file name says ConvNeXt_Small but a ConvNeXt-Large is
# built here; load_state_dict (strict by default) fails loudly on a real
# mismatch - confirm the file name is just mislabelled.
# weights=None: every parameter is overwritten by the checkpoint below, so
# fetching the ImageNet weights first would be wasted work.
model = M.convnext_large(weights=None)
in_features = model.classifier[2].in_features
# One output per class; keeps the head in sync with CLASSES.
model.classifier[2] = nn.Linear(in_features, len(CLASSES), bias=True)
model.load_state_dict(checkpoint)
model.to(device)

model.eval()
with torch.no_grad():
    for train, label in X_test_dl:
        # Only the inputs are needed on the device; labels are just collected.
        train = train.to(device)

        logits = model(train)
        pred = logits.argmax(dim=1)

        y_true.extend(label.tolist())
        y_pred.extend(pred.cpu().tolist())

f1_macro = f1_score(y_true, y_pred, average="macro")   # unweighted mean of per-class F1
f1_micro = f1_score(y_true, y_pred, average="micro")   # computed over all samples at once
f1_weighted = f1_score(y_true, y_pred, average="weighted")  # per-class F1 weighted by class support

print("F1(macro):", f1_macro)
print("F1(micro):", f1_micro)
print("F1(weighted):", f1_weighted)


print(classification_report(y_true, y_pred, target_names=CLASSES, digits=4))

  checkpoint  =torch.load("C:\\PythonProject\\aug-08month_project5\\jin_sup\\model\\model_ConvNeXt_Small_Weights.IMAGENET1K_V1___08-22_12-57-04.pth", map_location="cpu")


F1(macro): 0.6885698595265232
F1(micro): 0.6885562530178657
F1(weighted): 0.6885881110784972
              precision    recall  f1-score   support

   Happiness     0.6082    0.7425    0.6687       299
    Surprise     0.6657    0.7819    0.7191       298
     Neutral     0.7609    0.7635    0.7622       296
        Fear     0.6075    0.6610    0.6331       295
     Disgust     0.6653    0.5661    0.6117       295
       Anger     0.7860    0.6497    0.7114       294
     Sadness     0.7869    0.6531    0.7138       294

    accuracy                         0.6886      2071
   macro avg     0.6972    0.6882    0.6886      2071
weighted avg     0.6970    0.6886    0.6886      2071

