In [None]:
# ./train_audio.py

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification, DataCollatorWithPadding

import pandas as pd
from pathlib import Path
import mlflow
import optuna
import json
import os
from datetime import datetime
import shutil

from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR, ReduceLROnPlateau

import numpy as np
import audiomentations as A

from core.data.audio_dataset import AudioDataset
from core.training.trainer import train_model
from core.data.DataCollatorForAudio import DataCollatorForAudio
from torch.cuda.amp import GradScaler, autocast

from sklearn.utils.class_weight import compute_class_weight

# Wrapper that picks an augmentation pipeline based on the sample's label.
class ClassAwareAugment:
    """Dispatch a sample to a strong or weak augmentation by emotion label.

    Labels contained in ``minority_classes`` go through ``strong_augment``;
    every other label goes through ``weak_augment``. Both pipelines are
    invoked with the keyword arguments ``samples`` and ``sample_rate``.
    """

    def __init__(self, minority_classes, strong_augment, weak_augment):
        self.minority_classes = minority_classes
        self.strong_augment = strong_augment
        self.weak_augment = weak_augment

    def __call__(self, samples: np.ndarray, sample_rate: int, emotion: str):
        """Return the augmented samples for one clip labelled ``emotion``."""
        is_minority = emotion in self.minority_classes
        pipeline = self.strong_augment if is_minority else self.weak_augment
        return pipeline(samples=samples, sample_rate=sample_rate)

def objective(trial: optuna.Trial):
    """Run one Optuna trial: fine-tune an audio emotion classifier and score it.

    Samples the hyperparameters from ``trial``, fine-tunes the selected
    pretrained audio classifier with a smaller learning rate for the backbone
    than for the classification head, logs parameters/metrics/artifacts to
    MLflow, and writes the trained weights plus a metrics JSON under the
    checkpoint directory.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ``saved_metrics['val_accuracy']`` from ``train_model`` as a float.
        This is a score to be maximised by the study.

    Raises:
        ValueError: If the sampled model name has no known checkpoint id.
    """
    with mlflow.start_run():
        # --- Hyperparameter suggestions ---
        model_name = trial.suggest_categorical("model_name", ["hubert-base"])
        lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
        optimizer_name = trial.suggest_categorical("optimizer", ["AdamW"])

        # Ratio of the backbone learning rate to the head learning rate.
        # Example: lr=1e-4 and backbone_lr_scale=0.1 gives a backbone lr of 1e-5.
        backbone_lr_scale = trial.suggest_float("backbone_lr_scale", 0.01, 0.5, log=True)

        # These two values are consumed by the data loaders and by train_model
        # below; they must be defined here (previously they only existed in a
        # disabled code block, which made this function raise NameError).
        BATCH_SIZE = trial.suggest_categorical("batch_size", [4, 8])
        accumulation_steps = trial.suggest_int("accumulation_steps", 1, 4)

        mlflow.log_params(trial.params)

        # CUDA performance flags: cuDNN autotuning and TF32 matmuls.
        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True

        DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {DEVICE}")

        # --- Data preparation ---
        sampling_percent = 50
        mlflow.log_param("sampling_percent", sampling_percent)
        mlflow.set_tag("dataset_description", f"{sampling_percent} 데이터셋으로 훈련")

        NUM_EPOCHS = 100
        EARLY_STOPPING_PATIENCE = 5

        DATA_DIR = Path(f"./datasets/audio_sampling_sets/dataset_{sampling_percent}_percent")

        # Labels treated as minority classes; they receive the strong pipeline.
        minority_classes = ['surprise', 'disgust', 'fear']

        # Strong augmentation pipeline for the minority classes.
        strong_augment = A.Compose([
            A.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
            A.TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
            A.PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        ])

        # Weak augmentation pipeline for all remaining classes.
        weak_augment = A.Compose([
            A.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.005, p=0.2),
        ])

        train_augmenter = ClassAwareAugment(
            minority_classes=minority_classes,
            strong_augment=strong_augment,
            weak_augment=weak_augment
        )

        # Augmentation is applied to the training split only.
        train_dataset = AudioDataset(metadata_path=DATA_DIR / "train.csv", audio_dir=DATA_DIR / "train", transform=train_augmenter)
        val_dataset = AudioDataset(metadata_path=DATA_DIR / "val.csv", audio_dir=DATA_DIR / "val", transform=None)

        # --- Model and feature extractor ---
        model_ids = {
            "wav2vec2": "inseong00/wav2vec2-large-xlsr-korean-autumn",
            "hubert-large": "team-lucid/hubert-large-korean",
            "hubert-base": "team-lucid/hubert-base-korean",
        }
        if model_name not in model_ids:
            raise ValueError("지원하지 않는 모델 이름입니다.")
        model_id = model_ids[model_name]

        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)

        model = AutoModelForAudioClassification.from_pretrained(
            model_id,
            num_labels=len(train_dataset.classes),
            label2id=train_dataset.class_to_idx,
            id2label=train_dataset.idx_to_class,
            ignore_mismatched_sizes=True  # tolerate a classifier head of a different size
        ).to(DEVICE)

        # --- Parameter groups ---
        # Use the model's own backbone prefix instead of a hard-coded "hubert."
        # so that the split also works for the wav2vec2 checkpoint.
        backbone_prefix = f"{model.base_model_prefix}."
        backbone_params = []
        classifier_params = []
        for name, param in model.named_parameters():
            if name.startswith(backbone_prefix):
                backbone_params.append(param)
            else:
                classifier_params.append(param)

        # Optimizer with a reduced learning rate for the backbone.
        optimizer_grouped_parameters = [
            {'params': backbone_params, 'lr': lr * backbone_lr_scale},
            {'params': classifier_params, 'lr': lr}
        ]
        optimizer = getattr(optim, optimizer_name)(optimizer_grouped_parameters)

        # --- Class weights from the training label distribution ---
        class_names = train_dataset.classes
        labels = [train_dataset.class_to_idx[emotion] for emotion in train_dataset.df['emotion']]

        class_weights = compute_class_weight(
            'balanced',
            classes=np.unique(labels),
            y=labels
        )
        class_weights = torch.tensor(class_weights, dtype=torch.float).to(DEVICE)

        print(f"클래스 가중치 적용: { {name: f'{w:.2f}' for name, w in zip(class_names, class_weights)} }")

        # --- Collator and data loaders ---
        data_collator = DataCollatorForAudio(feature_extractor=feature_extractor, padding=True)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=data_collator, num_workers=4, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, collate_fn=data_collator, num_workers=2, pin_memory=True)

        # Class-weighted loss.
        criterion = nn.CrossEntropyLoss(weight=class_weights)

        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

        checkpoint_dir = Path("./infrastructure/models/weights/checkpoints")
        checkpoint_dir.mkdir(parents=True, exist_ok=True)

        # NOTE(review): this path does not depend on the trial, so every trial
        # after the first starts from the weights saved by the previous trial.
        # Confirm that this warm start is intended for a hyperparameter search.
        CHECKPOINT_PATH = checkpoint_dir / f'{model_name}_{sampling_percent}_trained.pth'
        if CHECKPOINT_PATH.exists():
            print("체크포인트를 불러옵니다...")
            # map_location lets a checkpoint saved on GPU load on a CPU-only host.
            checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE)
            model.load_state_dict(checkpoint)
            print("체크포인트(모델 가중치) 로드 완료!")
        else:
            print("체크포인트가 존재하지 않습니다. 처음부터 훈련을 시작합니다.")

        trained_model, saved_metrics = train_model(model,
                                                   train_loader,
                                                   val_loader,
                                                   criterion,
                                                   optimizer,
                                                   scheduler,
                                                   DEVICE,
                                                   num_epochs=NUM_EPOCHS,
                                                   patience=EARLY_STOPPING_PATIENCE,
                                                   accumulation_steps=accumulation_steps)

        # --- Log the metrics returned by train_model to MLflow ---
        mlflow.log_metrics({
            "best_train_loss": float(saved_metrics['train_loss']),
            "best_train_accuracy": float(saved_metrics['train_accuracy']),
            "best_val_loss": float(saved_metrics['val_loss']),
            "best_val_accuracy": float(saved_metrics['val_accuracy']),
            "best_macro_f1": float(saved_metrics['macro_f1_score']),
        })

        # Overwrite the rolling checkpoint with the trained weights.
        torch.save(trained_model.state_dict(), CHECKPOINT_PATH)

        # Timestamped copy of the weights, also uploaded as an MLflow artifact.
        now_date = datetime.now().strftime("%Y%m%d_%H%M%S")
        trained_model_save = checkpoint_dir / f'{model_name}_{sampling_percent}_trained_{now_date}.pth'
        torch.save(trained_model.state_dict(), trained_model_save)
        mlflow.log_artifact(trained_model_save, artifact_path="model")
        print("훈련된 모델 가중치가 저장되었습니다.")

        # Detailed metrics as JSON. Write mode (not append) keeps the file a
        # single valid JSON document even if the path is ever reused.
        METRICS_PATH = checkpoint_dir / f'{model_name}_{sampling_percent}_percent_trained_metrics_{now_date}.json'
        with open(METRICS_PATH, 'w', encoding='utf-8') as f:
            json.dump(saved_metrics, f, ensure_ascii=False, indent=4)
        print(f"상세 분석 결과가 저장되었습니다: {METRICS_PATH}")
        mlflow.log_artifact(METRICS_PATH, artifact_path="metrics")

        # --- Objective value ---
        # Validation accuracy is returned, so the study must maximise it.
        return float(saved_metrics['val_accuracy'])

if __name__ == '__main__':
    # Before running this cell, start MLflow from a terminal:
    #   mlflow ui
    # If that alone fails with "MlflowException: When an mlflow-artifacts URI
    # was supplied, the tracking URI must be a valid http or https URI",
    # start a tracking server instead. The port must match the tracking URI
    # configured below:
    #   mlflow server --host 127.0.0.1 --port 5003
    
    # Point MLflow at the tracking server.
    mlflow.set_tracking_uri("http://127.0.0.1:5003")
    
    # Experiment name, used to tell training runs apart in the dashboard.
    mlflow.set_experiment("Voice Emotion Classification Tuning")
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=5) # run n trials with different hyperparameter combinations
    
    print("Best trial:")
    trial = study.best_trial
    print(f"  Value (Best Val Accuracy): {trial.value}")
    print(f"  Params: {trial.params}")

[I 2025-08-23 21:00:20,230] A new study created in memory with name: no-name-d7bb6861-3b08-4e60-80b2-6e7d51d2c13a


Using device: cuda


Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at team-lucid/hubert-base-korean and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


체크포인트가 존재하지 않습니다. 처음부터 훈련을 시작합니다.
Epoch 1/100
----------
  [Batch 20/1870] Train Loss: 2.0737 Acc: 0.0000
  [Batch 1870/1870] Train Loss: 1.7358 Acc: 0.2857
Train Loss: 1.6944 Acc: 0.4188
Val Loss: 1.6668 Acc: 0.4324 Macro-F1: 0.0862
  -> Val Loss 개선됨! (1.6668) 모델 저장.
Epoch 2/100
----------
  [Batch 20/1870] Train Loss: 1.1591 Acc: 0.7500
  [Batch 1870/1870] Train Loss: 1.4253 Acc: 0.5714
Train Loss: 1.6458 Acc: 0.4324
Val Loss: 1.6391 Acc: 0.4324 Macro-F1: 0.0862
  -> Val Loss 개선됨! (1.6391) 모델 저장.
Epoch 3/100
----------
  [Batch 20/1870] Train Loss: 1.7404 Acc: 0.3750
  [Batch 1870/1870] Train Loss: 2.2370 Acc: 0.1429
Train Loss: 1.6412 Acc: 0.4324
Val Loss: 1.6452 Acc: 0.4324 Macro-F1: 0.0862
  -> Val Loss 개선되지 않음. EarlyStopping Counter: 1/5
Epoch 4/100
----------
  [Batch 20/1870] Train Loss: 1.6329 Acc: 0.3750
  [Batch 1870/1870] Train Loss: 1.4260 Acc: 0.5714
Train Loss: 1.6430 Acc: 0.4324
Val Loss: 1.6319 Acc: 0.4324 Macro-F1: 0.0862
  -> Val Loss 개선됨! (1.6319) 모델 저장.
Epoch 5/100

[I 2025-08-24 00:12:01,138] Trial 0 finished with value: 0.4324 and parameters: {'lr': 0.0006796187214702883, 'optimizer': 'AdamW', 'scheduler': 'CosineAnnealingLR', 'batch_size': 8, 'accumulation_steps': 3}. Best is trial 0 with value: 0.4324.


훈련된 모델 가중치가 저장되었습니다.
상세 분석 결과가 저장되었습니다: infrastructure/models/weights/checkpoints/hubert-base_50_percent_trained_metrics_20250824_001159.json
🏃 View run colorful-lamb-118 at: http://127.0.0.1:5003/#/experiments/971624972587108480/runs/35fed3acc28f495f9150e077398eaf22
🧪 View experiment at: http://127.0.0.1:5003/#/experiments/971624972587108480
Using device: cuda


Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at team-lucid/hubert-base-korean and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


체크포인트를 불러옵니다...
체크포인트(모델 가중치) 로드 완료!
Epoch 1/100
----------
  [Batch 20/3740] Train Loss: 2.3232 Acc: 0.0000
  [Batch 3740/3740] Train Loss: 2.4303 Acc: 0.0000
Train Loss: 1.6320 Acc: 0.4324
Val Loss: 1.6309 Acc: 0.4324 Macro-F1: 0.0862
  -> Val Loss 개선됨! (1.6309) 모델 저장.
Epoch 2/100
----------
  [Batch 20/3740] Train Loss: 1.9036 Acc: 0.2500
  [Batch 3740/3740] Train Loss: 1.5828 Acc: 0.3333
Train Loss: 1.6311 Acc: 0.4323
Val Loss: 1.6307 Acc: 0.4324 Macro-F1: 0.0862
  -> Val Loss 개선됨! (1.6307) 모델 저장.
Epoch 3/100
----------
  [Batch 20/3740] Train Loss: 1.9386 Acc: 0.2500
  [Batch 3740/3740] Train Loss: 2.0365 Acc: 0.0000
Train Loss: 1.6315 Acc: 0.4319
Val Loss: 1.6307 Acc: 0.4324 Macro-F1: 0.0862
  -> Val Loss 개선됨! (1.6307) 모델 저장.
Epoch 4/100
----------
  [Batch 20/3740] Train Loss: 1.2489 Acc: 0.5000


[W 2025-08-24 00:55:59,500] Trial 1 failed with parameters: {'lr': 1.3067644672977184e-05, 'optimizer': 'AdamW', 'scheduler': 'CosineAnnealingLR', 'batch_size': 4, 'accumulation_steps': 3} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/root/feellog_02/.venv/lib/python3.9/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_765512/2772290466.py", line 155, in objective
    trained_model, saved_metrics = train_model(model,
  File "/root/feellog_02/core/training/trainer.py", line 102, in train_model
    scaler.scale(loss).backward()
  File "/root/feellog_02/.venv/lib/python3.9/site-packages/torch/_tensor.py", line 648, in backward
    torch.autograd.backward(
  File "/root/feellog_02/.venv/lib/python3.9/site-packages/torch/autograd/__init__.py", line 353, in backward
    _engine_run_backward(
  File "/root/feellog_02/.venv/lib/python3.9/site-packages/torch/autograd/graph

🏃 View run awesome-wren-751 at: http://127.0.0.1:5003/#/experiments/971624972587108480/runs/ac40582e492a42f986f99ad6b3dea335
🧪 View experiment at: http://127.0.0.1:5003/#/experiments/971624972587108480


KeyboardInterrupt: 

In [12]:
# Inspection cell: rebuild the classifier with the label set of the 5% split,
# load the weights saved by the 50% training run, and list its parameters.
# NOTE(review): the checkpoint comes from the 50% split while the label set is
# read from the 5% split; load_state_dict fails if the class counts differ.
model_id = "team-lucid/hubert-base-korean"
model_name = "hubert-base"
sampling_percent = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DATA_DIR = Path(f"./datasets/audio_sampling_sets/dataset_{sampling_percent}_percent")

feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
train_dataset = AudioDataset(metadata_path=DATA_DIR / "train.csv", audio_dir=DATA_DIR / "train")
model = AutoModelForAudioClassification.from_pretrained(
    model_id,
    num_labels=len(train_dataset.classes),
    label2id=train_dataset.class_to_idx,
    id2label=train_dataset.idx_to_class,
    ignore_mismatched_sizes=True  # tolerate a classifier head of a different size
).to(DEVICE)

# map_location lets a checkpoint saved on GPU load on a CPU-only host.
checkpoint = torch.load(
    Path('./infrastructure/models/weights/checkpoints/hubert-base_50_trained.pth'),
    map_location=DEVICE,
)
model.load_state_dict(checkpoint)
print("체크포인트(모델 가중치) 로드 완료!")
print(model.state_dict().keys())
print("-"*50)
print("-"*50)
# model.parameters() is a generator, so printing it directly only shows
# "<generator object ...>". Print a usable summary instead.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"parameters: total={total_params:,} trainable={trainable_params:,}")

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at team-lucid/hubert-base-korean and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


체크포인트(모델 가중치) 로드 완료!
odict_keys(['hubert.masked_spec_embed', 'hubert.feature_extractor.conv_layers.0.conv.weight', 'hubert.feature_extractor.conv_layers.0.layer_norm.weight', 'hubert.feature_extractor.conv_layers.0.layer_norm.bias', 'hubert.feature_extractor.conv_layers.1.conv.weight', 'hubert.feature_extractor.conv_layers.2.conv.weight', 'hubert.feature_extractor.conv_layers.3.conv.weight', 'hubert.feature_extractor.conv_layers.4.conv.weight', 'hubert.feature_extractor.conv_layers.5.conv.weight', 'hubert.feature_extractor.conv_layers.6.conv.weight', 'hubert.feature_projection.layer_norm.weight', 'hubert.feature_projection.layer_norm.bias', 'hubert.feature_projection.projection.weight', 'hubert.feature_projection.projection.bias', 'hubert.encoder.pos_conv_embed.conv.bias', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'hubert.encoder.layer_norm.weight', 'hubert.encoder.layer_norm.bias', 'h

In [2]:
# Release the cached blocks held by PyTorch's CUDA allocator, then print the
# allocator's memory report.
torch.cuda.empty_cache()
cuda_memory_report = torch.cuda.memory_summary()
print(cuda_memory_report)

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 1            |        cudaMalloc retries: 1         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   1756 MiB |   1756 MiB |   1756 MiB | 496128 B   |
|       from large pool |   1753 MiB |   1753 MiB |   1753 MiB |      0 B   |
|       from small pool |      2 MiB |      2 MiB |      2 MiB | 496128 B   |
|---------------------------------------------------------------------------|
| Active memory         |   1756 MiB |   1756 MiB |   1756 MiB | 496128 B   |
|       from large pool |   1753 MiB |   1753 MiB |   1753 MiB |      0 B   |
|       from small pool |      2 MiB |      2 MiB |      2 MiB | 496128 B   |
|---------------------------------------------------------------

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import AutoFeatureExtractor, AutoModel 
from sklearn.utils.class_weight import compute_class_weight
import pandas as pd
from pathlib import Path
import mlflow
import optuna
import json
import os
import shutil
import numpy as np
from datetime import datetime
import audiomentations as A

from core.data.audio_dataset import AudioDataset
from core.training.trainer import train_model
from core.data.DataCollatorForAudio import DataCollatorForAudio
from transformers import get_linear_schedule_with_warmup
import torch.nn.functional as F

# Wrapper that picks an augmentation pipeline based on the sample's label.
class ClassAwareAugment:
    """Route a clip to a strong or weak augmentation depending on its label.

    Clips whose ``emotion`` is listed in ``minority_classes`` are processed by
    ``strong_augment``; all others are processed by ``weak_augment``. Each
    pipeline is called with the keyword arguments ``samples`` and
    ``sample_rate``.
    """

    def __init__(self, minority_classes, strong_augment, weak_augment):
        self.minority_classes = minority_classes
        self.strong_augment = strong_augment
        self.weak_augment = weak_augment

    def __call__(self, samples: np.ndarray, sample_rate: int, emotion: str):
        """Return the augmented samples for one clip labelled ``emotion``."""
        if emotion not in self.minority_classes:
            chosen = self.weak_augment
        else:
            chosen = self.strong_augment
        return chosen(samples=samples, sample_rate=sample_rate)

# Classification head that pools the frame sequence with learned attention.
class AttentionHead(nn.Module):
    """Attention pooling over time followed by a linear classifier.

    Args:
        input_size: Size of the last dimension of the incoming features.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        # Scores each time step with a single scalar.
        self.attention_weights = nn.Linear(input_size, 1)
        # Maps the pooled feature vector to class logits.
        self.classifier = nn.Linear(input_size, num_classes)

    def forward(self, features, mask=None):
        """Pool ``features`` over dim 1 and classify the pooled vector.

        Args:
            features: Tensor of shape ``[batch, seq_len, hidden_size]``.
            mask: Optional ``[batch, seq_len]`` tensor; non-zero/True marks
                frames that may receive attention. ``None`` attends to every
                frame (the previous behaviour).

        Returns:
            Logits of shape ``[batch, num_classes]``.
        """
        # 1. One attention score per time step.
        attention_scores = self.attention_weights(features).squeeze(-1)

        # 2. Exclude masked (e.g. padded) frames. The dtype minimum is used
        #    instead of -inf so that a fully masked row yields uniform weights
        #    rather than NaN.
        if mask is not None:
            blocked = ~mask.to(torch.bool)
            lowest = torch.finfo(attention_scores.dtype).min
            attention_scores = attention_scores.masked_fill(blocked, lowest)

        # 3. Normalise the scores into weights that sum to 1 over time.
        attention_weights = F.softmax(attention_scores, dim=1)

        # 4. Weighted sum over time (attention pooling).
        weighted_features = torch.sum(features * attention_weights.unsqueeze(-1), dim=1)

        # 5. Classify the pooled representation.
        return self.classifier(weighted_features)


# Alternative MLP classification head (dropout -> dense -> ReLU -> dropout -> projection).
class CustomClassificationHead(nn.Module):
    """Two-layer MLP head operating on already pooled features.

    Args:
        input_size: Size of the incoming feature vector.
        num_classes: Number of output logits.
        dropout_prob: Dropout probability used before both linear layers.
    """

    def __init__(self, input_size, num_classes, dropout_prob=0.5):
        super().__init__()
        self.dense = nn.Linear(input_size, input_size // 2)
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout_prob)
        self.out_proj = nn.Linear(input_size // 2, num_classes)

    def forward(self, features):
        """Return class logits for ``features``."""
        x = self.dropout(features)
        x = self.dense(x)
        x = self.activation(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x


# Final model: pretrained backbone combined with the attention-pooling head.
class EmotionFineTuningModel(nn.Module):
    """Pretrained audio backbone followed by an ``AttentionHead``.

    Args:
        model_id: Identifier passed to ``AutoModel.from_pretrained``.
        num_labels: Number of emotion classes.
    """

    def __init__(self, model_id, num_labels):
        super().__init__()
        # Backbone: the pretrained base model without a task head.
        self.base_model = AutoModel.from_pretrained(model_id)

        # Head: attention pooling + linear classifier over the hidden states.
        self.classifier = AttentionHead(self.base_model.config.hidden_size, num_labels)
        # Attribute-name prefix of the backbone parameters; callers use it to
        # separate backbone parameters from head parameters.
        self.base_model_prefix = "base_model"

    def forward(self, input_values, attention_mask=None):
        """Return class logits for a batch of raw audio inputs.

        Args:
            input_values: Batch passed to the backbone as ``input_values``.
            attention_mask: Optional sample-level mask passed to the backbone.
                When the backbone can convert it to a frame-level mask, the
                padded frames are also excluded from attention pooling.

        Returns:
            Logits produced by the attention head.
        """
        outputs = self.base_model(input_values=input_values, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state

        # The backbone downsamples the waveform, so the sample-level mask has
        # to be converted to frame resolution before it can mask the pooling.
        # Backbones that do not provide the converter fall back to unmasked
        # pooling.
        frame_mask = None
        if attention_mask is not None:
            to_frame_mask = getattr(self.base_model, "_get_feature_vector_attention_mask", None)
            if to_frame_mask is not None:
                frame_mask = to_frame_mask(hidden_states.shape[1], attention_mask)

        # The whole sequence goes to the head; pooling happens there.
        return self.classifier(hidden_states, frame_mask)

def objective(trial: optuna.Trial):
    """Run one Optuna trial of two-stage fine-tuning and return its score.

    Stage 1 freezes the backbone and trains only the classification head.
    Stage 2 unfreezes everything and trains the whole model with a smaller
    learning rate for the backbone. Parameters, metrics and a JSON report are
    logged to MLflow.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ``best_metrics['val_accuracy']`` from the stage-2 ``train_model`` call
        as a float, or ``0.0`` when no metrics were returned.
    """
    now_date = datetime.now().strftime("%Y%m%d_%H%M%S")
    # One MLflow run per trial, named after the trial number.
    with mlflow.start_run(run_name=f"trial_{trial.number}_{now_date}"):
        # --- Settings and hyperparameters ---
        DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True

        SAMPLING_PERCENT = 50

        # Epoch budgets and early-stopping patience for the two stages.
        HEAD_TUNE_EPOCHS = 15   # stage 1: head only
        FULL_TUNE_EPOCHS = 35   # stage 2: whole model
        HEAD_TUNE_PATIENCE = 5  # stage 1 early-stopping patience
        PATIENCE = 10           # stage 2 early-stopping patience

        MODEL_NAME = trial.suggest_categorical("model_name", ["hubert-base"])
        # Learning-rate range kept low to reduce early training instability.
        EARLY_LR = trial.suggest_float("lr", 1e-6, 2e-5, log=True)
        BACKBONE_LR_SCALE = trial.suggest_float("backbone_lr_scale", 0.05, 0.2, log=True)
        BATCH_SIZE = trial.suggest_categorical("batch_size", [4, 8])
        ACCUMULATION_STEPS = trial.suggest_int("accumulation_steps", 1, 4)

        mlflow.log_params(trial.params)
        mlflow.log_param("sampling_percent", SAMPLING_PERCENT)

        # --- Data preparation ---
        DATA_DIR = Path(f"./datasets/audio_sampling_sets/dataset_{SAMPLING_PERCENT}_percent")

        # Labels treated as minority classes; they receive the strong pipeline.
        minority_classes = ['surprise', 'disgust', 'fear']

        # Strong augmentation pipeline for the minority classes.
        strong_augment = A.Compose([
            A.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
            A.TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
            A.PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        ])

        # Weak augmentation pipeline for all remaining classes.
        weak_augment = A.Compose([
            A.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.005, p=0.2),
        ])

        train_augmenter = ClassAwareAugment(
            minority_classes=minority_classes,
            strong_augment=strong_augment,
            weak_augment=weak_augment
        )

        # Augmentation is applied to the training split only.
        train_dataset = AudioDataset(metadata_path=DATA_DIR / "train.csv", audio_dir=DATA_DIR / "train", transform=train_augmenter)
        val_dataset = AudioDataset(metadata_path=DATA_DIR / "val.csv", audio_dir=DATA_DIR / "val", transform=None)

        # --- Model ---
        model_id = f"team-lucid/{MODEL_NAME}-korean"
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
        model = EmotionFineTuningModel(model_id, num_labels=len(train_dataset.classes)).to(DEVICE)

        # --- Class weights from the training label distribution ---
        class_names = train_dataset.classes
        labels = [train_dataset.class_to_idx[emotion] for emotion in train_dataset.df['emotion']]

        class_weights = compute_class_weight(
            'balanced',
            classes=np.unique(labels),
            y=labels
        )
        class_weights = torch.tensor(class_weights, dtype=torch.float).to(DEVICE)

        print(f"클래스 가중치 적용: { {name: f'{w:.2f}' for name, w in zip(class_names, class_weights)} }")

        # --- Collator and data loaders ---
        data_collator = DataCollatorForAudio(feature_extractor=feature_extractor, padding=True)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=data_collator, num_workers=4, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, collate_fn=data_collator, num_workers=2, pin_memory=True)

        # Class-weighted loss.
        criterion = nn.CrossEntropyLoss(weight=class_weights)

        # =================================================================
        # === Stage 1: train the custom head only ===
        # =================================================================
        print(f"\n--- Trial {trial.number}, 파인튜닝 1단계 시작: 커스텀 헤드 훈련 ---")

        # Freeze every parameter that belongs to the backbone.
        for name, param in model.named_parameters():
            if name.startswith(model.base_model_prefix):
                param.requires_grad = False

        head_params = [p for p in model.parameters() if p.requires_grad]
        optimizer_head = optim.AdamW(head_params, lr=EARLY_LR)
        # Simple linear decay, configured in epochs.
        scheduler_head = torch.optim.lr_scheduler.LinearLR(optimizer_head, start_factor=1.0, end_factor=0.1, total_iters=HEAD_TUNE_EPOCHS)
        train_model(
            model, train_loader, val_loader, criterion, optimizer_head, scheduler_head, DEVICE,
            num_epochs=HEAD_TUNE_EPOCHS, patience=HEAD_TUNE_PATIENCE, accumulation_steps=ACCUMULATION_STEPS
        )

        # =================================================================
        # === Stage 2: fine-tune the whole model ===
        # =================================================================
        print(f"\n--- Trial {trial.number}, 파인튜닝 2단계 시작: 전체 모델 미세 조정 ---")

        # Unfreeze the backbone again.
        for param in model.parameters():
            param.requires_grad = True

        # Optimizer with a reduced learning rate for the backbone.
        backbone_params = model.base_model.parameters()
        classifier_params = model.classifier.parameters()

        optimizer_full = optim.AdamW([
            {'params': backbone_params, 'lr': EARLY_LR * BACKBONE_LR_SCALE},
            {'params': classifier_params, 'lr': EARLY_LR}
        ])

        # Linear schedule with warm-up over the first 10% of the steps.
        # NOTE(review): the step count assumes one scheduler step per batch,
        # whereas the stage-1 scheduler above is configured in epochs. Confirm
        # how often train_model calls scheduler.step(); if it steps once per
        # epoch, the warm-up never finishes - TODO confirm.
        num_training_steps = len(train_loader) * FULL_TUNE_EPOCHS
        num_warmup_steps = int(num_training_steps * 0.1)

        scheduler_full = get_linear_schedule_with_warmup(
            optimizer_full,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps
        )

        best_model, best_metrics = train_model(
            model, train_loader, val_loader, criterion, optimizer_full, scheduler_full, DEVICE,
            num_epochs=FULL_TUNE_EPOCHS,
            patience=PATIENCE,
            start_epoch=0,
            accumulation_steps=ACCUMULATION_STEPS
        )

        # --- Record results ---
        # Without metrics there is nothing to log; report the lowest score
        # instead of failing on a missing metrics dict.
        if not best_metrics:
            return 0.0

        REPORT_DIR = Path("./reports")
        REPORT_DIR.mkdir(exist_ok=True)
        mlflow.log_metrics({
            "best_train_loss": float(best_metrics['train_loss']),
            "best_train_accuracy": float(best_metrics['train_accuracy']),
            "best_val_loss": float(best_metrics['val_loss']),
            "best_val_accuracy": float(best_metrics['val_accuracy']),
            "best_macro_f1": float(best_metrics['macro_f1_score']),
        })
        REPORT_PATH = REPORT_DIR / f"{MODEL_NAME}_{SAMPLING_PERCENT}_percent_report_trial_{trial.number}_{now_date}.json"
        with open(REPORT_PATH, 'w', encoding='utf-8') as f:
            json.dump(best_metrics, f, ensure_ascii=False, indent=4)
        mlflow.log_artifact(REPORT_PATH, artifact_path="reports")

        return float(best_metrics.get('val_accuracy', 0))

if __name__ == '__main__':
    mlflow.set_tracking_uri("http://127.0.0.1:5003")
    mlflow.set_experiment("Audio Emotion Finetuning")
    
    # Study name and the SQLite database file that stores the study history.
    STUDY_NAME = "audio-finetune-study-v1" # unique name for this study
    STORAGE_NAME = f"sqlite:///{STUDY_NAME}.db" # persisted as a SQLite database file
    
    # With storage, study_name and load_if_exists=True, an existing study of
    # this name is reused; otherwise a new one is created.
    study = optuna.create_study(
        study_name=STUDY_NAME,
        storage=STORAGE_NAME,
        direction="maximize",
        load_if_exists=True
    )
    
    study.optimize(objective, n_trials=1) # run n trials with different hyperparameter combinations
    
    print("\n--- Hyperparameter Optimization Finished ---")
    print(f"Total trials in this study: {len(study.trials)}")
    
    print("Best trial:")
    trial = study.best_trial
    print(f"  Value (Best Val Accuracy): {trial.value}")
    print(f"  Params: {trial.params}")

[I 2025-08-24 23:55:24,266] Using an existing study with name 'audio-finetune-study-v1' instead of creating a new one.


Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at team-lucid/hubert-base-korean and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


클래스 가중치 적용: {'angry': '0.75', 'disgust': '2.09', 'fear': '2.00', 'happiness': '1.35', 'neutral': '1.47', 'sadness': '0.33', 'surprise': '4.11'}

--- Trial 24, 파인튜닝 1단계 시작: 커스텀 헤드 훈련 ---
Epoch 1/15
----------


[W 2025-08-24 23:55:26,647] Trial 24 failed with parameters: {'model_name': 'hubert-base', 'lr': 1.5676995571651015e-05, 'backbone_lr_scale': 0.06049605839374828, 'batch_size': 8, 'accumulation_steps': 4} because of the following error: TypeError("unsupported operand type(s) for /: 'NoneType' and 'int'").
Traceback (most recent call last):
  File "/root/feellog_02/.venv/lib/python3.9/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_960283/1930072545.py", line 237, in objective
    train_model(
  File "/root/feellog_02/core/training/trainer.py", line 104, in train_model
    loss = loss / accumulation_steps
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
[W 2025-08-24 23:55:26,649] Trial 24 failed with value None.


🏃 View run trial_24_20250824_235524 at: http://127.0.0.1:5003/#/experiments/200841879867459833/runs/6f2a922b4770404882f7c60c97f35897
🧪 View experiment at: http://127.0.0.1:5003/#/experiments/200841879867459833


TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'