In [1]:
# import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import TensorDataset, DataLoader, Subset
from sklearn.model_selection import KFold
import optuna
from datetime import datetime
from optuna.trial import TrialState
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class LayerNormLSTM(nn.Module):
    def __init__(self, input_node, hidden_node):
        super().__init__()
        self.input_node = input_node
        self.hidden_node = hidden_node

        self.w_i = nn.Linear(input_node, 4*hidden_node, bias=False)
        self.w_h = nn.Linear(hidden_node, 4*hidden_node, bias=False)

        self.ln_i = nn.LayerNorm(hidden_node)
        self.ln_f = nn.LayerNorm(hidden_node)
        self.ln_w = nn.LayerNorm(hidden_node)
        self.ln_o = nn.LayerNorm(hidden_node)
        self.ln_c = nn.LayerNorm(hidden_node)

    def forward(self, input, hidden):
        h_prev, c_prev = hidden

        gi = self.w_i(input)
        gh = self.w_h(h_prev)
        i_i, i_f, i_w, i_o = gi.chunk(4, dim=-1)
        h_i, h_f, h_w, h_o = gh.chunk(4, dim=-1)

        i_g = torch.sigmoid(self.ln_i(i_i + h_i))
        f_g = torch.sigmoid(self.ln_f(i_f + h_f))
        w_g = torch.tanh(self.ln_w(i_w + h_w))
        o_g = torch.sigmoid(self.ln_o(i_o + h_o))
        

        c_new = f_g * c_prev + i_g * w_g
        c_new = self.ln_c(c_new)

        h_new = o_g * torch.tanh(c_new)

        return h_new, c_new

In [3]:
class StateExtr(nn.Module):
    def __init__(self, input_node, hidden_node, n_layer, dropout):
        super().__init__()
        self.hidden_node = hidden_node
        self.n_layer = n_layer
        self.input_node = input_node

        self.lstm_cells = nn.ModuleList()
        self.lstm_cells.append(LayerNormLSTM(input_node, hidden_node))
        for _ in range(n_layer - 1):
            self.lstm_cells.append(LayerNormLSTM(hidden_node, hidden_node))

        self.dropout = nn.Dropout(dropout)
        self.final_layer_norm = nn.LayerNorm(hidden_node)
        self.final_dropout = nn.Dropout(dropout)

    def forward(self, x, seq_len):
        batch_size, max_len, input_node = x.size()
        device = x.device

        h_states = []
        c_states = []
        for _ in range(self.n_layer):
            h_states.append(torch.zeros(batch_size, self.hidden_node, device=device))
            c_states.append(torch.zeros(batch_size, self.hidden_node, device=device))
        
        outputs = []
        for t in range(max_len):
            x_t = x[:, t, :]
            layer_input = x_t
            for layer_idx, lstm_cell in enumerate(self.lstm_cells):
                h_new, c_new = lstm_cell(layer_input, (h_states[layer_idx], c_states[layer_idx]))
                h_states[layer_idx] = h_new
                c_states[layer_idx] = c_new

                if layer_idx < len(self.lstm_cells) - 1:
                    layer_input = self.dropout(h_new)
                else:
                    layer_input = h_new
            outputs.append(layer_input)
        
        output_tensor = torch.stack(outputs, dim=1)
        seq_len_cpu = seq_len.detach().cpu().long()
        mask = torch.arange(max_len, device='cpu')[None, :] < seq_len_cpu[:, None]
        mask = mask.float().to(device).unsqueeze(-1)
        masked_output = output_tensor * mask
        normalized = self.final_layer_norm(masked_output)
        return self.final_dropout(normalized)

In [4]:
class PhysicalChangeDecoder(nn.Module):
    def __init__(self, input_node, output_node, n_layer, hidden_node, dropout):
        super().__init__()
        self.layers = nn.ModuleList()
        
        self.layers.append(nn.Linear(input_node, hidden_node))
        self.layers.append(nn.LayerNorm(hidden_node))
        self.layers.append(nn.ReLU())
        self.layers.append(nn.Dropout(dropout))

        for i in range(n_layer - 1):
            self.layers.append(nn.Linear(hidden_node, hidden_node))
            self.layers.append(nn.LayerNorm(hidden_node))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(dropout))

        self.layers.append(nn.Linear(hidden_node, output_node))
    
    def forward(self, hidden_states):
        x = hidden_states
        for layer in self.layers:
            x = layer(x)
        return x

In [5]:
class CurrentPredictor(nn.Module):
    def __init__(self, input_node, hidden_node, n_layer, dropout):
        super().__init__()
        self.layers = nn.ModuleList()
        
        self.layers.append(nn.Linear(input_node, hidden_node))
        self.layers.append(nn.LayerNorm(hidden_node))
        self.layers.append(nn.ReLU())
        self.layers.append(nn.Dropout(dropout))
        
        for i in range(n_layer - 1):
            self.layers.append(nn.Linear(hidden_node, hidden_node))
            self.layers.append(nn.LayerNorm(hidden_node))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(dropout))
        
        self.layers.append(nn.Linear(hidden_node, 1))
    
    def forward(self, new_state):
        x = new_state
        for layer in self.layers:
            x = layer(x)
        return x

In [6]:
class PhysicsConstraintLayer(nn.Module):
    def __init__(self, range_mm, current_predictor, eps=1e-2):
        super().__init__()
        self.sps = eps
        self.current_predictor = current_predictor
        self.register_buffer('range_mm_tensor', self._convert_range_to_tensor(range_mm))

    def _convert_range_to_tensor(self, range_mm):
        feature_names = ['V','E','VF','VA','VB','CFLA','CALA','CFK','CBK','I']
        ranges = torch.zeros(len(feature_names),2)
        for i, name in enumerate(feature_names):
            if name in range_mm:
                ranges[i, 0] = range_mm[name]['min']
                ranges[i, 1] = range_mm[name]['max']
        return ranges
    
    def normalize(self, data, feature_idx):
        min_val = self.range_mm_tensor[feature_idx, 0]
        max_val = self.range_mm_tensor[feature_idx, 1]
        return (data - min_val) / (max_val - min_val)

    def denormalize(self, data, feature_idx):
        min_val = self.range_mm_tensor[feature_idx, 0]
        max_val = self.range_mm_tensor[feature_idx, 1]
        return data * (max_val - min_val) + min_val

    def forward(self, physical_changes, current_state):
        V_idx, E_idx, VF_idx, VA_idx, VB_idx = 0, 1, 2, 3, 4
        CFLA_idx, CALA_idx, CFK_idx, CBK_idx, I_idx = 5, 6, 7, 8, 9

        VF = self.denormalize(current_state[..., 2:3], VF_idx)
        VA = self.denormalize(current_state[..., 3:4], VA_idx)
        VB = self.denormalize(current_state[..., 4:5], VB_idx)
        CFLA = self.denormalize(current_state[..., 5:6], CFLA_idx)
        CALA = self.denormalize(current_state[..., 6:7], CALA_idx)
        CFK = self.denormalize(current_state[..., 7:8], CFK_idx)
        CBK = self.denormalize(current_state[..., 8:9], CBK_idx)

        dVA = physical_changes[..., 0:1]
        dVB = physical_changes[..., 1:2]
        rratio = physical_changes[..., 2:3]
        dNBK = physical_changes[..., 3:4]

        ratio = torch.sigmoid(rratio)
        dNALA = ratio * dNBK

        NFLA = CFLA * VF
        NALA = CALA * VA
        NFK = CFK * VF
        NBK = CBK * VB

        # tensor 비교를 torch.where로 변경
        condition1 = VF < dVA + dVB
        dVA = torch.where(condition1, torch.zeros_like(dVA), dVA)
        dVB = torch.where(condition1, torch.zeros_like(dVB), dVB)
        
        condition2 = NFLA < dNALA
        dNALA = torch.where(condition2, torch.zeros_like(dNALA), dNALA)
        
        condition3 = NFK < dNBK
        dNBK = torch.where(condition3, torch.zeros_like(dNBK), dNBK)

        nVF = VF - dVA - dVB
        nVA = VA + dVA
        nVB = VB + dVB

        nVF = torch.clamp(nVF, min=self.sps)
        nVA = torch.clamp(nVA, min=self.sps)
        nVB = torch.clamp(nVB, min=self.sps)
        
        nNFLA = NFLA - dNALA
        nNALA = NALA + dNALA
        nNFK = NFK - dNBK
        nNBK = NBK + dNBK

        nCFLA = nNFLA / nVF
        nCALA = nNALA / nVA
        nCFK = nNFK / nVF
        nCBK = nNBK / nVB

        V = current_state[..., 0:1]
        E = current_state[..., 1:2]
        nVF_norm = self.normalize(nVF, VF_idx)
        nVA_norm = self.normalize(nVA, VA_idx)
        nVB_norm = self.normalize(nVB, VB_idx)
        nCFLA_norm = self.normalize(nCFLA, CFLA_idx)
        nCALA_norm = self.normalize(nCALA, CALA_idx)
        nCFK_norm = self.normalize(nCFK, CFK_idx)
        nCBK_norm = self.normalize(nCBK, CBK_idx)

        temp_state = torch.cat([
            V, E, nVF_norm, nVA_norm, nVB_norm, nCFLA_norm, nCALA_norm, nCFK_norm, nCBK_norm
        ], dim=-1)
        
        nI_pred_norm = self.current_predictor(temp_state)
        nI_real = self.denormalize(nI_pred_norm, I_idx)
        nI_real = torch.clamp(nI_real, min=0.0)
        nI_norm = self.normalize(nI_real, I_idx)

        next_state = torch.cat([
            V, E, nVF_norm, nVA_norm, nVB_norm, nCFLA_norm, nCALA_norm, nCFK_norm, nCBK_norm, nI_norm
        ], dim=-1)
        
        return next_state

In [7]:
class BMEDAutoregressiveModel(nn.Module):
    def __init__(self, state_extr_params, decoder_params, current_predictor_params, range_mm):
        super().__init__()
        self.state_extr = StateExtr(**state_extr_params)
        self.physical_decoder = PhysicalChangeDecoder(**decoder_params)
        self.current_predictor = CurrentPredictor(**current_predictor_params)
        self.physics_constraint = PhysicsConstraintLayer(range_mm, self.current_predictor)

    def forward(self, x, seq_len):
        hidden_states = self.state_extr(x, seq_len)
        physical_changes = self.physical_decoder(hidden_states)
        new_x = self.physics_constraint(physical_changes, x)
        return new_x

In [8]:
class NoamScheduler:
    def __init__(self, optimizer, model_size, warmup_epochs, factor=1.0):
        self.optimizer = optimizer
        self.model_size = model_size
        self.warmup_epochs = warmup_epochs
        self.factor = 1
        self.epoch_num = 0

    def step_epoch(self):
        self.epoch_num += 1
        lr = self.factor * (
            self.model_size ** (-0.5) *
            min(self.epoch_num ** (-0.5), self.epoch_num * self.warmup_epochs ** (-1.5))
        )
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        return lr

In [9]:
# 유틸리티 함수들
def df_treat(name):
    df = pd.read_csv(name)
    ndf = pd.DataFrame()
    range_mm={
        'V': {'min':df['V'].min()*0.8, 'max': df['V'].max()*1.2},
        'E': {'min':df['E'].min()*0.8, 'max': df['E'].max()*1.2},
        'VF': {'min':df['VF'].min()*0.8, 'max': df['VF'].max()*1.2},
        'VA': {'min':df['VA'].min()*0.8, 'max': df['VA'].max()*1.2},
        'VB': {'min':df['VB'].min()*0.8, 'max': df['VB'].max()*1.2},
        'CFLA': {'min':0, 'max': df['CFLA'].max()*1.2},
        'CALA': {'min':0, 'max': df['CALA'].max()*1.2},
        'CFK': {'min':0, 'max': df['CFK'].max()*1.2},
        'CBK': {'min':0, 'max': df['CBK'].max()*1.2},
        'I': {'min':0, 'max': df['I'].max()*1.2},
    }
    ndf['exp'] = df['exp']; ndf['t'] = df['t']

    for col in ['V', 'E', 'VF', 'VA', 'VB', 'CFLA', 'CALA', 'CFK', 'CBK', 'I']:
        if col in range_mm:
            ndf[col] = (df[col] - range_mm[col]['min'])/(range_mm[col]['max'] - range_mm[col]['min'])
        else:
            ndf[col] = df[col]

    exp_num_list = sorted(ndf['exp'].unique())
    return df, ndf, range_mm, exp_num_list

def seq_data(ndf, exp_num_list):
    seq = []
    feature_cols = ['V', 'E', 'VF', 'VA', 'VB', 'CFLA', 'CALA', 'CFK', 'CBK', 'I']
    for exp in exp_num_list:
        exp_df = ndf[ndf['exp'] == exp]
        seq.append(exp_df[feature_cols].values)
    return seq

def pad_seq(seq):
    max_len = max([len(s) for s in seq])
    seq_len = [len(s) for s in seq]
    pad_seq = pad_sequence([torch.tensor(s) for s in seq], batch_first=True, padding_value=-1)
    return pad_seq, seq_len, max_len

def gen_dataset(pad_seq, seq_len):
    input_tensor = pad_seq.float()
    seq_len_tensor = torch.tensor(seq_len)
    dataset = TensorDataset(input_tensor, seq_len_tensor)
    return dataset

def masked_mse_loss(pred, target, seq_len):
    batch_size, max_len, features = pred.shape
    seq_len_cpu = seq_len.detach().cpu().long()
    mask = torch.arange(max_len, device='cpu')[None, :] < seq_len_cpu[:, None]
    mask = mask.float().to(pred.device)
    loss = F.mse_loss(pred, target, reduction='none')
    masked_loss = loss * mask.unsqueeze(-1)
    total_loss = masked_loss.sum()
    total_elements = mask.sum()
    masked_loss = total_loss / total_elements
    return masked_loss

def tf_data(input_seq, seq_len):
    inputs = input_seq[:, :-1, :-1]
    targets = input_seq[:, 1:, :]
    target_seq_len = seq_len - 1
    return inputs, targets, target_seq_len

In [10]:
# Optuna 목적 함수
def objective(trial):
    """
    Optuna trial을 위한 목적 함수
    K-fold cross validation을 사용하여 하이퍼파라미터 최적화
    """
    
    # 1. 하이퍼파라미터 제안
    # LSTM StateExtractor 파라미터
    lstm_hidden_size = trial.suggest_categorical('lstm_hidden_size', [16, 32, 48, 64, 72, 96])
    lstm_n_layers = trial.suggest_int('lstm_n_layers', 2, 6, step=1)
    lstm_dropout = trial.suggest_float('lstm_dropout', 0.1, 0.5, step=0.1)
    
    # PhysicalChangeDecoder 파라미터
    decoder_hidden_size = trial.suggest_categorical('decoder_hidden_size', [16, 32, 48, 64, 72, 96])
    decoder_n_layers = trial.suggest_int('decoder_n_layers', 2, 6, step=1)
    decoder_dropout = trial.suggest_float('decoder_dropout', 0.1, 0.6, step=0.1)
    
    # CurrentPredictor 파라미터
    current_hidden_size = trial.suggest_categorical('current_hidden_size', [16, 32, 48, 64, 72, 96])
    current_n_layers = trial.suggest_int('current_n_layers', 2, 6, step=1)
    current_dropout = trial.suggest_float('current_dropout', 0.1, 0.6, step=0.1)
    
    # 2. K-fold Cross Validation
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    n_splits = 5
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    fold_losses = []
    
    # 데이터 로드 (global 변수 사용)
    indices = list(range(len(dataset)))
    
    for fold, (train_idx, val_idx) in enumerate(kfold.split(indices)):
        print(f"  🔄 Trial {trial.number}, Fold {fold+1}/{n_splits}")
        
        # 폴드별 데이터셋 준비
        train_subset = Subset(dataset, train_idx)
        val_subset = Subset(dataset, val_idx)
        
        train_loader = DataLoader(train_subset, batch_size=3, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=3, shuffle=False)
        
        # 3. 모델 파라미터 설정
        state_extr_params = {
            'input_node': 9,
            'hidden_node': lstm_hidden_size,
            'n_layer': lstm_n_layers,
            'dropout': lstm_dropout
        }
        
        decoder_params = {
            'input_node': lstm_hidden_size,
            'hidden_node': decoder_hidden_size,
            'n_layer': decoder_n_layers,
            'dropout': decoder_dropout,
            'output_node': 4
        }
        
        current_predictor_params = {
            'input_node': 9,
            'hidden_node': current_hidden_size,
            'n_layer': current_n_layers,
            'dropout': current_dropout
        }
        
        # 4. 모델 초기화
        model = BMEDAutoregressiveModel(state_extr_params, decoder_params, current_predictor_params, range_mm)
        model = model.to(device)
        
        # 5. 옵티마이저 및 스케줄러 설정
        optimizer = torch.optim.AdamW(model.parameters(), lr=1.0)
        
        # 총 에포크 수와 warmup 에포크 계산
        total_epochs = 100  # Optuna 최적화를 위해 에포크 수 감소
        warmup_epochs = int(total_epochs * 0.1)
        
        scheduler = NoamScheduler(
            optimizer, 
            model_size=lstm_hidden_size,
            warmup_epochs=warmup_epochs,
            factor=1
        )
        
        # 6. 훈련
        best_total_loss = float('inf')
        
        for epoch in range(total_epochs):
            # Learning rate 업데이트
            current_lr = scheduler.step_epoch()
            
            # 훈련
            model.train()
            train_loss = 0.0
            train_batches = 0
            
            for input_seq, seq_len in train_loader:
                try:
                    input_seq = input_seq.to(device)
                    seq_len = seq_len.to(device)
                    
                    inputs, targets, target_seq_len = tf_data(input_seq, seq_len)
                    
                    optimizer.zero_grad()
                    pred = model(inputs, target_seq_len)
                    loss = masked_mse_loss(pred, targets, target_seq_len)
                    
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                    optimizer.step()
                    
                    train_loss += loss.item()
                    train_batches += 1
                    
                except Exception as e:
                    print(f"❌ Error in training: {str(e)}")
                    continue
            
            if train_batches == 0:
                break
                
            train_loss = train_loss / train_batches
            
            # 검증
            model.eval()
            val_loss = 0.0
            val_batches = 0
            
            with torch.no_grad():
                for input_seq, seq_len in val_loader:
                    try:
                        input_seq = input_seq.to(device)
                        seq_len = seq_len.to(device)
                        
                        inputs, targets, target_seq_len = tf_data(input_seq, seq_len)
                        
                        pred = model(inputs, target_seq_len)
                        loss = masked_mse_loss(pred, targets, target_seq_len)
                        
                        val_loss += loss.item()
                        val_batches += 1
                        
                    except Exception as e:
                        continue
            
            if val_batches == 0:
                break
                
            val_loss = val_loss / val_batches
            
            # Calculate total loss
            total_loss = train_loss + val_loss
            
            # Early stopping
            if total_loss < best_total_loss:
                best_total_loss = total_loss
        
        fold_losses.append(best_total_loss)
        print(f"    Fold {fold+1} best total loss: {best_total_loss:.6f}")
        
        # 메모리 정리
        del model, optimizer, scheduler
        torch.cuda.empty_cache()
    
    # 7. K-fold 평균 손실 반환
    avg_loss = np.mean(fold_losses)
    std_loss = np.std(fold_losses)
    
    print(f"  📊 Trial {trial.number} - Average CV Loss: {avg_loss:.6f} (±{std_loss:.6f})")
    
    return avg_loss

In [11]:
# 메인 최적화 함수
def run_optuna_optimization():
    """Optuna를 사용한 하이퍼파라미터 최적화 실행"""
    
    print("🚀 BMED TF Model Hyperparameter Optimization with Optuna")
    print("="*80)
    
    # 전역 데이터 로드
    global dataset, range_mm
    
    print("📋 데이터 로드 중...")
    df, ndf, range_mm, exp_num_list = df_treat('BMED_DATA_AG.csv')
    seq = seq_data(ndf, exp_num_list)
    pad, seq_len, max_len = pad_seq(seq)
    dataset = gen_dataset(pad, seq_len)
    
    print(f"   - 총 실험 개수: {len(exp_num_list)}")
    print(f"   - 총 데이터 포인트: {len(dataset)}")
    print(f"   - 최대 시퀀스 길이: {max_len}")
    
    # SQLite 데이터베이스를 사용한 Optuna study 생성
    #timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    timestamp = '20250915_234452'
    db_url = f"sqlite:///bmed_optuna_study_{timestamp}.db"
    
    study = optuna.create_study(
        direction='minimize',
        study_name='bmed_tf_optimization',
        sampler=optuna.samplers.TPESampler(seed=42),
        storage=db_url,
        load_if_exists=True
    )
    
    # 최적화 실행
    n_trials = 100
    print(f"🔍 최적화 시작 (총 {n_trials} trials)")
    
    try:
        study.optimize(objective, n_trials=n_trials, timeout=None)
    except KeyboardInterrupt:
        print("\n⚠️ 최적화가 사용자에 의해 중단되었습니다.")
    
    # 결과 분석
    print("\n" + "="*80)
    print("📊 OPTIMIZATION RESULTS")
    print("="*80)
    
    print(f"✅ 완료된 trials: {len(study.trials)}")
    print(f"🏆 최고 성능 trial: {study.best_trial.number}")
    print(f"💯 최고 성능 값: {study.best_value:.6f}")
    
    print(f"\n🎯 최적 하이퍼파라미터:")
    for key, value in study.best_params.items():
        print(f"   {key}: {value}")
    
    # 상위 5개 trial 정보
    print(f"\n📈 상위 5개 Trials:")
    trials_df = study.trials_dataframe().sort_values('value').head(5)
    for idx, (_, trial) in enumerate(trials_df.iterrows()):
        print(f"   {idx+1}. Trial {int(trial['number'])}: {trial['value']:.6f}")
    
    # 결과 저장
    result_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    # Trials 결과 CSV로 저장
    trials_file = f"bmed_optuna_trials_{result_timestamp}.csv"
    trials_df = study.trials_dataframe()
    trials_df.to_csv(trials_file, index=False)
    print(f"💾 모든 trials 결과가 저장되었습니다: {trials_file}")
    
    # SQLite 데이터베이스 정보
    print(f"💾 SQLite 데이터베이스에 실시간 저장됨: {db_url}")
    print(f"   - 중단 후 재시작 시 자동으로 기존 결과를 불러옵니다")
    print(f"   - 다른 프로세스에서 진행상황 모니터링 가능합니다")
    
    print("="*80)
    print("🎉 하이퍼파라미터 최적화 완료!")
    
    return study

if __name__ == "__main__":
    study = run_optuna_optimization()

🚀 BMED TF Model Hyperparameter Optimization with Optuna
📋 데이터 로드 중...
   - 총 실험 개수: 15
   - 총 데이터 포인트: 15
   - 최대 시퀀스 길이: 37


[I 2025-09-16 08:23:09,187] Using an existing study with name 'bmed_tf_optimization' instead of creating a new one.


🔍 최적화 시작 (총 100 trials)
  🔄 Trial 49, Fold 1/5
    Fold 1 best total loss: 0.007661
  🔄 Trial 49, Fold 2/5
    Fold 2 best total loss: 0.319122
  🔄 Trial 49, Fold 3/5
    Fold 3 best total loss: 0.017220
  🔄 Trial 49, Fold 4/5
    Fold 4 best total loss: 0.012732
  🔄 Trial 49, Fold 5/5


[I 2025-09-16 08:26:44,182] Trial 49 finished with value: 0.07380131604149938 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 32, 'decoder_n_layers': 5, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012272
  📊 Trial 49 - Average CV Loss: 0.073801 (±0.122698)
  🔄 Trial 50, Fold 1/5
    Fold 1 best total loss: 0.009122
  🔄 Trial 50, Fold 2/5
    Fold 2 best total loss: 0.317891
  🔄 Trial 50, Fold 3/5
    Fold 3 best total loss: 0.021509
  🔄 Trial 50, Fold 4/5
    Fold 4 best total loss: 0.011721
  🔄 Trial 50, Fold 5/5


[I 2025-09-16 08:30:22,773] Trial 50 finished with value: 0.07575069691520184 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 32, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.018510
  📊 Trial 50 - Average CV Loss: 0.075751 (±0.121153)
  🔄 Trial 51, Fold 1/5
    Fold 1 best total loss: 0.008506
  🔄 Trial 51, Fold 2/5
    Fold 2 best total loss: 0.020730
  🔄 Trial 51, Fold 3/5
    Fold 3 best total loss: 0.224590
  🔄 Trial 51, Fold 4/5
    Fold 4 best total loss: 0.013597
  🔄 Trial 51, Fold 5/5


[I 2025-09-16 08:33:59,542] Trial 51 finished with value: 0.05638162192190066 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 32, 'decoder_n_layers': 5, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 4, 'current_dropout': 0.1}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.014485
  📊 Trial 51 - Average CV Loss: 0.056382 (±0.084194)
  🔄 Trial 52, Fold 1/5
    Fold 1 best total loss: 0.008885
  🔄 Trial 52, Fold 2/5
    Fold 2 best total loss: 0.021410
  🔄 Trial 52, Fold 3/5
    Fold 3 best total loss: 0.014410
  🔄 Trial 52, Fold 4/5
    Fold 4 best total loss: 0.012235
  🔄 Trial 52, Fold 5/5


[I 2025-09-16 08:39:11,666] Trial 52 finished with value: 0.014062341768294573 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.013371
  📊 Trial 52 - Average CV Loss: 0.014062 (±0.004116)
  🔄 Trial 53, Fold 1/5
    Fold 1 best total loss: 0.009288
  🔄 Trial 53, Fold 2/5
    Fold 2 best total loss: 0.014960
  🔄 Trial 53, Fold 3/5
    Fold 3 best total loss: 0.015203
  🔄 Trial 53, Fold 4/5
    Fold 4 best total loss: 0.012697
  🔄 Trial 53, Fold 5/5


[I 2025-09-16 08:44:25,560] Trial 53 finished with value: 0.012575168663170188 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.010727
  📊 Trial 53 - Average CV Loss: 0.012575 (±0.002316)
  🔄 Trial 54, Fold 1/5
    Fold 1 best total loss: 0.007511
  🔄 Trial 54, Fold 2/5
    Fold 2 best total loss: 0.011121
  🔄 Trial 54, Fold 3/5
    Fold 3 best total loss: 0.016557
  🔄 Trial 54, Fold 4/5
    Fold 4 best total loss: 0.013232
  🔄 Trial 54, Fold 5/5


[I 2025-09-16 08:49:38,449] Trial 54 finished with value: 0.012998328567482531 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.016571
  📊 Trial 54 - Average CV Loss: 0.012998 (±0.003438)
  🔄 Trial 55, Fold 1/5
    Fold 1 best total loss: 0.007997
  🔄 Trial 55, Fold 2/5
    Fold 2 best total loss: 0.015990
  🔄 Trial 55, Fold 3/5
    Fold 3 best total loss: 0.015530
  🔄 Trial 55, Fold 4/5
    Fold 4 best total loss: 0.012677
  🔄 Trial 55, Fold 5/5


[I 2025-09-16 08:54:54,167] Trial 55 finished with value: 0.012991745653562248 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012764
  📊 Trial 55 - Average CV Loss: 0.012992 (±0.002847)
  🔄 Trial 56, Fold 1/5
    Fold 1 best total loss: 0.007507
  🔄 Trial 56, Fold 2/5
    Fold 2 best total loss: 0.317821
  🔄 Trial 56, Fold 3/5
    Fold 3 best total loss: 0.011723
  🔄 Trial 56, Fold 4/5
    Fold 4 best total loss: 0.013515
  🔄 Trial 56, Fold 5/5


[I 2025-09-16 09:00:01,886] Trial 56 finished with value: 0.07247555266367271 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.011811
  📊 Trial 56 - Average CV Loss: 0.072476 (±0.122689)
  🔄 Trial 57, Fold 1/5
    Fold 1 best total loss: 0.007246
  🔄 Trial 57, Fold 2/5
    Fold 2 best total loss: 0.317908
  🔄 Trial 57, Fold 3/5
    Fold 3 best total loss: 0.222840
  🔄 Trial 57, Fold 4/5
    Fold 4 best total loss: 0.200503
  🔄 Trial 57, Fold 5/5


[I 2025-09-16 09:05:04,932] Trial 57 finished with value: 0.21841515807900577 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.343579
  📊 Trial 57 - Average CV Loss: 0.218415 (±0.118742)
  🔄 Trial 58, Fold 1/5
    Fold 1 best total loss: 0.008575
  🔄 Trial 58, Fold 2/5
    Fold 2 best total loss: 0.020240
  🔄 Trial 58, Fold 3/5
    Fold 3 best total loss: 0.222583
  🔄 Trial 58, Fold 4/5
    Fold 4 best total loss: 0.010009
  🔄 Trial 58, Fold 5/5


[I 2025-09-16 09:10:03,918] Trial 58 finished with value: 0.05475541808409616 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012370
  📊 Trial 58 - Average CV Loss: 0.054755 (±0.084011)
  🔄 Trial 59, Fold 1/5
    Fold 1 best total loss: 0.314981
  🔄 Trial 59, Fold 2/5
    Fold 2 best total loss: 0.316982
  🔄 Trial 59, Fold 3/5
    Fold 3 best total loss: 0.013795
  🔄 Trial 59, Fold 4/5
    Fold 4 best total loss: 0.010420
  🔄 Trial 59, Fold 5/5


[I 2025-09-16 09:15:04,563] Trial 59 finished with value: 0.20004435373703017 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.344043
  📊 Trial 59 - Average CV Loss: 0.200044 (±0.153796)
  🔄 Trial 60, Fold 1/5
    Fold 1 best total loss: 0.007969
  🔄 Trial 60, Fold 2/5
    Fold 2 best total loss: 0.016528
  🔄 Trial 60, Fold 3/5
    Fold 3 best total loss: 0.016492
  🔄 Trial 60, Fold 4/5
    Fold 4 best total loss: 0.009173
  🔄 Trial 60, Fold 5/5


[I 2025-09-16 09:20:02,928] Trial 60 finished with value: 0.012227280775550752 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.010975
  📊 Trial 60 - Average CV Loss: 0.012227 (±0.003625)
  🔄 Trial 61, Fold 1/5
    Fold 1 best total loss: 0.009167
  🔄 Trial 61, Fold 2/5
    Fold 2 best total loss: 0.318417
  🔄 Trial 61, Fold 3/5
    Fold 3 best total loss: 0.017297
  🔄 Trial 61, Fold 4/5
    Fold 4 best total loss: 0.016645
  🔄 Trial 61, Fold 5/5


[I 2025-09-16 09:25:02,993] Trial 61 finished with value: 0.07525223020929843 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.014735
  📊 Trial 61 - Average CV Loss: 0.075252 (±0.121616)
  🔄 Trial 62, Fold 1/5
    Fold 1 best total loss: 0.012827
  🔄 Trial 62, Fold 2/5
    Fold 2 best total loss: 0.317828
  🔄 Trial 62, Fold 3/5
    Fold 3 best total loss: 0.027124
  🔄 Trial 62, Fold 4/5
    Fold 4 best total loss: 0.017491
  🔄 Trial 62, Fold 5/5


[I 2025-09-16 09:30:16,917] Trial 62 finished with value: 0.07837464853655547 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 5, 'decoder_dropout': 0.1, 'current_hidden_size': 16, 'current_n_layers': 5, 'current_dropout': 0.1}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.016604
  📊 Trial 62 - Average CV Loss: 0.078375 (±0.119819)
  🔄 Trial 63, Fold 1/5
    Fold 1 best total loss: 0.008489
  🔄 Trial 63, Fold 2/5
    Fold 2 best total loss: 0.019665
  🔄 Trial 63, Fold 3/5
    Fold 3 best total loss: 0.015498
  🔄 Trial 63, Fold 4/5
    Fold 4 best total loss: 0.012614
  🔄 Trial 63, Fold 5/5


[I 2025-09-16 09:35:22,512] Trial 63 finished with value: 0.013702389679383486 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012245
  📊 Trial 63 - Average CV Loss: 0.013702 (±0.003722)
  🔄 Trial 64, Fold 1/5
    Fold 1 best total loss: 0.008771
  🔄 Trial 64, Fold 2/5
    Fold 2 best total loss: 0.019033
  🔄 Trial 64, Fold 3/5
    Fold 3 best total loss: 0.017066
  🔄 Trial 64, Fold 4/5
    Fold 4 best total loss: 0.013599
  🔄 Trial 64, Fold 5/5


[I 2025-09-16 09:40:25,019] Trial 64 finished with value: 0.08057899868581445 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.344426
  📊 Trial 64 - Average CV Loss: 0.080579 (±0.131970)
  🔄 Trial 65, Fold 1/5
    Fold 1 best total loss: 0.008680
  🔄 Trial 65, Fold 2/5
    Fold 2 best total loss: 0.015287
  🔄 Trial 65, Fold 3/5
    Fold 3 best total loss: 0.015972
  🔄 Trial 65, Fold 4/5
    Fold 4 best total loss: 0.009045
  🔄 Trial 65, Fold 5/5


[I 2025-09-16 09:45:30,688] Trial 65 finished with value: 0.012160248798318207 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.011818
  📊 Trial 65 - Average CV Loss: 0.012160 (±0.003041)
  🔄 Trial 66, Fold 1/5
    Fold 1 best total loss: 0.225108
  🔄 Trial 66, Fold 2/5
    Fold 2 best total loss: 0.318471
  🔄 Trial 66, Fold 3/5
    Fold 3 best total loss: 0.017227
  🔄 Trial 66, Fold 4/5
    Fold 4 best total loss: 0.011120
  🔄 Trial 66, Fold 5/5


[I 2025-09-16 09:50:44,290] Trial 66 finished with value: 0.11649624025449157 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.010555
  📊 Trial 66 - Average CV Loss: 0.116496 (±0.130209)
  🔄 Trial 67, Fold 1/5
    Fold 1 best total loss: 0.008554
  🔄 Trial 67, Fold 2/5
    Fold 2 best total loss: 0.016675
  🔄 Trial 67, Fold 3/5
    Fold 3 best total loss: 0.017502
  🔄 Trial 67, Fold 4/5
    Fold 4 best total loss: 0.014377
  🔄 Trial 67, Fold 5/5


[I 2025-09-16 09:55:56,854] Trial 67 finished with value: 0.013678834622260183 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.011286
  📊 Trial 67 - Average CV Loss: 0.013679 (±0.003348)
  🔄 Trial 68, Fold 1/5
    Fold 1 best total loss: 0.165398
  🔄 Trial 68, Fold 2/5
    Fold 2 best total loss: 0.019802
  🔄 Trial 68, Fold 3/5
    Fold 3 best total loss: 0.014460
  🔄 Trial 68, Fold 4/5
    Fold 4 best total loss: 0.012276
  🔄 Trial 68, Fold 5/5


[I 2025-09-16 10:06:03,803] Trial 68 finished with value: 0.11133796066278592 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 6, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.344754
  📊 Trial 68 - Average CV Loss: 0.111338 (±0.130371)
  🔄 Trial 69, Fold 1/5
    Fold 1 best total loss: 0.008485
  🔄 Trial 69, Fold 2/5
    Fold 2 best total loss: 0.020923
  🔄 Trial 69, Fold 3/5
    Fold 3 best total loss: 0.013807
  🔄 Trial 69, Fold 4/5
    Fold 4 best total loss: 0.014725
  🔄 Trial 69, Fold 5/5


[I 2025-09-16 10:11:17,950] Trial 69 finished with value: 0.014079901180230081 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 5, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012459
  📊 Trial 69 - Average CV Loss: 0.014080 (±0.004031)
  🔄 Trial 70, Fold 1/5
    Fold 1 best total loss: 0.316638
  🔄 Trial 70, Fold 2/5
    Fold 2 best total loss: 0.015472
  🔄 Trial 70, Fold 3/5
    Fold 3 best total loss: 0.015813
  🔄 Trial 70, Fold 4/5
    Fold 4 best total loss: 0.010113
  🔄 Trial 70, Fold 5/5


[I 2025-09-16 10:15:11,742] Trial 70 finished with value: 0.07414941079914569 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 16, 'decoder_n_layers': 6, 'decoder_dropout': 0.6, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012712
  📊 Trial 70 - Average CV Loss: 0.074149 (±0.121262)
  🔄 Trial 71, Fold 1/5
    Fold 1 best total loss: 0.008828
  🔄 Trial 71, Fold 2/5
    Fold 2 best total loss: 0.020422
  🔄 Trial 71, Fold 3/5
    Fold 3 best total loss: 0.019231
  🔄 Trial 71, Fold 4/5
    Fold 4 best total loss: 0.014863
  🔄 Trial 71, Fold 5/5


[I 2025-09-16 10:20:36,263] Trial 71 finished with value: 0.01606491677230224 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 64, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 16, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.016980
  📊 Trial 71 - Average CV Loss: 0.016065 (±0.004090)
  🔄 Trial 72, Fold 1/5
    Fold 1 best total loss: 0.009796
  🔄 Trial 72, Fold 2/5
    Fold 2 best total loss: 0.019344
  🔄 Trial 72, Fold 3/5
    Fold 3 best total loss: 0.016983
  🔄 Trial 72, Fold 4/5
    Fold 4 best total loss: 0.013949
  🔄 Trial 72, Fold 5/5


[I 2025-09-16 10:25:46,204] Trial 72 finished with value: 0.014941592118702829 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.014636
  📊 Trial 72 - Average CV Loss: 0.014942 (±0.003197)
  🔄 Trial 73, Fold 1/5
    Fold 1 best total loss: 0.010719
  🔄 Trial 73, Fold 2/5
    Fold 2 best total loss: 0.021570
  🔄 Trial 73, Fold 3/5
    Fold 3 best total loss: 0.015669
  🔄 Trial 73, Fold 4/5
    Fold 4 best total loss: 0.201712
  🔄 Trial 73, Fold 5/5


[I 2025-09-16 10:30:54,776] Trial 73 finished with value: 0.11875228049466387 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.344091
  📊 Trial 73 - Average CV Loss: 0.118752 (±0.133718)
  🔄 Trial 74, Fold 1/5
    Fold 1 best total loss: 0.317448
  🔄 Trial 74, Fold 2/5
    Fold 2 best total loss: 0.022032
  🔄 Trial 74, Fold 3/5
    Fold 3 best total loss: 0.015203
  🔄 Trial 74, Fold 4/5
    Fold 4 best total loss: 0.011058
  🔄 Trial 74, Fold 5/5


[I 2025-09-16 10:36:09,253] Trial 74 finished with value: 0.07564449964556844 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 96, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012481
  📊 Trial 74 - Average CV Loss: 0.075644 (±0.120961)
  🔄 Trial 75, Fold 1/5
    Fold 1 best total loss: 0.008932
  🔄 Trial 75, Fold 2/5
    Fold 2 best total loss: 0.014945
  🔄 Trial 75, Fold 3/5
    Fold 3 best total loss: 0.017696
  🔄 Trial 75, Fold 4/5
    Fold 4 best total loss: 0.012107
  🔄 Trial 75, Fold 5/5


[I 2025-09-16 10:41:19,443] Trial 75 finished with value: 0.013292913150507957 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012785
  📊 Trial 75 - Average CV Loss: 0.013293 (±0.002925)
  🔄 Trial 76, Fold 1/5
    Fold 1 best total loss: 0.008002
  🔄 Trial 76, Fold 2/5
    Fold 2 best total loss: 0.013307
  🔄 Trial 76, Fold 3/5
    Fold 3 best total loss: 0.015958
  🔄 Trial 76, Fold 4/5
    Fold 4 best total loss: 0.011305
  🔄 Trial 76, Fold 5/5


[I 2025-09-16 10:46:27,591] Trial 76 finished with value: 0.012380060809664428 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.013328
  📊 Trial 76 - Average CV Loss: 0.012380 (±0.002641)
  🔄 Trial 77, Fold 1/5
    Fold 1 best total loss: 0.006438
  🔄 Trial 77, Fold 2/5
    Fold 2 best total loss: 0.317852
  🔄 Trial 77, Fold 3/5
    Fold 3 best total loss: 0.015114
  🔄 Trial 77, Fold 4/5
    Fold 4 best total loss: 0.202358
  🔄 Trial 77, Fold 5/5


[I 2025-09-16 10:51:37,115] Trial 77 finished with value: 0.17749308639904485 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.345704
  📊 Trial 77 - Average CV Loss: 0.177493 (±0.144389)
  🔄 Trial 78, Fold 1/5
    Fold 1 best total loss: 0.008670
  🔄 Trial 78, Fold 2/5
    Fold 2 best total loss: 0.020204
  🔄 Trial 78, Fold 3/5
    Fold 3 best total loss: 0.016853
  🔄 Trial 78, Fold 4/5
    Fold 4 best total loss: 0.013312
  🔄 Trial 78, Fold 5/5


[I 2025-09-16 10:55:10,084] Trial 78 finished with value: 0.08072472477797418 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.344584
  📊 Trial 78 - Average CV Loss: 0.080725 (±0.131985)
  🔄 Trial 79, Fold 1/5
    Fold 1 best total loss: 0.226005
  🔄 Trial 79, Fold 2/5
    Fold 2 best total loss: 0.318038
  🔄 Trial 79, Fold 3/5
    Fold 3 best total loss: 0.221060
  🔄 Trial 79, Fold 4/5
    Fold 4 best total loss: 0.009707
  🔄 Trial 79, Fold 5/5


[I 2025-09-16 11:00:19,330] Trial 79 finished with value: 0.15764561225660145 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 5, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 5, 'current_dropout': 0.1}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.013417
  📊 Trial 79 - Average CV Loss: 0.157646 (±0.124184)
  🔄 Trial 80, Fold 1/5
    Fold 1 best total loss: 0.007946
  🔄 Trial 80, Fold 2/5
    Fold 2 best total loss: 0.319689
  🔄 Trial 80, Fold 3/5
    Fold 3 best total loss: 0.013316
  🔄 Trial 80, Fold 4/5
    Fold 4 best total loss: 0.198806
  🔄 Trial 80, Fold 5/5


[I 2025-09-16 11:06:59,789] Trial 80 finished with value: 0.11115607861429452 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 4, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.016023
  📊 Trial 80 - Average CV Loss: 0.111156 (±0.126842)
  🔄 Trial 81, Fold 1/5
    Fold 1 best total loss: 0.314055
  🔄 Trial 81, Fold 2/5
    Fold 2 best total loss: 0.316850
  🔄 Trial 81, Fold 3/5
    Fold 3 best total loss: 0.017047
  🔄 Trial 81, Fold 4/5
    Fold 4 best total loss: 0.014822
  🔄 Trial 81, Fold 5/5


[I 2025-09-16 11:12:08,116] Trial 81 finished with value: 0.1365126925520599 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 6, 'decoder_dropout': 0.4, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.019788
  📊 Trial 81 - Average CV Loss: 0.136513 (±0.146115)
  🔄 Trial 82, Fold 1/5
    Fold 1 best total loss: 0.009931
  🔄 Trial 82, Fold 2/5
    Fold 2 best total loss: 0.016871
  🔄 Trial 82, Fold 3/5
    Fold 3 best total loss: 0.020703
  🔄 Trial 82, Fold 4/5
    Fold 4 best total loss: 0.016752
  🔄 Trial 82, Fold 5/5


[I 2025-09-16 11:18:55,591] Trial 82 finished with value: 0.01673024898627773 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 16, 'decoder_n_layers': 5, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 4, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.019394
  📊 Trial 82 - Average CV Loss: 0.016730 (±0.003718)
  🔄 Trial 83, Fold 1/5
    Fold 1 best total loss: 0.007881
  🔄 Trial 83, Fold 2/5
    Fold 2 best total loss: 0.019152
  🔄 Trial 83, Fold 3/5
    Fold 3 best total loss: 0.018772
  🔄 Trial 83, Fold 4/5
    Fold 4 best total loss: 0.012333
  🔄 Trial 83, Fold 5/5


[I 2025-09-16 11:24:04,840] Trial 83 finished with value: 0.013823672966100275 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.010981
  📊 Trial 83 - Average CV Loss: 0.013824 (±0.004438)
  🔄 Trial 84, Fold 1/5
    Fold 1 best total loss: 0.008209
  🔄 Trial 84, Fold 2/5
    Fold 2 best total loss: 0.018530
  🔄 Trial 84, Fold 3/5
    Fold 3 best total loss: 0.017191
  🔄 Trial 84, Fold 4/5
    Fold 4 best total loss: 0.014435
  🔄 Trial 84, Fold 5/5


[I 2025-09-16 11:29:12,211] Trial 84 finished with value: 0.01480989110423252 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.015685
  📊 Trial 84 - Average CV Loss: 0.014810 (±0.003577)
  🔄 Trial 85, Fold 1/5
    Fold 1 best total loss: 0.007891
  🔄 Trial 85, Fold 2/5
    Fold 2 best total loss: 0.315973
  🔄 Trial 85, Fold 3/5
    Fold 3 best total loss: 0.016655
  🔄 Trial 85, Fold 4/5
    Fold 4 best total loss: 0.009989
  🔄 Trial 85, Fold 5/5


[I 2025-09-16 11:34:22,434] Trial 85 finished with value: 0.07278906374704093 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.013437
  📊 Trial 85 - Average CV Loss: 0.072789 (±0.121629)
  🔄 Trial 86, Fold 1/5
    Fold 1 best total loss: 0.007337
  🔄 Trial 86, Fold 2/5
    Fold 2 best total loss: 0.014438
  🔄 Trial 86, Fold 3/5
    Fold 3 best total loss: 0.016713
  🔄 Trial 86, Fold 4/5
    Fold 4 best total loss: 0.011111
  🔄 Trial 86, Fold 5/5


[I 2025-09-16 11:39:26,613] Trial 86 finished with value: 0.01205747943604365 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.010689
  📊 Trial 86 - Average CV Loss: 0.012057 (±0.003237)
  🔄 Trial 87, Fold 1/5
    Fold 1 best total loss: 0.314176
  🔄 Trial 87, Fold 2/5
    Fold 2 best total loss: 0.016590
  🔄 Trial 87, Fold 3/5
    Fold 3 best total loss: 0.013878
  🔄 Trial 87, Fold 4/5
    Fold 4 best total loss: 0.199850
  🔄 Trial 87, Fold 5/5


[I 2025-09-16 11:42:56,624] Trial 87 finished with value: 0.11868225806392729 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.048917
  📊 Trial 87 - Average CV Loss: 0.118682 (±0.119231)
  🔄 Trial 88, Fold 1/5
    Fold 1 best total loss: 0.315808
  🔄 Trial 88, Fold 2/5
    Fold 2 best total loss: 0.012777
  🔄 Trial 88, Fold 3/5
    Fold 3 best total loss: 0.017166
  🔄 Trial 88, Fold 4/5
    Fold 4 best total loss: 0.009143
  🔄 Trial 88, Fold 5/5


[I 2025-09-16 11:47:58,861] Trial 88 finished with value: 0.07289152217563241 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 96, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.009564
  📊 Trial 88 - Average CV Loss: 0.072892 (±0.121492)
  🔄 Trial 89, Fold 1/5
    Fold 1 best total loss: 0.314239
  🔄 Trial 89, Fold 2/5
    Fold 2 best total loss: 0.317489
  🔄 Trial 89, Fold 3/5
    Fold 3 best total loss: 0.018254
  🔄 Trial 89, Fold 4/5
    Fold 4 best total loss: 0.011783
  🔄 Trial 89, Fold 5/5


[I 2025-09-16 11:54:33,598] Trial 89 finished with value: 0.13552333873230965 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 4, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.015851
  📊 Trial 89 - Average CV Loss: 0.135523 (±0.147266)
  🔄 Trial 90, Fold 1/5
    Fold 1 best total loss: 0.226023
  🔄 Trial 90, Fold 2/5
    Fold 2 best total loss: 0.317056
  🔄 Trial 90, Fold 3/5
    Fold 3 best total loss: 0.014322
  🔄 Trial 90, Fold 4/5
    Fold 4 best total loss: 0.008754
  🔄 Trial 90, Fold 5/5


[I 2025-09-16 11:58:04,046] Trial 90 finished with value: 0.18205405394546686 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 72, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.344116
  📊 Trial 90 - Average CV Loss: 0.182054 (±0.144630)
  🔄 Trial 91, Fold 1/5
    Fold 1 best total loss: 0.007221
  🔄 Trial 91, Fold 2/5
    Fold 2 best total loss: 0.317329
  🔄 Trial 91, Fold 3/5
    Fold 3 best total loss: 0.015954
  🔄 Trial 91, Fold 4/5
    Fold 4 best total loss: 0.014000
  🔄 Trial 91, Fold 5/5


[I 2025-09-16 12:03:07,921] Trial 91 finished with value: 0.07323384324554354 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 5, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.011665
  📊 Trial 91 - Average CV Loss: 0.073234 (±0.122082)
  🔄 Trial 92, Fold 1/5
    Fold 1 best total loss: 0.011647
  🔄 Trial 92, Fold 2/5
    Fold 2 best total loss: 0.022683
  🔄 Trial 92, Fold 3/5
    Fold 3 best total loss: 0.022731
  🔄 Trial 92, Fold 4/5
    Fold 4 best total loss: 0.015702
  🔄 Trial 92, Fold 5/5


[I 2025-09-16 12:08:13,609] Trial 92 finished with value: 0.01809443477541208 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 16, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.017709
  📊 Trial 92 - Average CV Loss: 0.018094 (±0.004242)
  🔄 Trial 93, Fold 1/5
    Fold 1 best total loss: 0.007534
  🔄 Trial 93, Fold 2/5
    Fold 2 best total loss: 0.022239
  🔄 Trial 93, Fold 3/5
    Fold 3 best total loss: 0.017122
  🔄 Trial 93, Fold 4/5
    Fold 4 best total loss: 0.010172
  🔄 Trial 93, Fold 5/5


[I 2025-09-16 12:13:19,277] Trial 93 finished with value: 0.013816012197639792 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012013
  📊 Trial 93 - Average CV Loss: 0.013816 (±0.005251)
  🔄 Trial 94, Fold 1/5
    Fold 1 best total loss: 0.007871
  🔄 Trial 94, Fold 2/5
    Fold 2 best total loss: 0.014492
  🔄 Trial 94, Fold 3/5
    Fold 3 best total loss: 0.015727
  🔄 Trial 94, Fold 4/5
    Fold 4 best total loss: 0.009745
  🔄 Trial 94, Fold 5/5


[I 2025-09-16 12:18:25,480] Trial 94 finished with value: 0.012077829521149396 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012555
  📊 Trial 94 - Average CV Loss: 0.012078 (±0.002916)
  🔄 Trial 95, Fold 1/5
    Fold 1 best total loss: 0.008732
  🔄 Trial 95, Fold 2/5
    Fold 2 best total loss: 0.013833
  🔄 Trial 95, Fold 3/5
    Fold 3 best total loss: 0.016663
  🔄 Trial 95, Fold 4/5
    Fold 4 best total loss: 0.016128
  🔄 Trial 95, Fold 5/5


[I 2025-09-16 12:23:33,569] Trial 95 finished with value: 0.013412799581419676 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.011708
  📊 Trial 95 - Average CV Loss: 0.013413 (±0.002930)
  🔄 Trial 96, Fold 1/5
    Fold 1 best total loss: 0.008142
  🔄 Trial 96, Fold 2/5
    Fold 2 best total loss: 0.015187
  🔄 Trial 96, Fold 3/5
    Fold 3 best total loss: 0.016931
  🔄 Trial 96, Fold 4/5
    Fold 4 best total loss: 0.011356
  🔄 Trial 96, Fold 5/5


[I 2025-09-16 12:28:43,564] Trial 96 finished with value: 0.01317641066852957 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.014266
  📊 Trial 96 - Average CV Loss: 0.013176 (±0.003098)
  🔄 Trial 97, Fold 1/5
    Fold 1 best total loss: 0.314598
  🔄 Trial 97, Fold 2/5
    Fold 2 best total loss: 0.317449
  🔄 Trial 97, Fold 3/5
    Fold 3 best total loss: 0.011683
  🔄 Trial 97, Fold 4/5
    Fold 4 best total loss: 0.199061
  🔄 Trial 97, Fold 5/5


[I 2025-09-16 12:33:54,411] Trial 97 finished with value: 0.23734210729016922 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.343920
  📊 Trial 97 - Average CV Loss: 0.237342 (±0.123395)
  🔄 Trial 98, Fold 1/5
    Fold 1 best total loss: 0.008279
  🔄 Trial 98, Fold 2/5
    Fold 2 best total loss: 0.018672
  🔄 Trial 98, Fold 3/5
    Fold 3 best total loss: 0.016436
  🔄 Trial 98, Fold 4/5
    Fold 4 best total loss: 0.012266
  🔄 Trial 98, Fold 5/5


[I 2025-09-16 12:39:03,798] Trial 98 finished with value: 0.013547220977488904 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012083
  📊 Trial 98 - Average CV Loss: 0.013547 (±0.003637)
  🔄 Trial 99, Fold 1/5
    Fold 1 best total loss: 0.007757
  🔄 Trial 99, Fold 2/5
    Fold 2 best total loss: 0.017473
  🔄 Trial 99, Fold 3/5
    Fold 3 best total loss: 0.014152
  🔄 Trial 99, Fold 4/5
    Fold 4 best total loss: 0.013435
  🔄 Trial 99, Fold 5/5


[I 2025-09-16 12:42:40,357] Trial 99 finished with value: 0.013038744864752516 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 16, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.012377
  📊 Trial 99 - Average CV Loss: 0.013039 (±0.003143)
  🔄 Trial 100, Fold 1/5
    Fold 1 best total loss: 0.315213
  🔄 Trial 100, Fold 2/5
    Fold 2 best total loss: 0.319503
  🔄 Trial 100, Fold 3/5
    Fold 3 best total loss: 0.221528
  🔄 Trial 100, Fold 4/5
    Fold 4 best total loss: 0.202100
  🔄 Trial 100, Fold 5/5


[I 2025-09-16 12:49:34,691] Trial 100 finished with value: 0.2806893017143011 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.1, 'decoder_hidden_size': 64, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 6, 'current_dropout': 0.30000000000000004}. Best is trial 35 with value: 0.011858979251701384.


    Fold 5 best total loss: 0.345103
  📊 Trial 100 - Average CV Loss: 0.280689 (±0.057487)
  🔄 Trial 101, Fold 1/5


[W 2025-09-16 12:49:48,353] Trial 101 failed with parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/sjbaek/miniforge3/envs/torchenv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_5480/3899643224.py", line 105, in objective
    pred = model(inputs, target_seq_len)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/sjbaek/miniforge3/envs/torchenv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/sjbaek/miniforge3/envs/torchenv/lib/python3.11/site-p


⚠️ 최적화가 사용자에 의해 중단되었습니다.

📊 OPTIMIZATION RESULTS
✅ 완료된 trials: 102
🏆 최고 성능 trial: 35
💯 최고 성능 값: 0.011859

🎯 최적 하이퍼파라미터:
   lstm_hidden_size: 72
   lstm_n_layers: 4
   lstm_dropout: 0.1
   decoder_hidden_size: 32
   decoder_n_layers: 6
   decoder_dropout: 0.1
   current_hidden_size: 72
   current_n_layers: 2
   current_dropout: 0.30000000000000004

📈 상위 5개 Trials:
   1. Trial 35: 0.011859
   2. Trial 86: 0.012057
   3. Trial 94: 0.012078
   4. Trial 47: 0.012124
   5. Trial 65: 0.012160
💾 모든 trials 결과가 저장되었습니다: bmed_optuna_trials_20250916_124948.csv
💾 SQLite 데이터베이스에 실시간 저장됨: sqlite:///bmed_optuna_study_20250915_234452.db
   - 중단 후 재시작 시 자동으로 기존 결과를 불러옵니다
   - 다른 프로세스에서 진행상황 모니터링 가능합니다
🎉 하이퍼파라미터 최적화 완료!
