In [1]:
# import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import TensorDataset, DataLoader, Subset
from sklearn.model_selection import KFold
import optuna
from datetime import datetime
from optuna.trial import TrialState
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class LayerNormLSTM(nn.Module):
    """One LSTM time step with layer normalisation on each gate and on the cell state.

    The input and the previous hidden state are each projected (without bias)
    to ``4 * hidden_node`` features; the two projections are summed and split
    into four gate pre-activations, each of which passes through its own
    ``LayerNorm`` before the non-linearity.  The updated cell state is
    layer-normalised as well, and it is this normalised value that is both
    returned and used to compute the new hidden state.

    Args:
        input_node: Feature width of the step input.
        hidden_node: Width of the hidden and cell states.
    """

    def __init__(self, input_node, hidden_node):
        super().__init__()
        self.input_node = input_node
        self.hidden_node = hidden_node

        # A single projection per source yields all four gate pre-activations.
        gate_width = 4 * hidden_node
        self.w_i = nn.Linear(input_node, gate_width, bias=False)
        self.w_h = nn.Linear(hidden_node, gate_width, bias=False)

        # ln_i / ln_f / ln_w / ln_o normalise the gates, ln_c the cell state.
        for gate in ('i', 'f', 'w', 'o', 'c'):
            setattr(self, f'ln_{gate}', nn.LayerNorm(hidden_node))

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: Step input whose last dimension is ``input_node``.
            hidden: Tuple ``(h_prev, c_prev)`` whose last dimension is
                ``hidden_node``.

        Returns:
            Tuple ``(h_new, c_new)``; ``c_new`` is the layer-normalised cell state.
        """
        h_prev, c_prev = hidden

        # Summing before the split is element-wise identical to splitting both
        # projections and adding the matching chunks.
        pre_i, pre_f, pre_w, pre_o = torch.chunk(
            self.w_i(input) + self.w_h(h_prev), 4, dim=-1
        )

        input_gate = torch.sigmoid(self.ln_i(pre_i))
        forget_gate = torch.sigmoid(self.ln_f(pre_f))
        candidate = torch.tanh(self.ln_w(pre_w))
        output_gate = torch.sigmoid(self.ln_o(pre_o))

        c_new = self.ln_c(forget_gate * c_prev + input_gate * candidate)
        h_new = output_gate * torch.tanh(c_new)

        return h_new, c_new

In [3]:
class StateExtr(nn.Module):
    """Stacked ``LayerNormLSTM`` cells unrolled over time.

    The first cell consumes ``input_node`` features and every further cell
    consumes ``hidden_node`` features.  Dropout is applied between layers (not
    after the last one).  After unrolling, time steps at or beyond each
    sample's ``seq_len`` are zeroed, and the result goes through a final
    ``LayerNorm`` followed by dropout.

    Args:
        input_node: Feature width of the input sequence.
        hidden_node: Hidden width of every cell.
        n_layer: Number of stacked cells.
        dropout: Dropout probability (between layers and on the output).
    """

    def __init__(self, input_node, hidden_node, n_layer, dropout):
        super().__init__()
        self.hidden_node = hidden_node
        self.n_layer = n_layer
        self.input_node = input_node

        # Input width per layer: the raw features first, then the hidden width.
        layer_inputs = [input_node] + [hidden_node] * (n_layer - 1)
        self.lstm_cells = nn.ModuleList(
            [LayerNormLSTM(width, hidden_node) for width in layer_inputs]
        )

        self.dropout = nn.Dropout(dropout)
        self.final_layer_norm = nn.LayerNorm(hidden_node)
        self.final_dropout = nn.Dropout(dropout)

    def forward(self, x, seq_len):
        """Encode a padded batch.

        Args:
            x: 3-D tensor unpacked as ``(batch, time, features)``.
            seq_len: 1-D tensor with the number of valid steps per sample.

        Returns:
            Tensor of shape ``(batch, time, hidden_node)``.
        """
        n_batch, n_steps, _ = x.size()
        dev = x.device

        # Zero initial hidden / cell state for every layer.
        h_states = [torch.zeros(n_batch, self.hidden_node, device=dev)
                    for _ in range(self.n_layer)]
        c_states = [torch.zeros(n_batch, self.hidden_node, device=dev)
                    for _ in range(self.n_layer)]

        top = len(self.lstm_cells) - 1
        step_outputs = []
        for t in range(n_steps):
            signal = x[:, t, :]
            for depth, cell in enumerate(self.lstm_cells):
                h_states[depth], c_states[depth] = cell(
                    signal, (h_states[depth], c_states[depth])
                )
                # Dropout only between layers; the top layer's output is kept as is.
                signal = h_states[depth] if depth == top else self.dropout(h_states[depth])
            step_outputs.append(signal)

        stacked = torch.stack(step_outputs, dim=1)

        # Validity mask is built on the CPU and then moved to the input's device.
        lengths = seq_len.detach().cpu().long()
        steps = torch.arange(n_steps, device='cpu')
        valid = (steps.unsqueeze(0) < lengths.unsqueeze(1)).float().to(dev).unsqueeze(-1)

        # Padded steps are zeroed *before* the LayerNorm.
        return self.final_dropout(self.final_layer_norm(stacked * valid))

In [4]:
class PhysicalChangeDecoder(nn.Module):
    """MLP head that maps hidden states to ``output_node`` values in (0, 1).

    The network is a sequence of ``Linear -> LayerNorm -> ReLU -> Dropout``
    blocks (at least one, ``n_layer`` in total) followed by a final ``Linear``
    and a ``Sigmoid``.

    Args:
        input_node: Feature width of the incoming hidden states.
        output_node: Number of outputs.
        n_layer: Number of hidden blocks.
        hidden_node: Width of every hidden block.
        dropout: Dropout probability inside each block.
    """

    def __init__(self, input_node, output_node, n_layer, hidden_node, dropout):
        super().__init__()

        stack = []
        fan_in = input_node
        # One block is always present; n_layer - 1 further blocks follow it.
        for _ in range(max(n_layer, 1)):
            stack += [
                nn.Linear(fan_in, hidden_node),
                nn.LayerNorm(hidden_node),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
            fan_in = hidden_node

        stack += [nn.Linear(hidden_node, output_node), nn.Sigmoid()]
        self.layers = nn.ModuleList(stack)

    def forward(self, hidden_states):
        """Apply every layer in order and return the sigmoid-squashed output."""
        out = hidden_states
        for stage in self.layers:
            out = stage(out)
        return out

In [5]:
class CurrentPredictor(nn.Module):
    """MLP regressor producing a single unbounded scalar per input vector.

    The network is a sequence of ``Linear -> LayerNorm -> ReLU -> Dropout``
    blocks (at least one, ``n_layer`` in total) followed by a final
    ``Linear(hidden_node, 1)`` with no output activation.

    Args:
        input_node: Feature width of the incoming state.
        hidden_node: Width of every hidden block.
        n_layer: Number of hidden blocks.
        dropout: Dropout probability inside each block.
    """

    def __init__(self, input_node, hidden_node, n_layer, dropout):
        super().__init__()

        stack = []
        fan_in = input_node
        # One block is always present; n_layer - 1 further blocks follow it.
        for _ in range(max(n_layer, 1)):
            stack += [
                nn.Linear(fan_in, hidden_node),
                nn.LayerNorm(hidden_node),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
            fan_in = hidden_node

        stack.append(nn.Linear(hidden_node, 1))
        self.layers = nn.ModuleList(stack)

    def forward(self, new_state):
        """Apply every layer in order and return the raw scalar prediction."""
        out = new_state
        for stage in self.layers:
            out = stage(out)
        return out

In [6]:
class PhysicsConstraintLayer(nn.Module):
    """Turn decoder outputs into the next normalised state via balance equations.

    The layer de-normalises the three volumes (VF, VA, VB) and four
    concentrations (CFLA, CALA, CFK, CBK) of the current state, forms the
    products ``N = C * V`` (presumably amounts of substance -- confirm),
    applies the changes proposed by the decoder so that the volume sum and the
    N-pair sums are conserved, converts back to concentrations, and finally
    asks ``current_predictor`` for the current ``I`` of the updated state.

    Args:
        range_mm: Mapping ``feature name -> {'min': ..., 'max': ...}`` used for
            min-max (de)normalisation.  Features missing from the mapping keep
            a ``[0, 0]`` range, for which ``normalize`` divides by zero.
        current_predictor: Module mapping the 9-feature state to one value.
        eps: Lower bound, in de-normalised units, applied to the updated
            volumes before they are used as divisors.
    """

    def __init__(self, range_mm, current_predictor, eps=1e-2):
        super().__init__()
        self.eps = eps
        # Original (misspelled) attribute name, kept in case anything reads it.
        self.sps = eps
        self.current_predictor = current_predictor
        self.register_buffer('range_mm_tensor', self._convert_range_to_tensor(range_mm))

    def _convert_range_to_tensor(self, range_mm):
        """Return a ``(10, 2)`` tensor of ``[min, max]`` rows in feature order."""
        feature_names = ['V','E','VF','VA','VB','CFLA','CALA','CFK','CBK','I']
        ranges = torch.zeros(len(feature_names), 2)
        for i, name in enumerate(feature_names):
            if name in range_mm:
                ranges[i, 0] = range_mm[name]['min']
                ranges[i, 1] = range_mm[name]['max']
        return ranges

    def normalize(self, data, feature_idx):
        """Min-max scale ``data`` with the range stored for ``feature_idx``."""
        min_val = self.range_mm_tensor[feature_idx, 0]
        max_val = self.range_mm_tensor[feature_idx, 1]
        return (data - min_val) / (max_val - min_val)

    def denormalize(self, data, feature_idx):
        """Inverse of :meth:`normalize` for the same ``feature_idx``."""
        min_val = self.range_mm_tensor[feature_idx, 0]
        max_val = self.range_mm_tensor[feature_idx, 1]
        return data * (max_val - min_val) + min_val

    def forward(self, physical_changes, current_state):
        """Compute the next state.

        Args:
            physical_changes: Tensor whose last dimension holds
                ``[rdVA, rdVB, rLA2K, rdNBK]``.
            current_state: Normalised state whose last dimension holds
                ``[V, E, VF, VA, VB, CFLA, CALA, CFK, CBK]``.

        Returns:
            Normalised next state with the predicted ``I`` appended
            (last dimension of size 10).
        """
        VF_idx, VA_idx, VB_idx = 2, 3, 4
        CFLA_idx, CALA_idx, CFK_idx, CBK_idx, I_idx = 5, 6, 7, 8, 9

        def physical(idx):
            # Slice one feature (keeping the last dim) and undo its scaling.
            return self.denormalize(current_state[..., idx:idx + 1], idx)

        VF, VA, VB = physical(VF_idx), physical(VA_idx), physical(VB_idx)
        CFLA, CALA = physical(CFLA_idx), physical(CALA_idx)
        CFK, CBK = physical(CFK_idx), physical(CBK_idx)

        NFLA = CFLA * VF
        NALA = CALA * VA
        NFK = CFK * VF
        NBK = CBK * VB

        rdVA = physical_changes[..., 0:1]
        rdVB = physical_changes[..., 1:2]
        rLA2K = physical_changes[..., 2:3]
        rdNBK = physical_changes[..., 3:4]

        # Volume changes are centred at 0.5, so each lies in [-VF/2, +VF/2].
        dVA = VF * (rdVA - 0.5)
        dVB = VF * (rdVB - 0.5)
        dNBK = NFK * rdNBK
        dNALA = dNBK * rLA2K

        nVF = VF - dVA - dVB
        nVA = VA + dVA
        nVB = VB + dVB

        nNFLA = NFLA - dNALA
        nNALA = NALA + dNALA
        nNFK = NFK - dNBK
        nNBK = NBK + dNBK

        # nVF reaches exactly 0 when rdVA = rdVB = 1 (and nVA / nVB can drop
        # to or below 0), which would turn the concentrations into inf / NaN.
        # Bound the divisors from below by eps.
        safe_VF = torch.clamp(nVF, min=self.eps)
        safe_VA = torch.clamp(nVA, min=self.eps)
        safe_VB = torch.clamp(nVB, min=self.eps)

        nCFLA = nNFLA / safe_VF
        nCALA = nNALA / safe_VA
        nCFK = nNFK / safe_VF
        nCBK = nNBK / safe_VB

        # V and E are passed through unchanged (still normalised).
        V = current_state[..., 0:1]
        E = current_state[..., 1:2]
        nVF_norm = self.normalize(nVF, VF_idx)
        nVA_norm = self.normalize(nVA, VA_idx)
        nVB_norm = self.normalize(nVB, VB_idx)
        nCFLA_norm = self.normalize(nCFLA, CFLA_idx)
        nCALA_norm = self.normalize(nCALA, CALA_idx)
        nCFK_norm = self.normalize(nCFK, CFK_idx)
        nCBK_norm = self.normalize(nCBK, CBK_idx)

        temp_state = torch.cat([
            V, E, nVF_norm, nVA_norm, nVB_norm, nCFLA_norm, nCALA_norm, nCFK_norm, nCBK_norm
        ], dim=-1)

        # The predicted current is clamped to be non-negative in real units.
        nI_pred_norm = self.current_predictor(temp_state)
        nI_real = self.denormalize(nI_pred_norm, I_idx)
        nI_real = torch.clamp(nI_real, min=0.0)
        nI_norm = self.normalize(nI_real, I_idx)

        next_state = torch.cat([temp_state, nI_norm], dim=-1)

        return next_state

In [7]:
class BMEDAutoregressiveModel(nn.Module):
    """Full pipeline: state extractor -> change decoder -> physics constraint.

    The ``CurrentPredictor`` instance is created here and handed to the
    ``PhysicsConstraintLayer``, so both attributes refer to the same module.

    Args:
        state_extr_params: Keyword arguments for ``StateExtr``.
        decoder_params: Keyword arguments for ``PhysicalChangeDecoder``.
        current_predictor_params: Keyword arguments for ``CurrentPredictor``.
        range_mm: Per-feature min/max mapping passed to the constraint layer.
    """

    def __init__(self, state_extr_params, decoder_params, current_predictor_params, range_mm):
        super().__init__()
        self.state_extr = StateExtr(**state_extr_params)
        self.physical_decoder = PhysicalChangeDecoder(**decoder_params)
        self.current_predictor = CurrentPredictor(**current_predictor_params)
        self.physics_constraint = PhysicsConstraintLayer(
            range_mm,
            current_predictor=self.current_predictor,
        )

    def forward(self, x, seq_len):
        """Return the constrained next state for every time step of ``x``."""
        encoded = self.state_extr(x, seq_len)
        proposed_changes = self.physical_decoder(encoded)
        return self.physics_constraint(proposed_changes, x)

In [8]:
class NoamScheduler:
    """Epoch-level learning-rate schedule with linear warm-up and 1/sqrt decay.

    After the ``n``-th call to :meth:`step_epoch` the learning rate is

        factor * model_size**-0.5 * min(n**-0.5, n * warmup_epochs**-1.5)

    and is written into every parameter group of ``optimizer``.

    Args:
        optimizer: Object exposing ``param_groups`` (a list of dicts).
        model_size: Width used for the ``model_size**-0.5`` scaling.
        warmup_epochs: Number of warm-up epochs.  A value ``<= 0`` disables
            the warm-up term instead of raising on ``0 ** -1.5``.
        factor: Global multiplier applied to the schedule.
    """

    def __init__(self, optimizer, model_size, warmup_epochs, factor=1.0):
        self.optimizer = optimizer
        self.model_size = model_size
        self.warmup_epochs = warmup_epochs
        # Honour the caller's value (it used to be overwritten with 1).
        self.factor = factor
        self.epoch_num = 0

    def step_epoch(self):
        """Advance one epoch, update all parameter groups and return the new lr."""
        self.epoch_num += 1

        decay = self.epoch_num ** (-0.5)
        if self.warmup_epochs > 0:
            warmup = self.epoch_num * self.warmup_epochs ** (-1.5)
            scale = min(decay, warmup)
        else:
            # No warm-up requested: pure inverse-square-root decay.
            scale = decay

        lr = self.factor * (self.model_size ** (-0.5) * scale)
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        return lr

In [9]:
# 유틸리티 함수들
def df_treat(name):
    """Load the experiment CSV and min-max normalise its ten feature columns.

    The normalisation range of each feature is widened beyond the observed
    data: the upper bound is ``1.2 * max``; the lower bound is ``0.8 * min``
    for V, E, VF, VA and VB and a fixed ``0`` for the remaining columns.

    Args:
        name: Path of the CSV file; it must contain ``exp``, ``t`` and the
            ten feature columns.

    Returns:
        Tuple ``(df, ndf, range_mm, exp_num_list)``: the raw frame, the
        normalised frame (``exp``, ``t`` plus features), the range mapping
        ``{feature: {'min': ..., 'max': ...}}`` and the sorted unique ``exp``
        values.
    """
    feature_cols = ['V', 'E', 'VF', 'VA', 'VB', 'CFLA', 'CALA', 'CFK', 'CBK', 'I']
    # Columns whose lower bound is derived from the data instead of fixed at 0.
    scaled_min_cols = ('V', 'E', 'VF', 'VA', 'VB')

    df = pd.read_csv(name)

    range_mm = {}
    for col in feature_cols:
        lower = df[col].min() * 0.8 if col in scaled_min_cols else 0
        range_mm[col] = {'min': lower, 'max': df[col].max() * 1.2}

    ndf = pd.DataFrame()
    ndf['exp'] = df['exp']
    ndf['t'] = df['t']
    for col in feature_cols:
        bounds = range_mm[col]
        ndf[col] = (df[col] - bounds['min']) / (bounds['max'] - bounds['min'])

    exp_num_list = sorted(ndf['exp'].unique())
    return df, ndf, range_mm, exp_num_list

def seq_data(ndf, exp_num_list):
    """Split the frame into one feature array per experiment.

    Args:
        ndf: Frame with an ``exp`` column and the ten feature columns.
        exp_num_list: Experiment ids to extract, in the desired output order.

    Returns:
        List of 2-D NumPy arrays (rows of that experiment x 10 features).
    """
    feature_cols = ['V', 'E', 'VF', 'VA', 'VB', 'CFLA', 'CALA', 'CFK', 'CBK', 'I']
    return [
        ndf.loc[ndf['exp'] == exp_id, feature_cols].values
        for exp_id in exp_num_list
    ]

def pad_seq(seq):
    """Right-pad variable-length sequences with ``-1`` into one batch tensor.

    Args:
        seq: Non-empty list of array-likes sharing all but the first dimension.

    Returns:
        Tuple ``(padded, seq_len, max_len)``: the batch-first padded tensor,
        the list of original lengths and the longest length.
    """
    seq_len = list(map(len, seq))
    max_len = max(seq_len)
    tensors = [torch.tensor(item) for item in seq]
    padded = pad_sequence(tensors, batch_first=True, padding_value=-1)
    return padded, seq_len, max_len

def gen_dataset(pad_seq, seq_len):
    """Wrap the padded batch and its lengths in a ``TensorDataset``.

    Args:
        pad_seq: Padded sequence tensor; it is cast to ``float32``.
        seq_len: Per-sequence lengths (converted with ``torch.tensor``).

    Returns:
        ``TensorDataset`` yielding ``(sequence, length)`` pairs.
    """
    return TensorDataset(pad_seq.float(), torch.tensor(seq_len))

def masked_mse_loss(pred, target, seq_len):
    """Squared-error loss over the valid (non-padded) time steps of a batch.

    The element-wise squared error is summed over every feature of every
    valid step and divided by the number of valid *time steps*, so the result
    is the mean per-step sum of squared errors (``features`` times the plain
    element-wise mean).  This scaling is kept so loss values stay comparable
    with previously recorded ones.

    Args:
        pred: Predictions of shape ``(batch, time, features)``.
        target: Targets with the same shape as ``pred``.
        seq_len: 1-D tensor with the number of valid steps per sample.

    Returns:
        Scalar tensor.  When the batch has no valid step at all the result is
        ``0`` rather than ``0 / 0 = NaN``, which would otherwise propagate
        into the weights on the next optimiser step.
    """
    _, max_len, _ = pred.shape

    lengths = seq_len.detach().to(pred.device).long()
    steps = torch.arange(max_len, device=pred.device)
    mask = (steps[None, :] < lengths[:, None]).float()

    squared_error = F.mse_loss(pred, target, reduction='none')
    total_loss = (squared_error * mask.unsqueeze(-1)).sum()

    # Any non-empty mask sums to >= 1, so the clamp only affects the empty case.
    valid_steps = mask.sum().clamp(min=1.0)
    return total_loss / valid_steps

def tf_data(input_seq, seq_len):
    """Build teacher-forcing pairs from a padded batch.

    Args:
        input_seq: Tensor indexed as ``(batch, time, features)``.
        seq_len: Per-sample sequence lengths.

    Returns:
        Tuple ``(inputs, targets, target_seq_len)`` where ``inputs`` holds
        every step but the last with the final feature column removed,
        ``targets`` holds every step but the first with all features, and
        ``target_seq_len`` is ``seq_len - 1``.
    """
    all_but_last_step = input_seq[:, :-1, :]
    inputs = all_but_last_step[:, :, :-1]   # drop the last feature column
    targets = input_seq[:, 1:, :]           # shifted one step ahead
    return inputs, targets, seq_len - 1

In [10]:
# Optuna 목적 함수
def objective(trial):
    """Optuna objective: mean K-fold loss of one sampled architecture.

    Samples the hidden width, depth and dropout of the three sub-networks,
    trains a fresh ``BMEDAutoregressiveModel`` on each of 5 folds for up to
    100 epochs and records, per fold, the smallest ``train_loss + val_loss``
    seen in any epoch.  Reads the module-level globals ``dataset`` and
    ``range_mm`` (set by ``run_optuna_optimization``).

    Args:
        trial: The ``optuna`` trial used to sample hyper-parameters.

    Returns:
        Mean of the per-fold best losses.  A fold in which no epoch completed
        contributes ``inf``.
    """

    # 1. Suggest hyper-parameters
    # LSTM state extractor
    lstm_hidden_size = trial.suggest_categorical('lstm_hidden_size', [16, 32, 48, 64, 72, 96])
    lstm_n_layers = trial.suggest_int('lstm_n_layers', 2, 6, step=1)
    lstm_dropout = trial.suggest_float('lstm_dropout', 0.1, 0.5, step=0.1)

    # Physical-change decoder
    decoder_hidden_size = trial.suggest_categorical('decoder_hidden_size', [16, 32, 48, 64, 72, 96])
    decoder_n_layers = trial.suggest_int('decoder_n_layers', 2, 6, step=1)
    decoder_dropout = trial.suggest_float('decoder_dropout', 0.1, 0.6, step=0.1)

    # Current predictor
    current_hidden_size = trial.suggest_categorical('current_hidden_size', [16, 32, 48, 64, 72, 96])
    current_n_layers = trial.suggest_int('current_n_layers', 2, 6, step=1)
    current_dropout = trial.suggest_float('current_dropout', 0.1, 0.6, step=0.1)

    # 2. Model keyword arguments depend only on the trial, so build them once.
    state_extr_params = {
        'input_node': 9,
        'hidden_node': lstm_hidden_size,
        'n_layer': lstm_n_layers,
        'dropout': lstm_dropout
    }

    decoder_params = {
        'input_node': lstm_hidden_size,
        'hidden_node': decoder_hidden_size,
        'n_layer': decoder_n_layers,
        'dropout': decoder_dropout,
        'output_node': 4
    }

    current_predictor_params = {
        'input_node': 9,
        'hidden_node': current_hidden_size,
        'n_layer': current_n_layers,
        'dropout': current_dropout
    }

    # 3. K-fold cross-validation setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    n_splits = 5
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Epoch budget per fold; the first 10 % of the epochs are warm-up.
    total_epochs = 100
    warmup_epochs = int(total_epochs * 0.1)

    fold_losses = []

    # `dataset` is a module-level global.
    indices = list(range(len(dataset)))

    for fold, (train_idx, val_idx) in enumerate(kfold.split(indices)):
        print(f"  🔄 Trial {trial.number}, Fold {fold+1}/{n_splits}")

        train_loader = DataLoader(Subset(dataset, train_idx), batch_size=3, shuffle=True)
        val_loader = DataLoader(Subset(dataset, val_idx), batch_size=3, shuffle=False)

        # 4. Fresh model, optimizer and scheduler for every fold
        model = BMEDAutoregressiveModel(state_extr_params, decoder_params, current_predictor_params, range_mm)
        model = model.to(device)

        # lr=1.0 is a placeholder; the scheduler overwrites it before each epoch.
        optimizer = torch.optim.AdamW(model.parameters(), lr=1.0)
        scheduler = NoamScheduler(
            optimizer,
            model_size=lstm_hidden_size,
            warmup_epochs=warmup_epochs,
            factor=1
        )

        # 5. Train / validate
        best_total_loss = float('inf')

        for epoch in range(total_epochs):
            scheduler.step_epoch()

            train_loss = _run_epoch(model, train_loader, device, optimizer)
            if train_loss is None:
                # Every training batch failed: give up on this fold.
                break

            val_loss = _run_epoch(model, val_loader, device)
            if val_loss is None:
                break

            # Track the best combined loss (training always runs all epochs;
            # nothing is stopped early).
            total_loss = train_loss + val_loss
            if total_loss < best_total_loss:
                best_total_loss = total_loss

        fold_losses.append(best_total_loss)
        print(f"    Fold {fold+1} best total loss: {best_total_loss:.6f}")

        # Release per-fold objects before building the next model.
        del model, optimizer, scheduler
        torch.cuda.empty_cache()

    # 6. Aggregate over folds
    avg_loss = np.mean(fold_losses)
    std_loss = np.std(fold_losses)

    print(f"  📊 Trial {trial.number} - Average CV Loss: {avg_loss:.6f} (±{std_loss:.6f})")

    return avg_loss


def _run_epoch(model, loader, device, optimizer=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    A batch that raises is reported and skipped (best effort), in both modes.

    Returns:
        Mean loss over the batches that succeeded, or ``None`` if none did.
    """
    training = optimizer is not None
    phase = "training" if training else "validation"
    model.train(training)

    running_loss = 0.0
    n_batches = 0

    with torch.set_grad_enabled(training):
        for input_seq, seq_len in loader:
            try:
                input_seq = input_seq.to(device)
                seq_len = seq_len.to(device)

                inputs, targets, target_seq_len = tf_data(input_seq, seq_len)

                if training:
                    optimizer.zero_grad()

                pred = model(inputs, target_seq_len)
                loss = masked_mse_loss(pred, targets, target_seq_len)

                if training:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                    optimizer.step()

                running_loss += loss.item()
                n_batches += 1

            except Exception as e:
                # Report instead of silently dropping the batch.
                print(f"❌ Error in {phase}: {str(e)}")
                continue

    if n_batches == 0:
        return None
    return running_loss / n_batches

In [11]:
# 메인 최적화 함수
def run_optuna_optimization():
    """Load the data, run (or resume) the Optuna study and report the results.

    Side effects: sets the module-level globals ``dataset`` and ``range_mm``
    that ``objective`` reads, reads ``BMED_DATA_AG.csv``, stores the study in
    ``bmed_hpopt_study.db`` (SQLite) and writes a time-stamped CSV with every
    trial.

    Returns:
        The ``optuna`` study object.
    """
    global dataset, range_mm

    print("🚀 BMED TF Model Hyperparameter Optimization with Optuna")
    print("="*80)

    # Load the data into the globals used by `objective`.
    print("📋 데이터 로드 중...")
    _raw_df, ndf, range_mm, exp_num_list = df_treat('BMED_DATA_AG.csv')
    seq = seq_data(ndf, exp_num_list)
    pad, seq_len, max_len = pad_seq(seq)
    dataset = gen_dataset(pad, seq_len)

    print(f"   - 총 실험 개수: {len(exp_num_list)}")
    print(f"   - 총 데이터 포인트: {len(dataset)}")
    print(f"   - 최대 시퀀스 길이: {max_len}")

    # SQLite-backed study: an existing study of the same name is resumed.
    db_url = "sqlite:///bmed_hpopt_study.db"

    study = optuna.create_study(
        direction='minimize',
        study_name='bmed_tf_optimization',
        sampler=optuna.samplers.TPESampler(seed=42),
        storage=db_url,
        load_if_exists=True
    )

    # Run the optimisation.
    # NOTE(review): when resuming, this presumably runs n_trials *additional*
    # trials rather than topping the study up to n_trials -- confirm.
    n_trials = 100
    print(f"🔍 최적화 시작 (총 {n_trials} trials)")

    try:
        study.optimize(objective, n_trials=n_trials, timeout=None)
    except KeyboardInterrupt:
        print("\n⚠️ 최적화가 사용자에 의해 중단되었습니다.")

    # Summarise the results.
    print("\n" + "="*80)
    print("📊 OPTIMIZATION RESULTS")
    print("="*80)

    # Only finished trials count as "completed"; failed / running ones do not.
    completed_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
    print(f"✅ 완료된 trials: {len(completed_trials)}")

    trials_df = study.trials_dataframe()

    if completed_trials:
        print(f"🏆 최고 성능 trial: {study.best_trial.number}")
        print(f"💯 최고 성능 값: {study.best_value:.6f}")

        print("\n🎯 최적 하이퍼파라미터:")
        for key, value in study.best_params.items():
            print(f"   {key}: {value}")

        # Top five trials by objective value (trials without a value are skipped).
        print("\n📈 상위 5개 Trials:")
        top_trials = trials_df.dropna(subset=['value']).sort_values('value').head(5)
        for idx, (_, row) in enumerate(top_trials.iterrows()):
            print(f"   {idx+1}. Trial {int(row['number'])}: {row['value']:.6f}")
    else:
        # study.best_trial raises when nothing has completed (e.g. early Ctrl-C).
        print("⚠️ 완료된 trial이 없어 최적 결과를 요약할 수 없습니다.")

    # Save every trial to a time-stamped CSV.
    result_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    trials_file = f"bmed_optuna_trials_{result_timestamp}.csv"
    trials_df.to_csv(trials_file, index=False)
    print(f"💾 모든 trials 결과가 저장되었습니다: {trials_file}")

    # Where the study itself lives.
    print(f"💾 SQLite 데이터베이스에 실시간 저장됨: {db_url}")
    print("   - 중단 후 재시작 시 자동으로 기존 결과를 불러옵니다")
    print("   - 다른 프로세스에서 진행상황 모니터링 가능합니다")

    print("="*80)
    print("🎉 하이퍼파라미터 최적화 완료!")

    return study

# Entry point: run the hyper-parameter search and keep the resulting study
# object in the module-level name `study`.
if __name__ == "__main__":
    study = run_optuna_optimization()

🚀 BMED TF Model Hyperparameter Optimization with Optuna
📋 데이터 로드 중...
   - 총 실험 개수: 15
   - 총 데이터 포인트: 15
   - 최대 시퀀스 길이: 37


[I 2025-09-29 19:08:44,198] Using an existing study with name 'bmed_tf_optimization' instead of creating a new one.


🔍 최적화 시작 (총 100 trials)
  🔄 Trial 13, Fold 1/5
    Fold 1 best total loss: 0.016504
  🔄 Trial 13, Fold 2/5
    Fold 2 best total loss: 0.319265
  🔄 Trial 13, Fold 3/5
    Fold 3 best total loss: 0.025245
  🔄 Trial 13, Fold 4/5
    Fold 4 best total loss: 0.019693
  🔄 Trial 13, Fold 5/5


[I 2025-09-29 19:16:56,095] Trial 13 finished with value: 0.0816689093131572 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 5, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.6, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.6}. Best is trial 10 with value: 0.02191737436223775.


    Fold 5 best total loss: 0.027637
  📊 Trial 13 - Average CV Loss: 0.081669 (±0.118863)
  🔄 Trial 14, Fold 1/5
    Fold 1 best total loss: 0.013320
  🔄 Trial 14, Fold 2/5
    Fold 2 best total loss: 0.025548
  🔄 Trial 14, Fold 3/5
    Fold 3 best total loss: 0.024671
  🔄 Trial 14, Fold 4/5
    Fold 4 best total loss: 0.018073
  🔄 Trial 14, Fold 5/5


[I 2025-09-29 19:24:09,138] Trial 14 finished with value: 0.02085857084020972 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.6, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 14 with value: 0.02085857084020972.


    Fold 5 best total loss: 0.022680
  📊 Trial 14 - Average CV Loss: 0.020859 (±0.004570)
  🔄 Trial 15, Fold 1/5
    Fold 1 best total loss: 0.013427
  🔄 Trial 15, Fold 2/5
    Fold 2 best total loss: 0.025672
  🔄 Trial 15, Fold 3/5
    Fold 3 best total loss: 0.022725
  🔄 Trial 15, Fold 4/5
    Fold 4 best total loss: 0.020386
  🔄 Trial 15, Fold 5/5


[I 2025-09-29 19:31:07,534] Trial 15 finished with value: 0.02015695902518928 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 15 with value: 0.02015695902518928.


    Fold 5 best total loss: 0.018575
  📊 Trial 15 - Average CV Loss: 0.020157 (±0.004120)
  🔄 Trial 16, Fold 1/5
    Fold 1 best total loss: 0.020895
  🔄 Trial 16, Fold 2/5
    Fold 2 best total loss: 0.024323
  🔄 Trial 16, Fold 3/5
    Fold 3 best total loss: 0.032728
  🔄 Trial 16, Fold 4/5
    Fold 4 best total loss: 0.027955
  🔄 Trial 16, Fold 5/5


[I 2025-09-29 19:37:55,518] Trial 16 finished with value: 0.02734473315067589 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.5}. Best is trial 15 with value: 0.02015695902518928.


    Fold 5 best total loss: 0.030822
  📊 Trial 16 - Average CV Loss: 0.027345 (±0.004294)
  🔄 Trial 17, Fold 1/5
    Fold 1 best total loss: 0.028199
  🔄 Trial 17, Fold 2/5
    Fold 2 best total loss: 0.109707
  🔄 Trial 17, Fold 3/5
    Fold 3 best total loss: 0.034742
  🔄 Trial 17, Fold 4/5
    Fold 4 best total loss: 0.035096
  🔄 Trial 17, Fold 5/5


[I 2025-09-29 19:44:47,071] Trial 17 finished with value: 0.053200533823110165 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 32, 'current_n_layers': 4, 'current_dropout': 0.5}. Best is trial 15 with value: 0.02015695902518928.


    Fold 5 best total loss: 0.058259
  📊 Trial 17 - Average CV Loss: 0.053201 (±0.030041)
  🔄 Trial 18, Fold 1/5
    Fold 1 best total loss: 0.032605
  🔄 Trial 18, Fold 2/5
    Fold 2 best total loss: 0.032154
  🔄 Trial 18, Fold 3/5
    Fold 3 best total loss: 0.041565
  🔄 Trial 18, Fold 4/5
    Fold 4 best total loss: 0.033009
  🔄 Trial 18, Fold 5/5


[I 2025-09-29 19:49:57,205] Trial 18 finished with value: 0.03667394984513521 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 4, 'decoder_dropout': 0.5, 'current_hidden_size': 32, 'current_n_layers': 4, 'current_dropout': 0.5}. Best is trial 15 with value: 0.02015695902518928.


    Fold 5 best total loss: 0.044037
  📊 Trial 18 - Average CV Loss: 0.036674 (±0.005071)
  🔄 Trial 19, Fold 1/5
    Fold 1 best total loss: 0.011934
  🔄 Trial 19, Fold 2/5
    Fold 2 best total loss: 0.020710
  🔄 Trial 19, Fold 3/5
    Fold 3 best total loss: 0.021861
  🔄 Trial 19, Fold 4/5
    Fold 4 best total loss: 0.015651
  🔄 Trial 19, Fold 5/5


[I 2025-09-29 19:55:08,364] Trial 19 finished with value: 0.017774961609393358 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.5, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.4}. Best is trial 19 with value: 0.017774961609393358.


    Fold 5 best total loss: 0.018719
  📊 Trial 19 - Average CV Loss: 0.017775 (±0.003601)
  🔄 Trial 20, Fold 1/5
    Fold 1 best total loss: 0.013520
  🔄 Trial 20, Fold 2/5
    Fold 2 best total loss: 0.021744
  🔄 Trial 20, Fold 3/5
    Fold 3 best total loss: 0.021832
  🔄 Trial 20, Fold 4/5
    Fold 4 best total loss: 0.016808
  🔄 Trial 20, Fold 5/5


[I 2025-09-29 20:00:20,479] Trial 20 finished with value: 0.01941157344263047 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.5, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.4}. Best is trial 19 with value: 0.017774961609393358.


    Fold 5 best total loss: 0.023154
  📊 Trial 20 - Average CV Loss: 0.019412 (±0.003655)
  🔄 Trial 21, Fold 1/5
    Fold 1 best total loss: 0.317323
  🔄 Trial 21, Fold 2/5
    Fold 2 best total loss: 0.319882
  🔄 Trial 21, Fold 3/5
    Fold 3 best total loss: 0.225906
  🔄 Trial 21, Fold 4/5
    Fold 4 best total loss: 0.201773
  🔄 Trial 21, Fold 5/5


[I 2025-09-29 20:05:18,060] Trial 21 finished with value: 0.2821568480692804 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.4, 'current_hidden_size': 72, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 19 with value: 0.017774961609393358.


    Fold 5 best total loss: 0.345900
  📊 Trial 21 - Average CV Loss: 0.282157 (±0.057182)
  🔄 Trial 22, Fold 1/5
    Fold 1 best total loss: 0.012259
  🔄 Trial 22, Fold 2/5
    Fold 2 best total loss: 0.019911
  🔄 Trial 22, Fold 3/5
    Fold 3 best total loss: 0.023229
  🔄 Trial 22, Fold 4/5
    Fold 4 best total loss: 0.014308
  🔄 Trial 22, Fold 5/5


[I 2025-09-29 20:10:16,113] Trial 22 finished with value: 0.01805626475252211 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.5, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.4}. Best is trial 19 with value: 0.017774961609393358.


    Fold 5 best total loss: 0.020574
  📊 Trial 22 - Average CV Loss: 0.018056 (±0.004104)
  🔄 Trial 23, Fold 1/5
    Fold 1 best total loss: 0.317916
  🔄 Trial 23, Fold 2/5
    Fold 2 best total loss: 0.021760
  🔄 Trial 23, Fold 3/5
    Fold 3 best total loss: 0.025080
  🔄 Trial 23, Fold 4/5
    Fold 4 best total loss: 0.023167
  🔄 Trial 23, Fold 5/5


[I 2025-09-29 20:13:41,697] Trial 23 finished with value: 0.08256013123318554 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.4, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.5, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.4}. Best is trial 19 with value: 0.017774961609393358.


    Fold 5 best total loss: 0.024878
  📊 Trial 23 - Average CV Loss: 0.082560 (±0.117684)
  🔄 Trial 24, Fold 1/5
    Fold 1 best total loss: 0.010043
  🔄 Trial 24, Fold 2/5
    Fold 2 best total loss: 0.018906
  🔄 Trial 24, Fold 3/5
    Fold 3 best total loss: 0.021450
  🔄 Trial 24, Fold 4/5
    Fold 4 best total loss: 0.017306
  🔄 Trial 24, Fold 5/5


[I 2025-09-29 20:18:38,294] Trial 24 finished with value: 0.016999375366140157 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.4, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 24 with value: 0.016999375366140157.


    Fold 5 best total loss: 0.017292
  📊 Trial 24 - Average CV Loss: 0.016999 (±0.003795)
  🔄 Trial 25, Fold 1/5
    Fold 1 best total loss: 0.009263
  🔄 Trial 25, Fold 2/5
    Fold 2 best total loss: 0.019447
  🔄 Trial 25, Fold 3/5
    Fold 3 best total loss: 0.019501
  🔄 Trial 25, Fold 4/5
    Fold 4 best total loss: 0.011440
  🔄 Trial 25, Fold 5/5


[I 2025-09-29 20:23:47,224] Trial 25 finished with value: 0.01515727280639112 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.016136
  📊 Trial 25 - Average CV Loss: 0.015157 (±0.004166)
  🔄 Trial 26, Fold 1/5
    Fold 1 best total loss: 0.009379
  🔄 Trial 26, Fold 2/5
    Fold 2 best total loss: 0.017493
  🔄 Trial 26, Fold 3/5
    Fold 3 best total loss: 0.019678
  🔄 Trial 26, Fold 4/5
    Fold 4 best total loss: 0.014946
  🔄 Trial 26, Fold 5/5


[I 2025-09-29 20:27:21,227] Trial 26 finished with value: 0.01529569566482678 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.014983
  📊 Trial 26 - Average CV Loss: 0.015296 (±0.003443)
  🔄 Trial 27, Fold 1/5
    Fold 1 best total loss: 0.010633
  🔄 Trial 27, Fold 2/5
    Fold 2 best total loss: 0.015048
  🔄 Trial 27, Fold 3/5
    Fold 3 best total loss: 0.021227
  🔄 Trial 27, Fold 4/5
    Fold 4 best total loss: 0.015523
  🔄 Trial 27, Fold 5/5


[I 2025-09-29 20:30:45,384] Trial 27 finished with value: 0.016069229878485202 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.017915
  📊 Trial 27 - Average CV Loss: 0.016069 (±0.003490)
  🔄 Trial 28, Fold 1/5
    Fold 1 best total loss: 0.315865
  🔄 Trial 28, Fold 2/5
    Fold 2 best total loss: 0.317866
  🔄 Trial 28, Fold 3/5
    Fold 3 best total loss: 0.077913
  🔄 Trial 28, Fold 4/5
    Fold 4 best total loss: 0.202736
  🔄 Trial 28, Fold 5/5


[I 2025-09-29 20:34:11,204] Trial 28 finished with value: 0.20721907829865813 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 6, 'current_dropout': 0.30000000000000004}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.121716
  📊 Trial 28 - Average CV Loss: 0.207219 (±0.098079)
  🔄 Trial 29, Fold 1/5
    Fold 1 best total loss: 0.316532
  🔄 Trial 29, Fold 2/5
    Fold 2 best total loss: 0.318557
  🔄 Trial 29, Fold 3/5
    Fold 3 best total loss: 0.015553
  🔄 Trial 29, Fold 4/5
    Fold 4 best total loss: 0.010769
  🔄 Trial 29, Fold 5/5


[I 2025-09-29 20:37:39,407] Trial 29 finished with value: 0.20131087851477786 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 72, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.345144
  📊 Trial 29 - Average CV Loss: 0.201311 (±0.153963)
  🔄 Trial 30, Fold 1/5
    Fold 1 best total loss: 0.014967
  🔄 Trial 30, Fold 2/5
    Fold 2 best total loss: 0.026774
  🔄 Trial 30, Fold 3/5
    Fold 3 best total loss: 0.027567
  🔄 Trial 30, Fold 4/5
    Fold 4 best total loss: 0.021299
  🔄 Trial 30, Fold 5/5


[I 2025-09-29 20:41:11,580] Trial 30 finished with value: 0.08727262308821082 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 64, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 16, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.345756
  📊 Trial 30 - Average CV Loss: 0.087273 (±0.129321)
  🔄 Trial 31, Fold 1/5
    Fold 1 best total loss: 0.316133
  🔄 Trial 31, Fold 2/5
    Fold 2 best total loss: 0.319043
  🔄 Trial 31, Fold 3/5
    Fold 3 best total loss: 0.017052
  🔄 Trial 31, Fold 4/5
    Fold 4 best total loss: 0.013495
  🔄 Trial 31, Fold 5/5


[I 2025-09-29 20:44:40,808] Trial 31 finished with value: 0.17749768403591587 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 4, 'current_dropout': 0.30000000000000004}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.221765
  📊 Trial 31 - Average CV Loss: 0.177498 (±0.137007)
  🔄 Trial 32, Fold 1/5
    Fold 1 best total loss: 0.009617
  🔄 Trial 32, Fold 2/5
    Fold 2 best total loss: 0.022311
  🔄 Trial 32, Fold 3/5
    Fold 3 best total loss: 0.015742
  🔄 Trial 32, Fold 4/5
    Fold 4 best total loss: 0.016825
  🔄 Trial 32, Fold 5/5


[I 2025-09-29 20:48:12,716] Trial 32 finished with value: 0.015805650979746132 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.014534
  📊 Trial 32 - Average CV Loss: 0.015806 (±0.004081)
  🔄 Trial 33, Fold 1/5
    Fold 1 best total loss: 0.009910
  🔄 Trial 33, Fold 2/5
    Fold 2 best total loss: 0.020160
  🔄 Trial 33, Fold 3/5
    Fold 3 best total loss: 0.019013
  🔄 Trial 33, Fold 4/5
    Fold 4 best total loss: 0.014574
  🔄 Trial 33, Fold 5/5


[I 2025-09-29 20:51:34,888] Trial 33 finished with value: 0.01575434682890773 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 25 with value: 0.01515727280639112.


    Fold 5 best total loss: 0.015114
  📊 Trial 33 - Average CV Loss: 0.015754 (±0.003633)
  🔄 Trial 34, Fold 1/5
    Fold 1 best total loss: 0.007802
  🔄 Trial 34, Fold 2/5
    Fold 2 best total loss: 0.016540
  🔄 Trial 34, Fold 3/5
    Fold 3 best total loss: 0.017323
  🔄 Trial 34, Fold 4/5
    Fold 4 best total loss: 0.014300
  🔄 Trial 34, Fold 5/5


[I 2025-09-29 20:54:53,493] Trial 34 finished with value: 0.01374785042135045 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 34 with value: 0.01374785042135045.


    Fold 5 best total loss: 0.012775
  📊 Trial 34 - Average CV Loss: 0.013748 (±0.003382)
  🔄 Trial 35, Fold 1/5
    Fold 1 best total loss: 0.317331
  🔄 Trial 35, Fold 2/5
    Fold 2 best total loss: 0.318161
  🔄 Trial 35, Fold 3/5
    Fold 3 best total loss: 0.014015
  🔄 Trial 35, Fold 4/5
    Fold 4 best total loss: 0.010809
  🔄 Trial 35, Fold 5/5


[I 2025-09-29 20:58:12,776] Trial 35 finished with value: 0.13410588552942498 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 34 with value: 0.01374785042135045.


    Fold 5 best total loss: 0.010212
  📊 Trial 35 - Average CV Loss: 0.134106 (±0.149948)
  🔄 Trial 36, Fold 1/5
    Fold 1 best total loss: 0.008656
  🔄 Trial 36, Fold 2/5
    Fold 2 best total loss: 0.013983
  🔄 Trial 36, Fold 3/5
    Fold 3 best total loss: 0.017732
  🔄 Trial 36, Fold 4/5
    Fold 4 best total loss: 0.012749
  🔄 Trial 36, Fold 5/5


[I 2025-09-29 21:01:33,181] Trial 36 finished with value: 0.013651931239292026 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 16, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 36 with value: 0.013651931239292026.


    Fold 5 best total loss: 0.015141
  📊 Trial 36 - Average CV Loss: 0.013652 (±0.002992)
  🔄 Trial 37, Fold 1/5
    Fold 1 best total loss: 0.009331
  🔄 Trial 37, Fold 2/5
    Fold 2 best total loss: 0.015980
  🔄 Trial 37, Fold 3/5
    Fold 3 best total loss: 0.019365
  🔄 Trial 37, Fold 4/5
    Fold 4 best total loss: 0.016687
  🔄 Trial 37, Fold 5/5


[I 2025-09-29 21:06:23,905] Trial 37 finished with value: 0.01655132739106193 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.5, 'decoder_hidden_size': 16, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 16, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 36 with value: 0.013651931239292026.


    Fold 5 best total loss: 0.021393
  📊 Trial 37 - Average CV Loss: 0.016551 (±0.004095)
  🔄 Trial 38, Fold 1/5
    Fold 1 best total loss: 0.315816
  🔄 Trial 38, Fold 2/5
    Fold 2 best total loss: 0.319948
  🔄 Trial 38, Fold 3/5
    Fold 3 best total loss: 0.015679
  🔄 Trial 38, Fold 4/5
    Fold 4 best total loss: 0.197915
  🔄 Trial 38, Fold 5/5


[I 2025-09-29 21:09:44,157] Trial 38 finished with value: 0.17229508715099656 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 16, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 36 with value: 0.013651931239292026.


    Fold 5 best total loss: 0.012118
  📊 Trial 38 - Average CV Loss: 0.172295 (±0.136559)
  🔄 Trial 39, Fold 1/5
    Fold 1 best total loss: 0.010382
  🔄 Trial 39, Fold 2/5
    Fold 2 best total loss: 0.021164
  🔄 Trial 39, Fold 3/5
    Fold 3 best total loss: 0.019405
  🔄 Trial 39, Fold 4/5
    Fold 4 best total loss: 0.011704
  🔄 Trial 39, Fold 5/5


[I 2025-09-29 21:14:35,393] Trial 39 finished with value: 0.015607682836707682 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 16, 'decoder_n_layers': 5, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 36 with value: 0.013651931239292026.


    Fold 5 best total loss: 0.015383
  📊 Trial 39 - Average CV Loss: 0.015608 (±0.004192)
  🔄 Trial 40, Fold 1/5
    Fold 1 best total loss: 0.008631
  🔄 Trial 40, Fold 2/5
    Fold 2 best total loss: 0.019579
  🔄 Trial 40, Fold 3/5
    Fold 3 best total loss: 0.018731
  🔄 Trial 40, Fold 4/5
    Fold 4 best total loss: 0.010277
  🔄 Trial 40, Fold 5/5


[I 2025-09-29 21:17:56,401] Trial 40 finished with value: 0.014444711070973426 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 36 with value: 0.013651931239292026.


    Fold 5 best total loss: 0.015005
  📊 Trial 40 - Average CV Loss: 0.014445 (±0.004387)
  🔄 Trial 41, Fold 1/5
    Fold 1 best total loss: 0.314426
  🔄 Trial 41, Fold 2/5
    Fold 2 best total loss: 0.213541
  🔄 Trial 41, Fold 3/5
    Fold 3 best total loss: 0.016562
  🔄 Trial 41, Fold 4/5
    Fold 4 best total loss: 0.011728
  🔄 Trial 41, Fold 5/5


[I 2025-09-29 21:22:49,808] Trial 41 finished with value: 0.16092009158455767 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 64, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 36 with value: 0.013651931239292026.


    Fold 5 best total loss: 0.248344
  📊 Trial 41 - Average CV Loss: 0.160920 (±0.124156)
  🔄 Trial 42, Fold 1/5
    Fold 1 best total loss: 0.008550
  🔄 Trial 42, Fold 2/5
    Fold 2 best total loss: 0.016390
  🔄 Trial 42, Fold 3/5
    Fold 3 best total loss: 0.016571
  🔄 Trial 42, Fold 4/5
    Fold 4 best total loss: 0.014897
  🔄 Trial 42, Fold 5/5


[I 2025-09-29 21:26:09,805] Trial 42 finished with value: 0.013624434161465615 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 42 with value: 0.013624434161465615.


    Fold 5 best total loss: 0.011714
  📊 Trial 42 - Average CV Loss: 0.013624 (±0.003077)
  🔄 Trial 43, Fold 1/5
    Fold 1 best total loss: 0.007822
  🔄 Trial 43, Fold 2/5
    Fold 2 best total loss: 0.013708
  🔄 Trial 43, Fold 3/5
    Fold 3 best total loss: 0.016324
  🔄 Trial 43, Fold 4/5
    Fold 4 best total loss: 0.014036
  🔄 Trial 43, Fold 5/5


[I 2025-09-29 21:29:29,840] Trial 43 finished with value: 0.01295835494529456 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 43 with value: 0.01295835494529456.


    Fold 5 best total loss: 0.012902
  📊 Trial 43 - Average CV Loss: 0.012958 (±0.002808)
  🔄 Trial 44, Fold 1/5
    Fold 1 best total loss: 0.008186
  🔄 Trial 44, Fold 2/5
    Fold 2 best total loss: 0.019270
  🔄 Trial 44, Fold 3/5
    Fold 3 best total loss: 0.016531
  🔄 Trial 44, Fold 4/5
    Fold 4 best total loss: 0.012858
  🔄 Trial 44, Fold 5/5


[I 2025-09-29 21:32:51,956] Trial 44 finished with value: 0.0143723358749412 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 43 with value: 0.01295835494529456.


    Fold 5 best total loss: 0.015017
  📊 Trial 44 - Average CV Loss: 0.014372 (±0.003732)
  🔄 Trial 45, Fold 1/5
    Fold 1 best total loss: 0.010979
  🔄 Trial 45, Fold 2/5
    Fold 2 best total loss: 0.022198
  🔄 Trial 45, Fold 3/5
    Fold 3 best total loss: 0.022815
  🔄 Trial 45, Fold 4/5
    Fold 4 best total loss: 0.016167
  🔄 Trial 45, Fold 5/5


[I 2025-09-29 21:36:11,892] Trial 45 finished with value: 0.018035689007956535 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 16, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 43 with value: 0.01295835494529456.


    Fold 5 best total loss: 0.018020
  📊 Trial 45 - Average CV Loss: 0.018036 (±0.004323)
  🔄 Trial 46, Fold 1/5
    Fold 1 best total loss: 0.008560
  🔄 Trial 46, Fold 2/5
    Fold 2 best total loss: 0.319503
  🔄 Trial 46, Fold 3/5
    Fold 3 best total loss: 0.016602
  🔄 Trial 46, Fold 4/5
    Fold 4 best total loss: 0.008654
  🔄 Trial 46, Fold 5/5


[I 2025-09-29 21:39:32,397] Trial 46 finished with value: 0.07318799157510511 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 43 with value: 0.01295835494529456.


    Fold 5 best total loss: 0.012621
  📊 Trial 46 - Average CV Loss: 0.073188 (±0.123193)
  🔄 Trial 47, Fold 1/5
    Fold 1 best total loss: 0.007179
  🔄 Trial 47, Fold 2/5
    Fold 2 best total loss: 0.017401
  🔄 Trial 47, Fold 3/5
    Fold 3 best total loss: 0.017469
  🔄 Trial 47, Fold 4/5
    Fold 4 best total loss: 0.008922
  🔄 Trial 47, Fold 5/5


[I 2025-09-29 21:42:53,087] Trial 47 finished with value: 0.012869430589489639 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.013376
  📊 Trial 47 - Average CV Loss: 0.012869 (±0.004241)
  🔄 Trial 48, Fold 1/5
    Fold 1 best total loss: 0.009742
  🔄 Trial 48, Fold 2/5
    Fold 2 best total loss: 0.318562
  🔄 Trial 48, Fold 3/5
    Fold 3 best total loss: 0.222139
  🔄 Trial 48, Fold 4/5
    Fold 4 best total loss: 0.015045
  🔄 Trial 48, Fold 5/5


[I 2025-09-29 21:46:17,103] Trial 48 finished with value: 0.18220715842908247 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 5, 'current_dropout': 0.2}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.345549
  📊 Trial 48 - Average CV Loss: 0.182207 (±0.144607)
  🔄 Trial 49, Fold 1/5
    Fold 1 best total loss: 0.006954
  🔄 Trial 49, Fold 2/5
    Fold 2 best total loss: 0.317857
  🔄 Trial 49, Fold 3/5
    Fold 3 best total loss: 0.013469
  🔄 Trial 49, Fold 4/5
    Fold 4 best total loss: 0.009835
  🔄 Trial 49, Fold 5/5


[I 2025-09-29 21:49:56,276] Trial 49 finished with value: 0.13868104622815736 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.345290
  📊 Trial 49 - Average CV Loss: 0.138681 (±0.157748)
  🔄 Trial 50, Fold 1/5
    Fold 1 best total loss: 0.007427
  🔄 Trial 50, Fold 2/5
    Fold 2 best total loss: 0.016810
  🔄 Trial 50, Fold 3/5
    Fold 3 best total loss: 0.017084
  🔄 Trial 50, Fold 4/5
    Fold 4 best total loss: 0.012856
  🔄 Trial 50, Fold 5/5


[I 2025-09-29 21:53:54,426] Trial 50 finished with value: 0.0800248431682121 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.345947
  📊 Trial 50 - Average CV Loss: 0.080025 (±0.133007)
  🔄 Trial 51, Fold 1/5
    Fold 1 best total loss: 0.313972
  🔄 Trial 51, Fold 2/5
    Fold 2 best total loss: 0.318085
  🔄 Trial 51, Fold 3/5
    Fold 3 best total loss: 0.224164
  🔄 Trial 51, Fold 4/5
    Fold 4 best total loss: 0.081080
  🔄 Trial 51, Fold 5/5


[I 2025-09-29 22:04:26,699] Trial 51 finished with value: 0.2562971721403301 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 6, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 48, 'current_n_layers': 6, 'current_dropout': 0.1}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.344186
  📊 Trial 51 - Average CV Loss: 0.256297 (±0.096542)
  🔄 Trial 52, Fold 1/5
    Fold 1 best total loss: 0.007901
  🔄 Trial 52, Fold 2/5
    Fold 2 best total loss: 0.319191
  🔄 Trial 52, Fold 3/5
    Fold 3 best total loss: 0.016110
  🔄 Trial 52, Fold 4/5
    Fold 4 best total loss: 0.012699
  🔄 Trial 52, Fold 5/5


[I 2025-09-29 22:08:04,611] Trial 52 finished with value: 0.07335147415287793 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.010855
  📊 Trial 52 - Average CV Loss: 0.073351 (±0.122949)
  🔄 Trial 53, Fold 1/5
    Fold 1 best total loss: 0.315675
  🔄 Trial 53, Fold 2/5
    Fold 2 best total loss: 0.017208
  🔄 Trial 53, Fold 3/5
    Fold 3 best total loss: 0.014765
  🔄 Trial 53, Fold 4/5
    Fold 4 best total loss: 0.010115
  🔄 Trial 53, Fold 5/5


[I 2025-09-29 22:11:54,175] Trial 53 finished with value: 0.14101756197633222 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.347324
  📊 Trial 53 - Average CV Loss: 0.141018 (±0.155866)
  🔄 Trial 54, Fold 1/5
    Fold 1 best total loss: 0.008522
  🔄 Trial 54, Fold 2/5
    Fold 2 best total loss: 0.022473
  🔄 Trial 54, Fold 3/5
    Fold 3 best total loss: 0.019568
  🔄 Trial 54, Fold 4/5
    Fold 4 best total loss: 0.011438
  🔄 Trial 54, Fold 5/5


[I 2025-09-29 22:15:42,561] Trial 54 finished with value: 0.015241271827835589 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 2, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.014205
  📊 Trial 54 - Average CV Loss: 0.015241 (±0.005132)
  🔄 Trial 55, Fold 1/5
    Fold 1 best total loss: 0.009421
  🔄 Trial 55, Fold 2/5
    Fold 2 best total loss: 0.017587
  🔄 Trial 55, Fold 3/5
    Fold 3 best total loss: 0.018312
  🔄 Trial 55, Fold 4/5
    Fold 4 best total loss: 0.012799
  🔄 Trial 55, Fold 5/5


[I 2025-09-29 22:19:29,332] Trial 55 finished with value: 0.07998393406160176 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 2, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 3, 'decoder_dropout': 0.4, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 47 with value: 0.012869430589489639.


    Fold 5 best total loss: 0.341800
  📊 Trial 55 - Average CV Loss: 0.079984 (±0.130949)
  🔄 Trial 56, Fold 1/5
    Fold 1 best total loss: 0.009367
  🔄 Trial 56, Fold 2/5
    Fold 2 best total loss: 0.014948
  🔄 Trial 56, Fold 3/5
    Fold 3 best total loss: 0.014580
  🔄 Trial 56, Fold 4/5
    Fold 4 best total loss: 0.011167
  🔄 Trial 56, Fold 5/5


[I 2025-09-29 22:24:47,664] Trial 56 finished with value: 0.012543224275577813 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 56 with value: 0.012543224275577813.


    Fold 5 best total loss: 0.012655
  📊 Trial 56 - Average CV Loss: 0.012543 (±0.002094)
  🔄 Trial 57, Fold 1/5
    Fold 1 best total loss: 0.006795
  🔄 Trial 57, Fold 2/5
    Fold 2 best total loss: 0.319072
  🔄 Trial 57, Fold 3/5
    Fold 3 best total loss: 0.014422
  🔄 Trial 57, Fold 4/5
    Fold 4 best total loss: 0.010304
  🔄 Trial 57, Fold 5/5


[I 2025-09-29 22:32:34,136] Trial 57 finished with value: 0.11998672202462331 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 4, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 5, 'decoder_dropout': 0.1, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 56 with value: 0.012543224275577813.


    Fold 5 best total loss: 0.249341
  📊 Trial 57 - Average CV Loss: 0.119987 (±0.135908)
  🔄 Trial 58, Fold 1/5
    Fold 1 best total loss: 0.008332
  🔄 Trial 58, Fold 2/5
    Fold 2 best total loss: 0.177959
  🔄 Trial 58, Fold 3/5
    Fold 3 best total loss: 0.017617
  🔄 Trial 58, Fold 4/5
    Fold 4 best total loss: 0.010198
  🔄 Trial 58, Fold 5/5


[I 2025-09-29 22:37:54,297] Trial 58 finished with value: 0.04578937941696495 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.2}. Best is trial 56 with value: 0.012543224275577813.


    Fold 5 best total loss: 0.014841
  📊 Trial 58 - Average CV Loss: 0.045789 (±0.066166)
  🔄 Trial 59, Fold 1/5
    Fold 1 best total loss: 0.006748
  🔄 Trial 59, Fold 2/5
    Fold 2 best total loss: 0.012659
  🔄 Trial 59, Fold 3/5
    Fold 3 best total loss: 0.015082
  🔄 Trial 59, Fold 4/5
    Fold 4 best total loss: 0.010648
  🔄 Trial 59, Fold 5/5


[I 2025-09-29 22:43:24,498] Trial 59 finished with value: 0.011053004348650575 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.010128
  📊 Trial 59 - Average CV Loss: 0.011053 (±0.002770)
  🔄 Trial 60, Fold 1/5
    Fold 1 best total loss: 0.007389
  🔄 Trial 60, Fold 2/5
    Fold 2 best total loss: 0.013185
  🔄 Trial 60, Fold 3/5
    Fold 3 best total loss: 0.015425
  🔄 Trial 60, Fold 4/5
    Fold 4 best total loss: 0.013275
  🔄 Trial 60, Fold 5/5


[I 2025-09-29 22:48:56,166] Trial 60 finished with value: 0.012437578762182966 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.012914
  📊 Trial 60 - Average CV Loss: 0.012438 (±0.002680)
  🔄 Trial 61, Fold 1/5
    Fold 1 best total loss: 0.007294
  🔄 Trial 61, Fold 2/5
    Fold 2 best total loss: 0.017222
  🔄 Trial 61, Fold 3/5
    Fold 3 best total loss: 0.017407
  🔄 Trial 61, Fold 4/5
    Fold 4 best total loss: 0.011130
  🔄 Trial 61, Fold 5/5


[I 2025-09-29 22:56:34,829] Trial 61 finished with value: 0.012973133433843032 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011814
  📊 Trial 61 - Average CV Loss: 0.012973 (±0.003865)
  🔄 Trial 62, Fold 1/5
    Fold 1 best total loss: 0.007385
  🔄 Trial 62, Fold 2/5
    Fold 2 best total loss: 0.317472
  🔄 Trial 62, Fold 3/5
    Fold 3 best total loss: 0.013283
  🔄 Trial 62, Fold 4/5
    Fold 4 best total loss: 0.010253
  🔄 Trial 62, Fold 5/5


[I 2025-09-29 23:03:58,892] Trial 62 finished with value: 0.12542152348905802 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 4, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.278713
  📊 Trial 62 - Average CV Loss: 0.125422 (±0.141529)
  🔄 Trial 63, Fold 1/5
    Fold 1 best total loss: 0.007192
  🔄 Trial 63, Fold 2/5
    Fold 2 best total loss: 0.015408
  🔄 Trial 63, Fold 3/5
    Fold 3 best total loss: 0.015064
  🔄 Trial 63, Fold 4/5
    Fold 4 best total loss: 0.010345
  🔄 Trial 63, Fold 5/5


[I 2025-09-29 23:10:27,438] Trial 63 finished with value: 0.011709601007169113 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.010540
  📊 Trial 63 - Average CV Loss: 0.011710 (±0.003117)
  🔄 Trial 64, Fold 1/5
    Fold 1 best total loss: 0.007162
  🔄 Trial 64, Fold 2/5
    Fold 2 best total loss: 0.016266
  🔄 Trial 64, Fold 3/5
    Fold 3 best total loss: 0.018160
  🔄 Trial 64, Fold 4/5
    Fold 4 best total loss: 0.011586
  🔄 Trial 64, Fold 5/5


[I 2025-09-29 23:18:24,671] Trial 64 finished with value: 0.013487047422677279 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.014262
  📊 Trial 64 - Average CV Loss: 0.013487 (±0.003841)
  🔄 Trial 65, Fold 1/5
    Fold 1 best total loss: 0.007361
  🔄 Trial 65, Fold 2/5
    Fold 2 best total loss: 0.019374
  🔄 Trial 65, Fold 3/5
    Fold 3 best total loss: 0.015729
  🔄 Trial 65, Fold 4/5
    Fold 4 best total loss: 0.198960
  🔄 Trial 65, Fold 5/5


[I 2025-09-29 23:24:49,297] Trial 65 finished with value: 0.050743967189919206 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.012297
  📊 Trial 65 - Average CV Loss: 0.050744 (±0.074214)
  🔄 Trial 66, Fold 1/5
    Fold 1 best total loss: 0.007565
  🔄 Trial 66, Fold 2/5
    Fold 2 best total loss: 0.014326
  🔄 Trial 66, Fold 3/5
    Fold 3 best total loss: 0.016386
  🔄 Trial 66, Fold 4/5
    Fold 4 best total loss: 0.009894
  🔄 Trial 66, Fold 5/5


[I 2025-09-29 23:31:12,694] Trial 66 finished with value: 0.011745190666988491 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.010554
  📊 Trial 66 - Average CV Loss: 0.011745 (±0.003179)
  🔄 Trial 67, Fold 1/5
    Fold 1 best total loss: 0.006679
  🔄 Trial 67, Fold 2/5
    Fold 2 best total loss: 0.016938
  🔄 Trial 67, Fold 3/5
    Fold 3 best total loss: 0.017806
  🔄 Trial 67, Fold 4/5
    Fold 4 best total loss: 0.011257
  🔄 Trial 67, Fold 5/5


[I 2025-09-29 23:37:37,411] Trial 67 finished with value: 0.013005511392839253 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.012348
  📊 Trial 67 - Average CV Loss: 0.013006 (±0.004050)
  🔄 Trial 68, Fold 1/5
    Fold 1 best total loss: 0.006807
  🔄 Trial 68, Fold 2/5
    Fold 2 best total loss: 0.016894
  🔄 Trial 68, Fold 3/5
    Fold 3 best total loss: 0.015901
  🔄 Trial 68, Fold 4/5
    Fold 4 best total loss: 0.008860
  🔄 Trial 68, Fold 5/5


[I 2025-09-29 23:45:40,730] Trial 68 finished with value: 0.011869396443944424 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.010884
  📊 Trial 68 - Average CV Loss: 0.011869 (±0.003928)
  🔄 Trial 69, Fold 1/5
    Fold 1 best total loss: 0.007407
  🔄 Trial 69, Fold 2/5
    Fold 2 best total loss: 0.318269
  🔄 Trial 69, Fold 3/5
    Fold 3 best total loss: 0.020628
  🔄 Trial 69, Fold 4/5
    Fold 4 best total loss: 0.172830
  🔄 Trial 69, Fold 5/5


[I 2025-09-29 23:53:33,898] Trial 69 finished with value: 0.1060449808370322 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 4, 'decoder_dropout': 0.1, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011091
  📊 Trial 69 - Average CV Loss: 0.106045 (±0.122915)
  🔄 Trial 70, Fold 1/5
    Fold 1 best total loss: 0.314716
  🔄 Trial 70, Fold 2/5
    Fold 2 best total loss: 0.014609
  🔄 Trial 70, Fold 3/5
    Fold 3 best total loss: 0.011554
  🔄 Trial 70, Fold 4/5
    Fold 4 best total loss: 0.011265
  🔄 Trial 70, Fold 5/5


[I 2025-09-29 23:59:55,077] Trial 70 finished with value: 0.07254046759917401 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.010558
  📊 Trial 70 - Average CV Loss: 0.072540 (±0.121096)
  🔄 Trial 71, Fold 1/5
    Fold 1 best total loss: 0.007074
  🔄 Trial 71, Fold 2/5
    Fold 2 best total loss: 0.013796
  🔄 Trial 71, Fold 3/5
    Fold 3 best total loss: 0.013763
  🔄 Trial 71, Fold 4/5
    Fold 4 best total loss: 0.014047
  🔄 Trial 71, Fold 5/5


[I 2025-09-30 00:07:51,000] Trial 71 finished with value: 0.011973617516923695 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011188
  📊 Trial 71 - Average CV Loss: 0.011974 (±0.002663)
  🔄 Trial 72, Fold 1/5
    Fold 1 best total loss: 0.314800
  🔄 Trial 72, Fold 2/5
    Fold 2 best total loss: 0.012548
  🔄 Trial 72, Fold 3/5
    Fold 3 best total loss: 0.016415
  🔄 Trial 72, Fold 4/5
    Fold 4 best total loss: 0.010980
  🔄 Trial 72, Fold 5/5


[I 2025-09-30 00:15:49,257] Trial 72 finished with value: 0.07319083870388568 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011211
  📊 Trial 72 - Average CV Loss: 0.073191 (±0.120820)
  🔄 Trial 73, Fold 1/5
    Fold 1 best total loss: 0.007370
  🔄 Trial 73, Fold 2/5
    Fold 2 best total loss: 0.016390
  🔄 Trial 73, Fold 3/5
    Fold 3 best total loss: 0.013324
  🔄 Trial 73, Fold 4/5
    Fold 4 best total loss: 0.010637
  🔄 Trial 73, Fold 5/5


[I 2025-09-30 00:23:43,889] Trial 73 finished with value: 0.012021190853556619 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.012385
  📊 Trial 73 - Average CV Loss: 0.012021 (±0.002982)
  🔄 Trial 74, Fold 1/5
    Fold 1 best total loss: 0.007568
  🔄 Trial 74, Fold 2/5
    Fold 2 best total loss: 0.017318
  🔄 Trial 74, Fold 3/5
    Fold 3 best total loss: 0.015598
  🔄 Trial 74, Fold 4/5
    Fold 4 best total loss: 0.197795
  🔄 Trial 74, Fold 5/5


[I 2025-09-30 00:31:37,256] Trial 74 finished with value: 0.04946426229435019 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.009041
  📊 Trial 74 - Average CV Loss: 0.049464 (±0.074258)
  🔄 Trial 75, Fold 1/5
    Fold 1 best total loss: 0.007313
  🔄 Trial 75, Fold 2/5
    Fold 2 best total loss: 0.015666
  🔄 Trial 75, Fold 3/5
    Fold 3 best total loss: 0.015913
  🔄 Trial 75, Fold 4/5
    Fold 4 best total loss: 0.014558
  🔄 Trial 75, Fold 5/5


[I 2025-09-30 00:39:32,390] Trial 75 finished with value: 0.013078332802979275 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011942
  📊 Trial 75 - Average CV Loss: 0.013078 (±0.003208)
  🔄 Trial 76, Fold 1/5
    Fold 1 best total loss: 0.007073
  🔄 Trial 76, Fold 2/5
    Fold 2 best total loss: 0.318775
  🔄 Trial 76, Fold 3/5
    Fold 3 best total loss: 0.013092
  🔄 Trial 76, Fold 4/5
    Fold 4 best total loss: 0.008926
  🔄 Trial 76, Fold 5/5


[I 2025-09-30 00:49:02,674] Trial 76 finished with value: 0.07145701042027212 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 6, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.009420
  📊 Trial 76 - Average CV Loss: 0.071457 (±0.123674)
  🔄 Trial 77, Fold 1/5
    Fold 1 best total loss: 0.008489
  🔄 Trial 77, Fold 2/5
    Fold 2 best total loss: 0.013536
  🔄 Trial 77, Fold 3/5
    Fold 3 best total loss: 0.222400
  🔄 Trial 77, Fold 4/5
    Fold 4 best total loss: 0.018472
  🔄 Trial 77, Fold 5/5


[I 2025-09-30 00:57:06,910] Trial 77 finished with value: 0.1216907531954348 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 48, 'current_n_layers': 5, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.345557
  📊 Trial 77 - Average CV Loss: 0.121691 (±0.138148)
  🔄 Trial 78, Fold 1/5
    Fold 1 best total loss: 0.007654
  🔄 Trial 78, Fold 2/5
    Fold 2 best total loss: 0.018787
  🔄 Trial 78, Fold 3/5
    Fold 3 best total loss: 0.014967
  🔄 Trial 78, Fold 4/5
    Fold 4 best total loss: 0.009518
  🔄 Trial 78, Fold 5/5


[I 2025-09-30 01:05:04,149] Trial 78 finished with value: 0.012451499269809574 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011332
  📊 Trial 78 - Average CV Loss: 0.012451 (±0.003984)
  🔄 Trial 79, Fold 1/5
    Fold 1 best total loss: 0.009050
  🔄 Trial 79, Fold 2/5
    Fold 2 best total loss: 0.318272
  🔄 Trial 79, Fold 3/5
    Fold 3 best total loss: 0.018640
  🔄 Trial 79, Fold 4/5
    Fold 4 best total loss: 0.009395
  🔄 Trial 79, Fold 5/5


[I 2025-09-30 01:13:02,013] Trial 79 finished with value: 0.07321978932595812 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 3, 'decoder_dropout': 0.4, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.010742
  📊 Trial 79 - Average CV Loss: 0.073220 (±0.122576)
  🔄 Trial 80, Fold 1/5
    Fold 1 best total loss: 0.007867
  🔄 Trial 80, Fold 2/5
    Fold 2 best total loss: 0.021995
  🔄 Trial 80, Fold 3/5
    Fold 3 best total loss: 0.015960
  🔄 Trial 80, Fold 4/5
    Fold 4 best total loss: 0.009615
  🔄 Trial 80, Fold 5/5


[I 2025-09-30 01:21:04,838] Trial 80 finished with value: 0.013061346614267676 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.009870
  📊 Trial 80 - Average CV Loss: 0.013061 (±0.005240)
  🔄 Trial 81, Fold 1/5
    Fold 1 best total loss: 0.314907
  🔄 Trial 81, Fold 2/5
    Fold 2 best total loss: 0.016474
  🔄 Trial 81, Fold 3/5
    Fold 3 best total loss: 0.225780
  🔄 Trial 81, Fold 4/5
    Fold 4 best total loss: 0.010585
  🔄 Trial 81, Fold 5/5


[I 2025-09-30 01:30:50,180] Trial 81 finished with value: 0.11573967811418698 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 6, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.010953
  📊 Trial 81 - Average CV Loss: 0.115740 (±0.129358)
  🔄 Trial 82, Fold 1/5
    Fold 1 best total loss: 0.007322
  🔄 Trial 82, Fold 2/5
    Fold 2 best total loss: 0.018230
  🔄 Trial 82, Fold 3/5
    Fold 3 best total loss: 0.014411
  🔄 Trial 82, Fold 4/5
    Fold 4 best total loss: 0.012650
  🔄 Trial 82, Fold 5/5


[I 2025-09-30 01:38:47,255] Trial 82 finished with value: 0.012596767884679139 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.010371
  📊 Trial 82 - Average CV Loss: 0.012597 (±0.003683)
  🔄 Trial 83, Fold 1/5
    Fold 1 best total loss: 0.007551
  🔄 Trial 83, Fold 2/5
    Fold 2 best total loss: 0.169967
  🔄 Trial 83, Fold 3/5
    Fold 3 best total loss: 0.016925
  🔄 Trial 83, Fold 4/5
    Fold 4 best total loss: 0.008351
  🔄 Trial 83, Fold 5/5


[I 2025-09-30 01:43:37,685] Trial 83 finished with value: 0.04281790408422239 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011296
  📊 Trial 83 - Average CV Loss: 0.042818 (±0.063660)
  🔄 Trial 84, Fold 1/5
    Fold 1 best total loss: 0.007773
  🔄 Trial 84, Fold 2/5
    Fold 2 best total loss: 0.016391
  🔄 Trial 84, Fold 3/5
    Fold 3 best total loss: 0.015276
  🔄 Trial 84, Fold 4/5
    Fold 4 best total loss: 0.010450
  🔄 Trial 84, Fold 5/5


[I 2025-09-30 01:48:29,547] Trial 84 finished with value: 0.012695529748452828 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.013587
  📊 Trial 84 - Average CV Loss: 0.012696 (±0.003174)
  🔄 Trial 85, Fold 1/5
    Fold 1 best total loss: 0.007039
  🔄 Trial 85, Fold 2/5
    Fold 2 best total loss: 0.017313
  🔄 Trial 85, Fold 3/5
    Fold 3 best total loss: 0.016155
  🔄 Trial 85, Fold 4/5
    Fold 4 best total loss: 0.009595
  🔄 Trial 85, Fold 5/5


[I 2025-09-30 01:54:51,677] Trial 85 finished with value: 0.012296197121031582 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011379
  📊 Trial 85 - Average CV Loss: 0.012296 (±0.003895)
  🔄 Trial 86, Fold 1/5
    Fold 1 best total loss: 0.225660
  🔄 Trial 86, Fold 2/5
    Fold 2 best total loss: 0.317249
  🔄 Trial 86, Fold 3/5
    Fold 3 best total loss: 0.203079
  🔄 Trial 86, Fold 4/5
    Fold 4 best total loss: 0.203895
  🔄 Trial 86, Fold 5/5


[I 2025-09-30 02:02:41,832] Trial 86 finished with value: 0.2593259785324335 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.346746
  📊 Trial 86 - Average CV Loss: 0.259326 (±0.060609)
  🔄 Trial 87, Fold 1/5
    Fold 1 best total loss: 0.008168
  🔄 Trial 87, Fold 2/5
    Fold 2 best total loss: 0.319762
  🔄 Trial 87, Fold 3/5
    Fold 3 best total loss: 0.013415
  🔄 Trial 87, Fold 4/5
    Fold 4 best total loss: 0.009097
  🔄 Trial 87, Fold 5/5


[I 2025-09-30 02:10:36,439] Trial 87 finished with value: 0.0724971821764484 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.4, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.012044
  📊 Trial 87 - Average CV Loss: 0.072497 (±0.123647)
  🔄 Trial 88, Fold 1/5
    Fold 1 best total loss: 0.007281
  🔄 Trial 88, Fold 2/5
    Fold 2 best total loss: 0.017744
  🔄 Trial 88, Fold 3/5
    Fold 3 best total loss: 0.016086
  🔄 Trial 88, Fold 4/5
    Fold 4 best total loss: 0.012035
  🔄 Trial 88, Fold 5/5


[I 2025-09-30 02:16:57,184] Trial 88 finished with value: 0.012915444280952215 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.011431
  📊 Trial 88 - Average CV Loss: 0.012915 (±0.003690)
  🔄 Trial 89, Fold 1/5
    Fold 1 best total loss: 0.006076
  🔄 Trial 89, Fold 2/5
    Fold 2 best total loss: 0.319088
  🔄 Trial 89, Fold 3/5
    Fold 3 best total loss: 0.012706
  🔄 Trial 89, Fold 4/5
    Fold 4 best total loss: 0.008580
  🔄 Trial 89, Fold 5/5


[I 2025-09-30 02:24:50,648] Trial 89 finished with value: 0.07117516039288603 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 64, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 59 with value: 0.011053004348650575.


    Fold 5 best total loss: 0.009426
  📊 Trial 89 - Average CV Loss: 0.071175 (±0.123975)
  🔄 Trial 90, Fold 1/5
    Fold 1 best total loss: 0.007776
  🔄 Trial 90, Fold 2/5
    Fold 2 best total loss: 0.013041
  🔄 Trial 90, Fold 3/5
    Fold 3 best total loss: 0.013092
  🔄 Trial 90, Fold 4/5
    Fold 4 best total loss: 0.010267
  🔄 Trial 90, Fold 5/5


[I 2025-09-30 02:31:16,166] Trial 90 finished with value: 0.01037523580598645 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.4, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.007701
  📊 Trial 90 - Average CV Loss: 0.010375 (±0.002384)
  🔄 Trial 91, Fold 1/5
    Fold 1 best total loss: 0.007500
  🔄 Trial 91, Fold 2/5
    Fold 2 best total loss: 0.013209
  🔄 Trial 91, Fold 3/5
    Fold 3 best total loss: 0.015934
  🔄 Trial 91, Fold 4/5
    Fold 4 best total loss: 0.010323
  🔄 Trial 91, Fold 5/5


[I 2025-09-30 02:37:41,041] Trial 91 finished with value: 0.011771868134383112 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.011894
  📊 Trial 91 - Average CV Loss: 0.011772 (±0.002820)
  🔄 Trial 92, Fold 1/5
    Fold 1 best total loss: 0.006556
  🔄 Trial 92, Fold 2/5
    Fold 2 best total loss: 0.013934
  🔄 Trial 92, Fold 3/5
    Fold 3 best total loss: 0.015874
  🔄 Trial 92, Fold 4/5
    Fold 4 best total loss: 0.010201
  🔄 Trial 92, Fold 5/5


[I 2025-09-30 02:44:05,077] Trial 92 finished with value: 0.011114907200681045 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.009010
  📊 Trial 92 - Average CV Loss: 0.011115 (±0.003366)
  🔄 Trial 93, Fold 1/5
    Fold 1 best total loss: 0.006565
  🔄 Trial 93, Fold 2/5
    Fold 2 best total loss: 0.016461
  🔄 Trial 93, Fold 3/5
    Fold 3 best total loss: 0.015265
  🔄 Trial 93, Fold 4/5
    Fold 4 best total loss: 0.010555
  🔄 Trial 93, Fold 5/5


[I 2025-09-30 02:50:30,405] Trial 93 finished with value: 0.011538766743615269 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.008848
  📊 Trial 93 - Average CV Loss: 0.011539 (±0.003770)
  🔄 Trial 94, Fold 1/5
    Fold 1 best total loss: 0.006888
  🔄 Trial 94, Fold 2/5
    Fold 2 best total loss: 0.013449
  🔄 Trial 94, Fold 3/5
    Fold 3 best total loss: 0.014706
  🔄 Trial 94, Fold 4/5
    Fold 4 best total loss: 0.007735
  🔄 Trial 94, Fold 5/5


[I 2025-09-30 02:56:55,666] Trial 94 finished with value: 0.07781103013549 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.346277
  📊 Trial 94 - Average CV Loss: 0.077811 (±0.134268)
  🔄 Trial 95, Fold 1/5
    Fold 1 best total loss: 0.007267
  🔄 Trial 95, Fold 2/5
    Fold 2 best total loss: 0.015714
  🔄 Trial 95, Fold 3/5
    Fold 3 best total loss: 0.014895
  🔄 Trial 95, Fold 4/5
    Fold 4 best total loss: 0.008526
  🔄 Trial 95, Fold 5/5


[I 2025-09-30 03:03:19,086] Trial 95 finished with value: 0.01105070678750053 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.008852
  📊 Trial 95 - Average CV Loss: 0.011051 (±0.003523)
  🔄 Trial 96, Fold 1/5
    Fold 1 best total loss: 0.006200
  🔄 Trial 96, Fold 2/5
    Fold 2 best total loss: 0.017766
  🔄 Trial 96, Fold 3/5
    Fold 3 best total loss: 0.014444
  🔄 Trial 96, Fold 4/5
    Fold 4 best total loss: 0.197928
  🔄 Trial 96, Fold 5/5


[I 2025-09-30 03:09:46,074] Trial 96 finished with value: 0.0492623207566794 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.009974
  📊 Trial 96 - Average CV Loss: 0.049262 (±0.074436)
  🔄 Trial 97, Fold 1/5
    Fold 1 best total loss: 0.007456
  🔄 Trial 97, Fold 2/5
    Fold 2 best total loss: 0.318505
  🔄 Trial 97, Fold 3/5
    Fold 3 best total loss: 0.013603
  🔄 Trial 97, Fold 4/5
    Fold 4 best total loss: 0.198169
  🔄 Trial 97, Fold 5/5


[I 2025-09-30 03:16:12,748] Trial 97 finished with value: 0.17694908314733765 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.347013
  📊 Trial 97 - Average CV Loss: 0.176949 (±0.144790)
  🔄 Trial 98, Fold 1/5
    Fold 1 best total loss: 0.007154
  🔄 Trial 98, Fold 2/5
    Fold 2 best total loss: 0.319546
  🔄 Trial 98, Fold 3/5
    Fold 3 best total loss: 0.015433
  🔄 Trial 98, Fold 4/5
    Fold 4 best total loss: 0.011444
  🔄 Trial 98, Fold 5/5


[I 2025-09-30 03:22:39,281] Trial 98 finished with value: 0.1400912269076798 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.346880
  📊 Trial 98 - Average CV Loss: 0.140091 (±0.157941)
  🔄 Trial 99, Fold 1/5
    Fold 1 best total loss: 0.314737
  🔄 Trial 99, Fold 2/5
    Fold 2 best total loss: 0.318782
  🔄 Trial 99, Fold 3/5
    Fold 3 best total loss: 0.023356
  🔄 Trial 99, Fold 4/5
    Fold 4 best total loss: 0.019322
  🔄 Trial 99, Fold 5/5


[I 2025-09-30 03:29:09,642] Trial 99 finished with value: 0.20434297625906767 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.6}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.345518
  📊 Trial 99 - Average CV Loss: 0.204343 (±0.149802)
  🔄 Trial 100, Fold 1/5
    Fold 1 best total loss: 0.006667
  🔄 Trial 100, Fold 2/5
    Fold 2 best total loss: 0.019275
  🔄 Trial 100, Fold 3/5
    Fold 3 best total loss: 0.013895
  🔄 Trial 100, Fold 4/5
    Fold 4 best total loss: 0.198015
  🔄 Trial 100, Fold 5/5


[I 2025-09-30 03:35:35,443] Trial 100 finished with value: 0.050149848725413906 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.012897
  📊 Trial 100 - Average CV Loss: 0.050150 (±0.074041)
  🔄 Trial 101, Fold 1/5
    Fold 1 best total loss: 0.316438
  🔄 Trial 101, Fold 2/5
    Fold 2 best total loss: 0.318465
  🔄 Trial 101, Fold 3/5
    Fold 3 best total loss: 0.014745
  🔄 Trial 101, Fold 4/5
    Fold 4 best total loss: 0.012277
  🔄 Trial 101, Fold 5/5


[I 2025-09-30 03:42:02,019] Trial 101 finished with value: 0.134557080763625 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 5, 'decoder_dropout': 0.5, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.010860
  📊 Trial 101 - Average CV Loss: 0.134557 (±0.149339)
  🔄 Trial 102, Fold 1/5
    Fold 1 best total loss: 0.007484
  🔄 Trial 102, Fold 2/5
    Fold 2 best total loss: 0.318255
  🔄 Trial 102, Fold 3/5
    Fold 3 best total loss: 0.016750
  🔄 Trial 102, Fold 4/5
    Fold 4 best total loss: 0.202375
  🔄 Trial 102, Fold 5/5


[I 2025-09-30 03:48:26,102] Trial 102 finished with value: 0.1448242523125373 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.179257
  📊 Trial 102 - Average CV Loss: 0.144824 (±0.118187)
  🔄 Trial 103, Fold 1/5
    Fold 1 best total loss: 0.006906
  🔄 Trial 103, Fold 2/5
    Fold 2 best total loss: 0.317966
  🔄 Trial 103, Fold 3/5
    Fold 3 best total loss: 0.013581
  🔄 Trial 103, Fold 4/5
    Fold 4 best total loss: 0.197590
  🔄 Trial 103, Fold 5/5


[I 2025-09-30 03:54:50,051] Trial 103 finished with value: 0.10927098568063229 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.010312
  📊 Trial 103 - Average CV Loss: 0.109271 (±0.127108)
  🔄 Trial 104, Fold 1/5
    Fold 1 best total loss: 0.009932
  🔄 Trial 104, Fold 2/5
    Fold 2 best total loss: 0.018078
  🔄 Trial 104, Fold 3/5
    Fold 3 best total loss: 0.019715
  🔄 Trial 104, Fold 4/5
    Fold 4 best total loss: 0.009551
  🔄 Trial 104, Fold 5/5


[I 2025-09-30 04:01:14,787] Trial 104 finished with value: 0.014249698049388826 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 16, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.013973
  📊 Trial 104 - Average CV Loss: 0.014250 (±0.004131)
  🔄 Trial 105, Fold 1/5
    Fold 1 best total loss: 0.007453
  🔄 Trial 105, Fold 2/5
    Fold 2 best total loss: 0.016064
  🔄 Trial 105, Fold 3/5
    Fold 3 best total loss: 0.014783
  🔄 Trial 105, Fold 4/5
    Fold 4 best total loss: 0.010080
  🔄 Trial 105, Fold 5/5


[I 2025-09-30 04:07:38,976] Trial 105 finished with value: 0.01201908343937248 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.011715
  📊 Trial 105 - Average CV Loss: 0.012019 (±0.003121)
  🔄 Trial 106, Fold 1/5
    Fold 1 best total loss: 0.006054
  🔄 Trial 106, Fold 2/5
    Fold 2 best total loss: 0.017833
  🔄 Trial 106, Fold 3/5
    Fold 3 best total loss: 0.017654
  🔄 Trial 106, Fold 4/5
    Fold 4 best total loss: 0.010149
  🔄 Trial 106, Fold 5/5


[I 2025-09-30 04:14:02,780] Trial 106 finished with value: 0.0122321946779266 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.009471
  📊 Trial 106 - Average CV Loss: 0.012232 (±0.004710)
  🔄 Trial 107, Fold 1/5
    Fold 1 best total loss: 0.007563
  🔄 Trial 107, Fold 2/5
    Fold 2 best total loss: 0.319399
  🔄 Trial 107, Fold 3/5
    Fold 3 best total loss: 0.013876
  🔄 Trial 107, Fold 4/5
    Fold 4 best total loss: 0.011233
  🔄 Trial 107, Fold 5/5


[I 2025-09-30 04:20:28,987] Trial 107 finished with value: 0.1399868838780094 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.6, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.347864
  📊 Trial 107 - Average CV Loss: 0.139987 (±0.158379)
  🔄 Trial 108, Fold 1/5
    Fold 1 best total loss: 0.006524
  🔄 Trial 108, Fold 2/5
    Fold 2 best total loss: 0.319618
  🔄 Trial 108, Fold 3/5
    Fold 3 best total loss: 0.014300
  🔄 Trial 108, Fold 4/5
    Fold 4 best total loss: 0.014824
  🔄 Trial 108, Fold 5/5


[I 2025-09-30 04:26:55,086] Trial 108 finished with value: 0.07297651512781159 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.5, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.009616
  📊 Trial 108 - Average CV Loss: 0.072977 (±0.123359)
  🔄 Trial 109, Fold 1/5
    Fold 1 best total loss: 0.313984
  🔄 Trial 109, Fold 2/5
    Fold 2 best total loss: 0.015887
  🔄 Trial 109, Fold 3/5
    Fold 3 best total loss: 0.223597
  🔄 Trial 109, Fold 4/5
    Fold 4 best total loss: 0.008605
  🔄 Trial 109, Fold 5/5


[I 2025-09-30 04:33:21,309] Trial 109 finished with value: 0.1144974802969955 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 5, 'decoder_dropout': 0.5, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.010414
  📊 Trial 109 - Average CV Loss: 0.114497 (±0.129204)
  🔄 Trial 110, Fold 1/5
    Fold 1 best total loss: 0.316003
  🔄 Trial 110, Fold 2/5
    Fold 2 best total loss: 0.018803
  🔄 Trial 110, Fold 3/5
    Fold 3 best total loss: 0.017659
  🔄 Trial 110, Fold 4/5
    Fold 4 best total loss: 0.013967
  🔄 Trial 110, Fold 5/5


[I 2025-09-30 04:39:44,919] Trial 110 finished with value: 0.1151801246451214 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.209468
  📊 Trial 110 - Average CV Loss: 0.115180 (±0.125111)
  🔄 Trial 111, Fold 1/5
    Fold 1 best total loss: 0.005617
  🔄 Trial 111, Fold 2/5
    Fold 2 best total loss: 0.018227
  🔄 Trial 111, Fold 3/5
    Fold 3 best total loss: 0.014943
  🔄 Trial 111, Fold 4/5
    Fold 4 best total loss: 0.010206
  🔄 Trial 111, Fold 5/5


[I 2025-09-30 04:46:11,927] Trial 111 finished with value: 0.012059571006102488 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.011304
  📊 Trial 111 - Average CV Loss: 0.012060 (±0.004286)
  🔄 Trial 112, Fold 1/5
    Fold 1 best total loss: 0.007873
  🔄 Trial 112, Fold 2/5
    Fold 2 best total loss: 0.013523
  🔄 Trial 112, Fold 3/5
    Fold 3 best total loss: 0.014931
  🔄 Trial 112, Fold 4/5
    Fold 4 best total loss: 0.009744
  🔄 Trial 112, Fold 5/5


[I 2025-09-30 04:52:37,109] Trial 112 finished with value: 0.011060933908447623 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 90 with value: 0.01037523580598645.


    Fold 5 best total loss: 0.009234
  📊 Trial 112 - Average CV Loss: 0.011061 (±0.002694)

📊 OPTIMIZATION RESULTS
✅ 완료된 trials: 113
🏆 최고 성능 trial: 90
💯 최고 성능 값: 0.010375

🎯 최적 하이퍼파라미터:
   lstm_hidden_size: 72
   lstm_n_layers: 4
   lstm_dropout: 0.4
   decoder_hidden_size: 96
   decoder_n_layers: 3
   decoder_dropout: 0.4
   current_hidden_size: 48
   current_n_layers: 2
   current_dropout: 0.1

📈 상위 5개 Trials:
   1. Trial 90: 0.010375
   2. Trial 95: 0.011051
   3. Trial 59: 0.011053
   4. Trial 112: 0.011061
   5. Trial 92: 0.011115
💾 모든 trials 결과가 저장되었습니다: bmed_optuna_trials_20250930_045237.csv
💾 SQLite 데이터베이스에 실시간 저장됨: sqlite:///bmed_hpopt_study.db
   - 중단 후 재시작 시 자동으로 기존 결과를 불러옵니다
   - 다른 프로세스에서 진행상황 모니터링 가능합니다
🎉 하이퍼파라미터 최적화 완료!
