In [34]:
# import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import TensorDataset, DataLoader, Subset
from sklearn.model_selection import KFold
import optuna
from datetime import datetime
from optuna.trial import TrialState
import numpy as np

In [35]:
# LSTM with layer normalization
class LayerNormLSTM(nn.Module):
    """Single LSTM cell whose gate pre-activations and cell state are layer-normalised.

    Args:
        input_node: width of the input vector fed at each step.
        hidden_node: width of the hidden and cell state.
    """

    def __init__(self, input_node, hidden_node):
        super().__init__()
        self.input_node = input_node
        self.hidden_node = hidden_node

        # All four gates are produced by one projection each for x and h.
        gate_width = 4 * hidden_node
        self.w_x = nn.Linear(input_node, gate_width, bias=False)
        self.w_h = nn.Linear(hidden_node, gate_width, bias=False)
        self.ln = nn.LayerNorm(gate_width)
        self.ln_c = nn.LayerNorm(hidden_node)

    def forward(self, x, hx):
        """Advance the cell by one step.

        Args:
            x: input for this step; gates are split along dim 1, so a 2-D
                (batch, input_node) tensor is expected.
            hx: tuple ``(h, c)`` holding the previous hidden and cell state.

        Returns:
            Tuple ``(h_new, c_new)``; ``c_new`` is the layer-normalised cell state.
        """
        prev_h, prev_c = hx

        # Joint normalisation over the concatenated gate pre-activations.
        pre_act = self.ln(self.w_x(x) + self.w_h(prev_h))
        in_gate, forget_gate, candidate, out_gate = torch.chunk(pre_act, 4, dim=1)

        in_gate = torch.sigmoid(in_gate)
        forget_gate = torch.sigmoid(forget_gate)
        out_gate = torch.sigmoid(out_gate)
        candidate = torch.tanh(candidate)

        # The normalised cell state is what gets carried to the next step.
        cell_state = self.ln_c(forget_gate * prev_c + in_gate * candidate)
        hidden_state = out_gate * torch.tanh(cell_state)

        return hidden_state, cell_state

In [36]:
# State feature extractor using LayerNorm LSTM
class StateExtr(nn.Module):
    """Stack of LayerNormLSTM cells that maps a padded sequence to per-step hidden features.

    Args:
        input_node: feature width of each input time step.
        hidden_node: hidden width shared by every stacked cell.
        n_layer: number of stacked cells.
        dropout: drop probability used between layers and on the final output.
    """

    def __init__(self, input_node, hidden_node, n_layer, dropout):
        super().__init__()
        self.hidden_node = hidden_node
        self.n_layer = n_layer
        self.input_node = input_node

        # The first cell reads the raw features; every later cell reads hidden states.
        cell_inputs = [input_node] + [hidden_node] * (n_layer - 1)
        self.lstm_cells = nn.ModuleList(
            [LayerNormLSTM(width, hidden_node) for width in cell_inputs]
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x, seq_len):
        """Run the stack over every time step and zero the padded positions.

        Args:
            x: tensor of shape (batch, max_len, features).
            seq_len: tensor with the number of valid steps per batch item.

        Returns:
            Tensor of shape (batch, max_len, hidden_node); steps at or beyond
            ``seq_len`` are multiplied by zero before the final dropout.
        """
        batch_size, max_len, _ = x.size()
        device = x.device

        def blank_state():
            return torch.zeros(batch_size, self.hidden_node, device=device)

        hidden = [blank_state() for _ in range(self.n_layer)]
        cell = [blank_state() for _ in range(self.n_layer)]
        top_layer = len(self.lstm_cells) - 1

        per_step = []
        for step in range(max_len):
            signal = x[:, step, :]
            for depth, lstm in enumerate(self.lstm_cells):
                hidden[depth], cell[depth] = lstm(signal, (hidden[depth], cell[depth]))
                # Dropout only sits between layers; the top layer's output is passed on raw.
                if depth == top_layer:
                    signal = hidden[depth]
                else:
                    signal = self.dropout(hidden[depth])
            per_step.append(signal)

        stacked = torch.stack(per_step, dim=1)

        # The mask is built on CPU from the lengths, then moved to the data's device.
        lengths = seq_len.detach().cpu().long()
        positions = torch.arange(max_len, device='cpu')
        valid = positions.unsqueeze(0) < lengths.unsqueeze(1)
        valid = valid.float().to(device).unsqueeze(-1)

        return self.dropout(stacked * valid)

In [37]:
# Physical change regressor
class PhysRegr(nn.Module):
    """MLP head that maps hidden states to sigmoid-bounded outputs in (0, 1).

    Args:
        input_node: width of the incoming hidden state.
        output_node: number of output values.
        n_layer: number of Linear/ReLU/Dropout blocks (at least one is always built).
        hidden_node: width of every hidden block.
        dropout: drop probability inside each block.
    """

    def __init__(self, input_node, output_node, n_layer, hidden_node, dropout):
        super().__init__()

        # Widths seen by successive blocks: input -> hidden -> hidden -> ...
        widths = [input_node] + [hidden_node] * max(n_layer, 1)

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)]

        stack += [nn.Linear(hidden_node, output_node), nn.Sigmoid()]

        self.layers = nn.Sequential(*stack)

    def forward(self, hidden_states):
        """Apply the MLP to the last dimension of ``hidden_states``."""
        return self.layers(hidden_states)

In [38]:
# Current regressor
class CurrRegr(nn.Module):
    """MLP head that maps a state vector to one unbounded scalar.

    Args:
        input_node: width of the incoming state vector.
        hidden_node: width of every hidden block.
        n_layer: number of Linear/ReLU/Dropout blocks (at least one is always built).
        dropout: drop probability inside each block.
    """

    def __init__(self, input_node, hidden_node, n_layer, dropout):
        super().__init__()

        # Widths seen by successive blocks: input -> hidden -> hidden -> ...
        widths = [input_node] + [hidden_node] * max(n_layer, 1)

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)]

        # Linear read-out with no activation: the output is not range-limited here.
        stack.append(nn.Linear(hidden_node, 1))

        self.layers = nn.Sequential(*stack)

    def forward(self, hidden_states):
        """Apply the MLP to the last dimension of ``hidden_states``."""
        return self.layers(hidden_states)

In [39]:
# Physical Constraint Layer
class PhysConstr(nn.Module):
    """Turns predicted change ratios into the next normalised state via balance equations.

    The layer de-normalises the current state, applies the predicted volume and
    amount transfers so that totals are conserved, re-normalises the result and
    appends a current value predicted by ``curr_regr``.

    Args:
        range_mm: mapping ``name -> {'min': ..., 'max': ...}`` for every feature
            in ``_FEATURE_ORDER``.
        curr_regr: module called on the 9-feature updated state to predict the
            normalised current.
        eps: stored on the instance only.

    NOTE(review): ``eps`` is never read and the divisions by the updated volumes
    are unguarded - presumably a clamp was intended; confirm before relying on it.
    """

    # Column order shared by the range table and the state tensors.
    _FEATURE_ORDER = ('V', 'E', 'VF', 'VA', 'VB', 'CFLA', 'CALA', 'CFK', 'CBK', 'I')

    def __init__(self, range_mm, curr_regr, eps=1e-2):
        super().__init__()

        self.eps = eps
        self.curr_regr = curr_regr
        self.register_buffer('range_mm_tensor', self._range2tensor(range_mm))

    def _range2tensor(self, range_mm):
        """Pack the per-feature min/max into a (n_features, 2) tensor."""
        bounds = torch.zeros(len(self._FEATURE_ORDER), 2)

        for row, key in enumerate(self._FEATURE_ORDER):
            bounds[row, 0] = range_mm[key]['min']
            bounds[row, 1] = range_mm[key]['max']

        return bounds

    def _norm_tensor(self, data, feature_idx):
        """Min-max scale ``data`` using the range of feature ``feature_idx``."""
        lower = self.range_mm_tensor[feature_idx, 0]
        upper = self.range_mm_tensor[feature_idx, 1]
        return (data - lower) / (upper - lower)

    def _denorm_tensor(self, norm_data, feature_idx):
        """Invert ``_norm_tensor`` for feature ``feature_idx``."""
        lower = self.range_mm_tensor[feature_idx, 0]
        upper = self.range_mm_tensor[feature_idx, 1]
        return norm_data * (upper - lower) + lower

    def forward(self, physical_changes, current_state):
        """Compute the next normalised 10-feature state.

        Args:
            physical_changes: tensor whose last dim holds, in order, the ratios
                rdVA, rdVB, rLA2K and rdNBK.
            current_state: normalised state whose last dim holds at least the
                first nine features of ``_FEATURE_ORDER``.

        Returns:
            Tensor with the same leading dims and 10 features: V and E copied
            through, the updated volumes and concentrations, and the predicted
            current (clamped to be non-negative in real units).
        """
        col = {key: pos for pos, key in enumerate(self._FEATURE_ORDER)}

        def real(key):
            # Slice with k:k+1 so the feature axis is kept.
            k = col[key]
            return self._denorm_tensor(current_state[..., k:k + 1], k)

        VF, VA, VB = real('VF'), real('VA'), real('VB')
        CFLA, CALA = real('CFLA'), real('CALA')
        CFK, CBK = real('CFK'), real('CBK')

        # Amounts = concentration * volume of the compartment they live in.
        NFLA = CFLA * VF
        NALA = CALA * VA
        NFK = CFK * VF
        NBK = CBK * VB

        rdVA, rdVB, rLA2K, rdNBK = (
            physical_changes[..., k:k + 1] for k in range(4)
        )

        # Volume ratios are centred at 0.5, so the transfer can go either way.
        dVA = VF * (rdVA - 0.5)
        dVB = VF * (rdVB - 0.5)
        dNBK = NFK * rdNBK
        dNALA = dNBK * rLA2K

        # Whatever leaves F enters A or B, so totals are conserved.
        nVF = VF - dVA - dVB
        nVA = VA + dVA
        nVB = VB + dVB

        nNFLA = NFLA - dNALA
        nNALA = NALA + dNALA
        nNFK = NFK - dNBK
        nNBK = NBK + dNBK

        nCFLA = nNFLA / nVF
        nCALA = nNALA / nVA
        nCFK = nNFK / nVF
        nCBK = nNBK / nVB

        V = current_state[..., 0:1]
        E = current_state[..., 1:2]
        updated = [
            self._norm_tensor(nVF, col['VF']),
            self._norm_tensor(nVA, col['VA']),
            self._norm_tensor(nVB, col['VB']),
            self._norm_tensor(nCFLA, col['CFLA']),
            self._norm_tensor(nCALA, col['CALA']),
            self._norm_tensor(nCFK, col['CFK']),
            self._norm_tensor(nCBK, col['CBK']),
        ]

        temp_state = torch.cat([V, E, *updated], dim=-1)

        # Clamp in real units so a negative current is never produced.
        nI_pred_norm = self.curr_regr(temp_state)
        nI_real = torch.clamp(self._denorm_tensor(nI_pred_norm, col['I']), min=0.0)
        nI_norm = self._norm_tensor(nI_real, col['I'])

        return torch.cat([temp_state, nI_norm], dim=-1)

In [40]:
class BMEDAutoregressiveModel(nn.Module):
    """One-step-ahead model: sequence encoder -> change regressor -> physical constraint layer.

    Args:
        state_extr_params: keyword arguments for ``StateExtr``.
        phys_regr_params: keyword arguments for ``PhysRegr``.
        curr_regr_params: keyword arguments for ``CurrRegr``.
        range_mm: per-feature min/max mapping handed to ``PhysConstr``.
    """

    def __init__(self, state_extr_params, phys_regr_params, curr_regr_params, range_mm):
        super().__init__()
        self.state_extr = StateExtr(**state_extr_params)
        self.phys_regr = PhysRegr(**phys_regr_params)
        self.curr_regr = CurrRegr(**curr_regr_params)
        # The same current regressor instance is handed to the constraint layer.
        self.phys_constr = PhysConstr(range_mm, self.curr_regr)

    def forward(self, x, seq_len):
        """Predict the next state for every step of ``x``.

        Args:
            x: input sequence passed to both the encoder and the constraint layer.
            seq_len: valid length of each sequence in the batch.

        Returns:
            The next-state tensor produced by the constraint layer.
        """
        encoded = self.state_extr(x, seq_len)
        change_ratios = self.phys_regr(encoded)
        return self.phys_constr(change_ratios, x)

In [41]:
class NoamScheduler:
    """Epoch-based Noam learning-rate schedule (linear warm-up, then inverse-sqrt decay).

    The learning rate written to every parameter group is

        factor * model_size ** -0.5 * min(epoch ** -0.5, epoch * warmup_epochs ** -1.5)

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts with an ``'lr'`` key).
        model_size: model width used for the ``model_size ** -0.5`` scale.
        warmup_epochs: number of epochs of linear warm-up; with a value <= 0 the
            warm-up term is skipped and only the inverse-sqrt decay is applied.
        factor: global multiplier for the schedule.
    """

    def __init__(self, optimizer, model_size, warmup_epochs, factor=1.0):
        self.optimizer = optimizer
        self.model_size = model_size
        self.warmup_epochs = warmup_epochs
        # Honour the caller's multiplier (it used to be hard-coded to 1).
        self.factor = factor
        self.epoch_num = 0

    def step_epoch(self):
        """Advance one epoch, write the new rate to the optimizer and return it."""
        self.epoch_num += 1

        decay = self.epoch_num ** (-0.5)
        if self.warmup_epochs > 0:
            warmup = self.epoch_num * self.warmup_epochs ** (-1.5)
            scale = min(decay, warmup)
        else:
            # 0 ** -1.5 would raise ZeroDivisionError; no warm-up means pure decay.
            scale = decay

        lr = self.factor * (self.model_size ** (-0.5) * scale)
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        return lr

In [42]:
# Utility functions
def df_treat(name):
    """Load the experiment CSV and min-max normalise the ten model features.

    V, E, VF, VA and VB are scaled between 0.8 * column-min and 1.2 * column-max;
    CFLA, CALA, CFK, CBK and I are scaled between 0 and 1.2 * column-max.

    Args:
        name: path of the CSV file; it must contain 'exp', 't' and the ten
            feature columns.

    Returns:
        Tuple ``(df, ndf, range_mm, exp_num_list)``: the raw frame, the normalised
        frame (columns 'exp', 't', then the features), the per-feature
        ``{'min', 'max'}`` mapping and the sorted unique experiment ids.
    """
    df = pd.read_csv(name)

    margin_cols = ['V', 'E', 'VF', 'VA', 'VB']
    zero_floor_cols = ['CFLA', 'CALA', 'CFK', 'CBK', 'I']

    range_mm = {}
    for col in margin_cols:
        range_mm[col] = {'min': df[col].min()*0.8, 'max': df[col].max()*1.2}
    for col in zero_floor_cols:
        range_mm[col] = {'min': 0, 'max': df[col].max()*1.2}

    ndf = pd.DataFrame()
    ndf['exp'] = df['exp']
    ndf['t'] = df['t']

    # Every feature has a range entry, so each one is scaled.
    for col, bounds in range_mm.items():
        span = bounds['max'] - bounds['min']
        ndf[col] = (df[col] - bounds['min'])/span

    exp_num_list = sorted(ndf['exp'].unique())
    return df, ndf, range_mm, exp_num_list

def seq_data(ndf, exp_num_list):
    """Split the normalised frame into one feature array per experiment.

    Args:
        ndf: frame with an 'exp' column and the ten feature columns.
        exp_num_list: experiment ids, in the order the sequences should appear.

    Returns:
        List of 2-D arrays (rows of that experiment x 10 features).
    """
    feature_cols = ['V', 'E', 'VF', 'VA', 'VB', 'CFLA', 'CALA', 'CFK', 'CBK', 'I']
    return [
        ndf.loc[ndf['exp'] == exp, feature_cols].values
        for exp in exp_num_list
    ]

def pad_seq(seq):
    """Pad variable-length sequences to a common length with -1.

    Args:
        seq: non-empty list of array-likes, one per sequence.

    Returns:
        Tuple ``(padded, seq_len, max_len)``: the batch-first padded tensor, the
        list of original lengths and the longest length.
    """
    seq_len = [len(item) for item in seq]
    max_len = max(seq_len)
    as_tensors = [torch.tensor(item) for item in seq]
    padded = pad_sequence(as_tensors, batch_first=True, padding_value=-1)
    return padded, seq_len, max_len

def gen_dataset(pad_seq, seq_len):
    """Bundle the padded sequences and their lengths into a ``TensorDataset``.

    Args:
        pad_seq: padded sequence tensor; it is cast to float32.
        seq_len: list with the true length of each sequence.

    Returns:
        ``TensorDataset`` yielding ``(sequence, length)`` pairs.
    """
    return TensorDataset(pad_seq.float(), torch.tensor(seq_len))

def masked_mse_loss(pred, target, seq_len):
    """Squared-error loss that ignores padded time steps.

    The squared errors of all features at valid steps are summed and divided by
    the number of valid time steps (not by steps x features), so the value is
    the per-step sum over features.

    Args:
        pred: predictions of shape (batch, max_len, features).
        target: targets with the same shape as ``pred``.
        seq_len: tensor with the number of valid steps per batch item.

    Returns:
        Scalar loss tensor.
    """
    _, max_len, _ = pred.shape

    # Build the validity mask on CPU, then move it next to the predictions.
    lengths = seq_len.detach().cpu().long()
    positions = torch.arange(max_len, device='cpu')
    valid = (positions.unsqueeze(0) < lengths.unsqueeze(1)).float().to(pred.device)

    per_element = F.mse_loss(pred, target, reduction='none')
    kept = per_element * valid.unsqueeze(-1)

    return kept.sum() / valid.sum()

def tf_data(input_seq, seq_len):
    """Build teacher-forcing pairs: each step's input predicts the following step.

    Args:
        input_seq: tensor of shape (batch, time, features).
        seq_len: valid length of each sequence.

    Returns:
        Tuple ``(inputs, targets, target_seq_len)``: inputs are all but the last
        step with the last feature column dropped, targets are all but the first
        step with every feature, and lengths are reduced by one.
    """
    all_but_last_step = input_seq[:, :-1]
    inputs = all_but_last_step[:, :, :-1]
    targets = input_seq[:, 1:]
    return inputs, targets, seq_len - 1

In [43]:
# Optuna objective function
def objective(trial):
    """
    Optuna objective: score one hyperparameter set with 5-fold cross validation.

    For every fold a fresh model is trained for a fixed number of epochs with a
    Noam learning-rate schedule. The fold score is the lowest
    ``train_loss + val_loss`` seen in any epoch, and the trial value is the mean
    of the fold scores.

    Relies on the module-level globals ``dataset`` and ``range_mm`` that
    ``run_optuna_optimization`` sets before the study starts.

    Args:
        trial: the ``optuna.trial.Trial`` used to suggest hyperparameters.

    Returns:
        Mean of the per-fold best total losses (lower is better).
    """

    # 1. Suggest hyperparameters
    # LSTM state extractor
    lstm_hidden_size = trial.suggest_categorical('lstm_hidden_size', [16, 32, 48, 64, 72, 96])
    lstm_n_layers = trial.suggest_int('lstm_n_layers', 2, 6, step=1)
    lstm_dropout = trial.suggest_float('lstm_dropout', 0.1, 0.5, step=0.1)

    # Physical change regressor
    decoder_hidden_size = trial.suggest_categorical('decoder_hidden_size', [16, 32, 48, 64, 72, 96])
    decoder_n_layers = trial.suggest_int('decoder_n_layers', 2, 6, step=1)
    decoder_dropout = trial.suggest_float('decoder_dropout', 0.1, 0.6, step=0.1)

    # Current regressor
    current_hidden_size = trial.suggest_categorical('current_hidden_size', [16, 32, 48, 64, 72, 96])
    current_n_layers = trial.suggest_int('current_n_layers', 2, 6, step=1)
    current_dropout = trial.suggest_float('current_dropout', 0.1, 0.6, step=0.1)

    # 2. K-fold cross validation setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    n_splits = 5
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_losses = []

    # The dataset comes from a module-level global.
    indices = list(range(len(dataset)))

    # 3. Model parameters depend only on the trial, so build them once for all folds.
    state_extr_params = {
        'input_node': 9,
        'hidden_node': lstm_hidden_size,
        'n_layer': lstm_n_layers,
        'dropout': lstm_dropout
    }

    phys_regr_params = {
        'input_node': lstm_hidden_size,
        'hidden_node': decoder_hidden_size,
        'n_layer': decoder_n_layers,
        'dropout': decoder_dropout,
        'output_node': 4
    }

    curr_regr_params = {
        'input_node': 9,
        'hidden_node': current_hidden_size,
        'n_layer': current_n_layers,
        'dropout': current_dropout
    }

    # Epoch budget is kept small for the search; the first 10% is LR warm-up.
    total_epochs = 100
    warmup_epochs = int(total_epochs * 0.1)

    for fold, (train_idx, val_idx) in enumerate(kfold.split(indices)):
        print(f"  🔄 Trial {trial.number}, Fold {fold+1}/{n_splits}")

        # Per-fold data loaders
        train_subset = Subset(dataset, train_idx)
        val_subset = Subset(dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=3, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=3, shuffle=False)

        # 4. Fresh model for this fold
        model = BMEDAutoregressiveModel(state_extr_params, phys_regr_params, curr_regr_params, range_mm)
        model = model.to(device)

        # 5. Optimizer and scheduler; lr=1.0 is a placeholder that the
        #    scheduler overwrites before the first optimizer step.
        optimizer = torch.optim.AdamW(model.parameters(), lr=1.0)

        scheduler = NoamScheduler(
            optimizer,
            model_size=lstm_hidden_size,
            warmup_epochs=warmup_epochs,
            factor=1
        )

        # 6. Training
        best_total_loss = float('inf')

        for epoch in range(total_epochs):
            # Update the learning rate for this epoch
            scheduler.step_epoch()

            # Train
            model.train()
            train_loss = 0.0
            train_batches = 0

            for input_seq, seq_len in train_loader:
                try:
                    input_seq = input_seq.to(device)
                    seq_len = seq_len.to(device)

                    inputs, targets, target_seq_len = tf_data(input_seq, seq_len)

                    optimizer.zero_grad()
                    pred = model(inputs, target_seq_len)
                    loss = masked_mse_loss(pred, targets, target_seq_len)

                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                    optimizer.step()

                    train_loss += loss.item()
                    train_batches += 1

                except Exception as e:
                    # Best effort: report the failing batch and carry on.
                    print(f"❌ Error in training: {str(e)}")
                    continue

            # No batch succeeded: further epochs cannot help, so give up on this fold.
            if train_batches == 0:
                break

            train_loss = train_loss / train_batches

            # Validate
            model.eval()
            val_loss = 0.0
            val_batches = 0

            with torch.no_grad():
                for input_seq, seq_len in val_loader:
                    try:
                        input_seq = input_seq.to(device)
                        seq_len = seq_len.to(device)

                        inputs, targets, target_seq_len = tf_data(input_seq, seq_len)

                        pred = model(inputs, target_seq_len)
                        loss = masked_mse_loss(pred, targets, target_seq_len)

                        val_loss += loss.item()
                        val_batches += 1

                    except Exception as e:
                        # Report instead of silently dropping the batch, as the
                        # training loop does.
                        print(f"❌ Error in validation: {str(e)}")
                        continue

            if val_batches == 0:
                break

            val_loss = val_loss / val_batches

            # Combined score used to rank epochs
            total_loss = train_loss + val_loss

            # Track the best epoch; training always runs the full epoch budget.
            if total_loss < best_total_loss:
                best_total_loss = total_loss

        fold_losses.append(best_total_loss)
        print(f"    Fold {fold+1} best total loss: {best_total_loss:.6f}")

        # Free memory before the next fold
        del model, optimizer, scheduler
        torch.cuda.empty_cache()

    # 7. Return the mean loss over folds
    avg_loss = np.mean(fold_losses)
    std_loss = np.std(fold_losses)

    print(f"  📊 Trial {trial.number} - Average CV Loss: {avg_loss:.6f} (±{std_loss:.6f})")

    return avg_loss

In [44]:
# Main optimization routine
def run_optuna_optimization():
    """Run the Optuna hyperparameter search and report and save the results.

    Loads 'BMED_DATA_AG.csv', publishes the dataset and normalisation ranges as
    the module-level globals ``dataset`` and ``range_mm`` (read by ``objective``),
    then creates or resumes the SQLite-backed study 'bmed_tf_optimization' and
    runs up to 100 more trials. Afterwards it prints a summary and writes every
    trial to a timestamped CSV file.

    Returns:
        The ``optuna.study.Study`` object.
    """

    print("🚀 BMED TF Model Hyperparameter Optimization with Optuna")
    print("="*80)

    # Globals consumed by objective()
    global dataset, range_mm

    print("📋 데이터 로드 중...")
    df, ndf, range_mm, exp_num_list = df_treat('BMED_DATA_AG.csv')
    seq = seq_data(ndf, exp_num_list)
    pad, seq_len, max_len = pad_seq(seq)
    dataset = gen_dataset(pad, seq_len)

    print(f"   - 총 실험 개수: {len(exp_num_list)}")
    print(f"   - 총 데이터 포인트: {len(dataset)}")
    print(f"   - 최대 시퀀스 길이: {max_len}")

    # Create (or resume) the Optuna study backed by a SQLite database
    db_url = f"sqlite:///bmed_hpopt_study.db"

    study = optuna.create_study(
        direction='minimize',
        study_name='bmed_tf_optimization',
        sampler=optuna.samplers.TPESampler(seed=42),
        storage=db_url,
        load_if_exists=True
    )

    # Run the optimization
    n_trials = 100
    print(f"🔍 최적화 시작 (총 {n_trials} trials)")

    try:
        study.optimize(objective, n_trials=n_trials, timeout=None)
    except KeyboardInterrupt:
        print("\n⚠️ 최적화가 사용자에 의해 중단되었습니다.")

    # Result summary
    print("\n" + "="*80)
    print("📊 OPTIMIZATION RESULTS")
    print("="*80)

    # Count only trials that finished; study.trials also holds failed and
    # running ones.
    completed_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
    print(f"✅ 완료된 trials: {len(completed_trials)}")

    all_trials_df = study.trials_dataframe()

    if completed_trials:
        print(f"🏆 최고 성능 trial: {study.best_trial.number}")
        print(f"💯 최고 성능 값: {study.best_value:.6f}")

        print(f"\n🎯 최적 하이퍼파라미터:")
        for key, value in study.best_params.items():
            print(f"   {key}: {value}")

        # Top 5 trials by objective value
        print(f"\n📈 상위 5개 Trials:")
        top_trials_df = all_trials_df.sort_values('value').head(5)
        for idx, (_, row) in enumerate(top_trials_df.iterrows()):
            print(f"   {idx+1}. Trial {int(row['number'])}: {row['value']:.6f}")
    else:
        # study.best_trial raises ValueError when nothing has completed
        # (for example after an early interrupt), so skip the best-trial report.
        print("⚠️ 완료된 trial이 없어 최고 성능 결과를 표시할 수 없습니다.")

    # Save results
    result_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Write all trials to CSV
    trials_file = f"bmed_optuna_trials_{result_timestamp}.csv"
    all_trials_df.to_csv(trials_file, index=False)
    print(f"💾 모든 trials 결과가 저장되었습니다: {trials_file}")

    # SQLite database info
    print(f"💾 SQLite 데이터베이스에 실시간 저장됨: {db_url}")
    print(f"   - 중단 후 재시작 시 자동으로 기존 결과를 불러옵니다")
    print(f"   - 다른 프로세스에서 진행상황 모니터링 가능합니다")

    print("="*80)
    print("🎉 하이퍼파라미터 최적화 완료!")

    return study

# Entry point: run the full hyperparameter search and keep the resulting study
# object in `study` for inspection in later cells.
if __name__ == "__main__":
    study = run_optuna_optimization()

[I 2025-10-09 23:32:51,669] A new study created in RDB with name: bmed_tf_optimization


🚀 BMED TF Model Hyperparameter Optimization with Optuna
📋 데이터 로드 중...
   - 총 실험 개수: 15
   - 총 데이터 포인트: 15
   - 최대 시퀀스 길이: 37
🔍 최적화 시작 (총 100 trials)
  🔄 Trial 0, Fold 1/5
    Fold 1 best total loss: 0.011586
  🔄 Trial 0, Fold 2/5
    Fold 2 best total loss: 0.016059
  🔄 Trial 0, Fold 3/5
    Fold 3 best total loss: 0.024833
  🔄 Trial 0, Fold 4/5
    Fold 4 best total loss: 0.029543
  🔄 Trial 0, Fold 5/5


[I 2025-10-09 23:35:15,231] Trial 0 finished with value: 0.019913131697103383 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 64, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 72, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.017545
  📊 Trial 0 - Average CV Loss: 0.019913 (±0.006430)
  🔄 Trial 1, Fold 1/5
    Fold 1 best total loss: 0.027200
  🔄 Trial 1, Fold 2/5
    Fold 2 best total loss: 0.019491
  🔄 Trial 1, Fold 3/5
    Fold 3 best total loss: 0.027404
  🔄 Trial 1, Fold 4/5
    Fold 4 best total loss: 0.019561
  🔄 Trial 1, Fold 5/5


[I 2025-10-09 23:41:05,014] Trial 1 finished with value: 0.02260777198243886 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 5, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 5, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 64, 'current_n_layers': 3, 'current_dropout': 0.4}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.019382
  📊 Trial 1 - Average CV Loss: 0.022608 (±0.003834)
  🔄 Trial 2, Fold 1/5
    Fold 1 best total loss: 0.315511
  🔄 Trial 2, Fold 2/5
    Fold 2 best total loss: 0.016537
  🔄 Trial 2, Fold 3/5
    Fold 3 best total loss: 0.019712
  🔄 Trial 2, Fold 4/5
    Fold 4 best total loss: 0.014096
  🔄 Trial 2, Fold 5/5


[I 2025-10-09 23:45:42,067] Trial 2 finished with value: 0.07623667320003732 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 4, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 6, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 5, 'current_dropout': 0.2}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.015327
  📊 Trial 2 - Average CV Loss: 0.076237 (±0.119652)
  🔄 Trial 3, Fold 1/5
    Fold 1 best total loss: 0.033728
  🔄 Trial 3, Fold 2/5
    Fold 2 best total loss: 0.099750
  🔄 Trial 3, Fold 3/5
    Fold 3 best total loss: 0.226241
  🔄 Trial 3, Fold 4/5
    Fold 4 best total loss: 0.042650
  🔄 Trial 3, Fold 5/5


[I 2025-10-09 23:49:15,085] Trial 3 finished with value: 0.09484804426319897 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 16, 'decoder_n_layers': 5, 'decoder_dropout': 0.4, 'current_hidden_size': 16, 'current_n_layers': 5, 'current_dropout': 0.30000000000000004}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.071872
  📊 Trial 3 - Average CV Loss: 0.094848 (±0.069680)
  🔄 Trial 4, Fold 1/5
    Fold 1 best total loss: 0.055547
  🔄 Trial 4, Fold 2/5
    Fold 2 best total loss: 0.091778
  🔄 Trial 4, Fold 3/5
    Fold 3 best total loss: 0.045753
  🔄 Trial 4, Fold 4/5
    Fold 4 best total loss: 0.049969
  🔄 Trial 4, Fold 5/5


[I 2025-10-09 23:52:42,496] Trial 4 finished with value: 0.06321743852458894 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 16, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 16, 'current_n_layers': 6, 'current_dropout': 0.4}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.073040
  📊 Trial 4 - Average CV Loss: 0.063217 (±0.017038)
  🔄 Trial 5, Fold 1/5
    Fold 1 best total loss: 0.101596
  🔄 Trial 5, Fold 2/5
    Fold 2 best total loss: 0.020421
  🔄 Trial 5, Fold 3/5
    Fold 3 best total loss: 0.019629
  🔄 Trial 5, Fold 4/5
    Fold 4 best total loss: 0.017517
  🔄 Trial 5, Fold 5/5


[I 2025-10-09 23:59:17,608] Trial 5 finished with value: 0.03642082493752241 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 6, 'lstm_dropout': 0.5, 'decoder_hidden_size': 32, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 64, 'current_n_layers': 4, 'current_dropout': 0.2}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.022941
  📊 Trial 5 - Average CV Loss: 0.036421 (±0.032634)
  🔄 Trial 6, Fold 1/5
    Fold 1 best total loss: 0.317571
  🔄 Trial 6, Fold 2/5
    Fold 2 best total loss: 0.014572
  🔄 Trial 6, Fold 3/5
    Fold 3 best total loss: 0.016941
  🔄 Trial 6, Fold 4/5
    Fold 4 best total loss: 0.012692
  🔄 Trial 6, Fold 5/5


[I 2025-10-10 00:05:53,451] Trial 6 finished with value: 0.0754659479483962 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 6, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.015554
  📊 Trial 6 - Average CV Loss: 0.075466 (±0.121061)
  🔄 Trial 7, Fold 1/5
    Fold 1 best total loss: 0.025590
  🔄 Trial 7, Fold 2/5
    Fold 2 best total loss: 0.028072
  🔄 Trial 7, Fold 3/5
    Fold 3 best total loss: 0.030069
  🔄 Trial 7, Fold 4/5
    Fold 4 best total loss: 0.026878
  🔄 Trial 7, Fold 5/5


[I 2025-10-10 00:11:16,019] Trial 7 finished with value: 0.028423114400357007 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 5, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 6, 'decoder_dropout': 0.6, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.6}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.031507
  📊 Trial 7 - Average CV Loss: 0.028423 (±0.002133)
  🔄 Trial 8, Fold 1/5
    Fold 1 best total loss: 0.315123
  🔄 Trial 8, Fold 2/5
    Fold 2 best total loss: 0.319119
  🔄 Trial 8, Fold 3/5
    Fold 3 best total loss: 0.075013
  🔄 Trial 8, Fold 4/5
    Fold 4 best total loss: 0.205025
  🔄 Trial 8, Fold 5/5


[I 2025-10-10 00:17:44,501] Trial 8 finished with value: 0.20203491467982532 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 6, 'lstm_dropout': 0.4, 'decoder_hidden_size': 64, 'decoder_n_layers': 2, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 5, 'current_dropout': 0.2}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.095895
  📊 Trial 8 - Average CV Loss: 0.202035 (±0.103833)
  🔄 Trial 9, Fold 1/5
    Fold 1 best total loss: 0.012002
  🔄 Trial 9, Fold 2/5
    Fold 2 best total loss: 0.019492
  🔄 Trial 9, Fold 3/5
    Fold 3 best total loss: 0.021686
  🔄 Trial 9, Fold 4/5
    Fold 4 best total loss: 0.204739
  🔄 Trial 9, Fold 5/5


[I 2025-10-10 00:20:00,551] Trial 9 finished with value: 0.05464674348477274 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 2, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 5, 'decoder_dropout': 0.4, 'current_hidden_size': 64, 'current_n_layers': 5, 'current_dropout': 0.2}. Best is trial 0 with value: 0.019913131697103383.


    Fold 5 best total loss: 0.015313
  📊 Trial 9 - Average CV Loss: 0.054647 (±0.075121)
  🔄 Trial 10, Fold 1/5
    Fold 1 best total loss: 0.018694
  🔄 Trial 10, Fold 2/5
    Fold 2 best total loss: 0.018515
  🔄 Trial 10, Fold 3/5
    Fold 3 best total loss: 0.024906
  🔄 Trial 10, Fold 4/5
    Fold 4 best total loss: 0.018880
  🔄 Trial 10, Fold 5/5


[I 2025-10-10 00:22:22,657] Trial 10 finished with value: 0.019537586160004138 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 2, 'lstm_dropout': 0.4, 'decoder_hidden_size': 64, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.6}. Best is trial 10 with value: 0.019537586160004138.


    Fold 5 best total loss: 0.016692
  📊 Trial 10 - Average CV Loss: 0.019538 (±0.002797)
  🔄 Trial 11, Fold 1/5
    Fold 1 best total loss: 0.016449
  🔄 Trial 11, Fold 2/5
    Fold 2 best total loss: 0.019170
  🔄 Trial 11, Fold 3/5
    Fold 3 best total loss: 0.025008
  🔄 Trial 11, Fold 4/5
    Fold 4 best total loss: 0.020374
  🔄 Trial 11, Fold 5/5


[I 2025-10-10 00:24:42,141] Trial 11 finished with value: 0.020738555886782705 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 2, 'lstm_dropout': 0.4, 'decoder_hidden_size': 64, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.6}. Best is trial 10 with value: 0.019537586160004138.


    Fold 5 best total loss: 0.022692
  📊 Trial 11 - Average CV Loss: 0.020739 (±0.002934)
  🔄 Trial 12, Fold 1/5
    Fold 1 best total loss: 0.016690
  🔄 Trial 12, Fold 2/5
    Fold 2 best total loss: 0.019627
  🔄 Trial 12, Fold 3/5
    Fold 3 best total loss: 0.023440
  🔄 Trial 12, Fold 4/5
    Fold 4 best total loss: 0.017742
  🔄 Trial 12, Fold 5/5


[I 2025-10-10 00:26:58,066] Trial 12 finished with value: 0.020465927687473594 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 2, 'lstm_dropout': 0.4, 'decoder_hidden_size': 64, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 72, 'current_n_layers': 3, 'current_dropout': 0.5}. Best is trial 10 with value: 0.019537586160004138.


    Fold 5 best total loss: 0.024831
  📊 Trial 12 - Average CV Loss: 0.020466 (±0.003171)
  🔄 Trial 13, Fold 1/5
    Fold 1 best total loss: 0.017480
  🔄 Trial 13, Fold 2/5
    Fold 2 best total loss: 0.018342
  🔄 Trial 13, Fold 3/5
    Fold 3 best total loss: 0.026779
  🔄 Trial 13, Fold 4/5
    Fold 4 best total loss: 0.033353
  🔄 Trial 13, Fold 5/5


[I 2025-10-10 00:30:19,323] Trial 13 finished with value: 0.024801072967238724 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 3, 'lstm_dropout': 0.5, 'decoder_hidden_size': 96, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 10 with value: 0.019537586160004138.


    Fold 5 best total loss: 0.028051
  📊 Trial 13 - Average CV Loss: 0.024801 (±0.006049)
  🔄 Trial 14, Fold 1/5
    Fold 1 best total loss: 0.009000
  🔄 Trial 14, Fold 2/5
    Fold 2 best total loss: 0.013610
  🔄 Trial 14, Fold 3/5
    Fold 3 best total loss: 0.019865
  🔄 Trial 14, Fold 4/5
    Fold 4 best total loss: 0.012016
  🔄 Trial 14, Fold 5/5


[I 2025-10-10 00:32:39,239] Trial 14 finished with value: 0.08012944562360644 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 2, 'lstm_dropout': 0.4, 'decoder_hidden_size': 64, 'decoder_n_layers': 4, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 10 with value: 0.019537586160004138.


    Fold 5 best total loss: 0.346156
  📊 Trial 14 - Average CV Loss: 0.080129 (±0.133061)
  🔄 Trial 15, Fold 1/5
    Fold 1 best total loss: 0.010357
  🔄 Trial 15, Fold 2/5
    Fold 2 best total loss: 0.013830
  🔄 Trial 15, Fold 3/5
    Fold 3 best total loss: 0.023810
  🔄 Trial 15, Fold 4/5
    Fold 4 best total loss: 0.018630
  🔄 Trial 15, Fold 5/5


[I 2025-10-10 00:36:03,304] Trial 15 finished with value: 0.08245901779737323 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 64, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 72, 'current_n_layers': 4, 'current_dropout': 0.30000000000000004}. Best is trial 10 with value: 0.019537586160004138.


    Fold 5 best total loss: 0.345669
  📊 Trial 15 - Average CV Loss: 0.082459 (±0.131683)
  🔄 Trial 16, Fold 1/5
    Fold 1 best total loss: 0.012032
  🔄 Trial 16, Fold 2/5
    Fold 2 best total loss: 0.016069
  🔄 Trial 16, Fold 3/5
    Fold 3 best total loss: 0.022829
  🔄 Trial 16, Fold 4/5
    Fold 4 best total loss: 0.019519
  🔄 Trial 16, Fold 5/5


[I 2025-10-10 00:40:25,674] Trial 16 finished with value: 0.017243169341236354 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.5, 'decoder_hidden_size': 96, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.015768
  📊 Trial 16 - Average CV Loss: 0.017243 (±0.003663)
  🔄 Trial 17, Fold 1/5
    Fold 1 best total loss: 0.018633
  🔄 Trial 17, Fold 2/5
    Fold 2 best total loss: 0.026560
  🔄 Trial 17, Fold 3/5
    Fold 3 best total loss: 0.031362
  🔄 Trial 17, Fold 4/5
    Fold 4 best total loss: 0.023000
  🔄 Trial 17, Fold 5/5


[I 2025-10-10 00:44:50,266] Trial 17 finished with value: 0.02409415498841554 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.1, 'current_hidden_size': 32, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.020916
  📊 Trial 17 - Average CV Loss: 0.024094 (±0.004472)
  🔄 Trial 18, Fold 1/5
    Fold 1 best total loss: 0.029636
  🔄 Trial 18, Fold 2/5
    Fold 2 best total loss: 0.023470
  🔄 Trial 18, Fold 3/5
    Fold 3 best total loss: 0.031140
  🔄 Trial 18, Fold 4/5
    Fold 4 best total loss: 0.024700
  🔄 Trial 18, Fold 5/5


[I 2025-10-10 00:50:13,341] Trial 18 finished with value: 0.027769148349761963 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 5, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.6}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.029900
  📊 Trial 18 - Average CV Loss: 0.027769 (±0.003075)
  🔄 Trial 19, Fold 1/5
    Fold 1 best total loss: 0.020165
  🔄 Trial 19, Fold 2/5
    Fold 2 best total loss: 0.031549
  🔄 Trial 19, Fold 3/5
    Fold 3 best total loss: 0.033971
  🔄 Trial 19, Fold 4/5
    Fold 4 best total loss: 0.028661
  🔄 Trial 19, Fold 5/5


[I 2025-10-10 00:54:41,379] Trial 19 finished with value: 0.031031481688842177 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.5, 'decoder_hidden_size': 72, 'decoder_n_layers': 4, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 4, 'current_dropout': 0.5}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.040812
  📊 Trial 19 - Average CV Loss: 0.031031 (±0.006757)
  🔄 Trial 20, Fold 1/5
    Fold 1 best total loss: 0.021819
  🔄 Trial 20, Fold 2/5
    Fold 2 best total loss: 0.021854
  🔄 Trial 20, Fold 3/5
    Fold 3 best total loss: 0.029784
  🔄 Trial 20, Fold 4/5
    Fold 4 best total loss: 0.021453
  🔄 Trial 20, Fold 5/5


[I 2025-10-10 01:00:06,401] Trial 20 finished with value: 0.02473641000688076 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.6, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.6}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.028772
  📊 Trial 20 - Average CV Loss: 0.024736 (±0.003725)
  🔄 Trial 21, Fold 1/5
    Fold 1 best total loss: 0.017611
  🔄 Trial 21, Fold 2/5
    Fold 2 best total loss: 0.016065
  🔄 Trial 21, Fold 3/5
    Fold 3 best total loss: 0.025706
  🔄 Trial 21, Fold 4/5
    Fold 4 best total loss: 0.018250
  🔄 Trial 21, Fold 5/5


[I 2025-10-10 01:02:31,117] Trial 21 finished with value: 0.020187967782840134 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 2, 'lstm_dropout': 0.5, 'decoder_hidden_size': 96, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 72, 'current_n_layers': 3, 'current_dropout': 0.4}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.023308
  📊 Trial 21 - Average CV Loss: 0.020188 (±0.003676)
  🔄 Trial 22, Fold 1/5
    Fold 1 best total loss: 0.011784
  🔄 Trial 22, Fold 2/5
    Fold 2 best total loss: 0.020787
  🔄 Trial 22, Fold 3/5
    Fold 3 best total loss: 0.022419
  🔄 Trial 22, Fold 4/5
    Fold 4 best total loss: 0.016675
  🔄 Trial 22, Fold 5/5


[I 2025-10-10 01:05:54,742] Trial 22 finished with value: 0.022849538549780845 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 3, 'lstm_dropout': 0.5, 'decoder_hidden_size': 64, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.042582
  📊 Trial 22 - Average CV Loss: 0.022850 (±0.010530)
  🔄 Trial 23, Fold 1/5
    Fold 1 best total loss: 0.013130
  🔄 Trial 23, Fold 2/5
    Fold 2 best total loss: 0.016396
  🔄 Trial 23, Fold 3/5
    Fold 3 best total loss: 0.022979
  🔄 Trial 23, Fold 4/5
    Fold 4 best total loss: 0.017787
  🔄 Trial 23, Fold 5/5


[I 2025-10-10 01:10:27,132] Trial 23 finished with value: 0.017505783098749818 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.5, 'decoder_hidden_size': 64, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.017237
  📊 Trial 23 - Average CV Loss: 0.017506 (±0.003178)
  🔄 Trial 24, Fold 1/5
    Fold 1 best total loss: 0.011415
  🔄 Trial 24, Fold 2/5
    Fold 2 best total loss: 0.015918
  🔄 Trial 24, Fold 3/5
    Fold 3 best total loss: 0.023108
  🔄 Trial 24, Fold 4/5
    Fold 4 best total loss: 0.019502
  🔄 Trial 24, Fold 5/5


[I 2025-10-10 01:14:56,032] Trial 24 finished with value: 0.01806987024610862 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.020408
  📊 Trial 24 - Average CV Loss: 0.018070 (±0.004045)
  🔄 Trial 25, Fold 1/5
    Fold 1 best total loss: 0.011830
  🔄 Trial 25, Fold 2/5
    Fold 2 best total loss: 0.014612
  🔄 Trial 25, Fold 3/5
    Fold 3 best total loss: 0.022620
  🔄 Trial 25, Fold 4/5
    Fold 4 best total loss: 0.035343
  🔄 Trial 25, Fold 5/5


[I 2025-10-10 01:19:20,729] Trial 25 finished with value: 0.02005293101537973 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.5, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 72, 'current_n_layers': 2, 'current_dropout': 0.5}. Best is trial 16 with value: 0.017243169341236354.


    Fold 5 best total loss: 0.015859
  📊 Trial 25 - Average CV Loss: 0.020053 (±0.008428)
  🔄 Trial 26, Fold 1/5
    Fold 1 best total loss: 0.009200
  🔄 Trial 26, Fold 2/5
    Fold 2 best total loss: 0.014445
  🔄 Trial 26, Fold 3/5
    Fold 3 best total loss: 0.019418
  🔄 Trial 26, Fold 4/5
    Fold 4 best total loss: 0.015119
  🔄 Trial 26, Fold 5/5


[I 2025-10-10 01:23:47,366] Trial 26 finished with value: 0.014126383780967445 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.4}. Best is trial 26 with value: 0.014126383780967445.


    Fold 5 best total loss: 0.012450
  📊 Trial 26 - Average CV Loss: 0.014126 (±0.003352)
  🔄 Trial 27, Fold 1/5
    Fold 1 best total loss: 0.032622
  🔄 Trial 27, Fold 2/5
    Fold 2 best total loss: 0.036387
  🔄 Trial 27, Fold 3/5
    Fold 3 best total loss: 0.047130
  🔄 Trial 27, Fold 4/5
    Fold 4 best total loss: 0.199643
  🔄 Trial 27, Fold 5/5


[I 2025-10-10 01:28:13,523] Trial 27 finished with value: 0.06963636595755815 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.5, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 6, 'current_dropout': 0.4}. Best is trial 26 with value: 0.014126383780967445.


    Fold 5 best total loss: 0.032400
  📊 Trial 27 - Average CV Loss: 0.069636 (±0.065223)
  🔄 Trial 28, Fold 1/5
    Fold 1 best total loss: 0.011420
  🔄 Trial 28, Fold 2/5
    Fold 2 best total loss: 0.016776
  🔄 Trial 28, Fold 3/5
    Fold 3 best total loss: 0.021376
  🔄 Trial 28, Fold 4/5
    Fold 4 best total loss: 0.018085
  🔄 Trial 28, Fold 5/5


[I 2025-10-10 01:33:41,665] Trial 28 finished with value: 0.016421660175547005 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 5, 'lstm_dropout': 0.4, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.4}. Best is trial 26 with value: 0.014126383780967445.


    Fold 5 best total loss: 0.014451
  📊 Trial 28 - Average CV Loss: 0.016422 (±0.003357)
  🔄 Trial 29, Fold 1/5
    Fold 1 best total loss: 0.021949
  🔄 Trial 29, Fold 2/5
    Fold 2 best total loss: 0.032598
  🔄 Trial 29, Fold 3/5
    Fold 3 best total loss: 0.223520
  🔄 Trial 29, Fold 4/5
    Fold 4 best total loss: 0.023905
  🔄 Trial 29, Fold 5/5


[I 2025-10-10 01:39:12,180] Trial 29 finished with value: 0.06695467648096383 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.4}. Best is trial 26 with value: 0.014126383780967445.


    Fold 5 best total loss: 0.032801
  📊 Trial 29 - Average CV Loss: 0.066955 (±0.078407)
  🔄 Trial 30, Fold 1/5
    Fold 1 best total loss: 0.009420
  🔄 Trial 30, Fold 2/5
    Fold 2 best total loss: 0.015008
  🔄 Trial 30, Fold 3/5
    Fold 3 best total loss: 0.020275
  🔄 Trial 30, Fold 4/5
    Fold 4 best total loss: 0.014840
  🔄 Trial 30, Fold 5/5


[I 2025-10-10 01:44:42,479] Trial 30 finished with value: 0.015068040089681744 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 26 with value: 0.014126383780967445.


    Fold 5 best total loss: 0.015797
  📊 Trial 30 - Average CV Loss: 0.015068 (±0.003452)
  🔄 Trial 31, Fold 1/5
    Fold 1 best total loss: 0.011221
  🔄 Trial 31, Fold 2/5
    Fold 2 best total loss: 0.011364
  🔄 Trial 31, Fold 3/5
    Fold 3 best total loss: 0.019157
  🔄 Trial 31, Fold 4/5
    Fold 4 best total loss: 0.013873
  🔄 Trial 31, Fold 5/5


[I 2025-10-10 01:50:16,979] Trial 31 finished with value: 0.013910780136939138 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.013939
  📊 Trial 31 - Average CV Loss: 0.013911 (±0.002872)
  🔄 Trial 32, Fold 1/5
    Fold 1 best total loss: 0.010313
  🔄 Trial 32, Fold 2/5
    Fold 2 best total loss: 0.014313
  🔄 Trial 32, Fold 3/5
    Fold 3 best total loss: 0.021718
  🔄 Trial 32, Fold 4/5
    Fold 4 best total loss: 0.013488
  🔄 Trial 32, Fold 5/5


[I 2025-10-10 01:55:44,328] Trial 32 finished with value: 0.014836173190269619 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.014349
  📊 Trial 32 - Average CV Loss: 0.014836 (±0.003745)
  🔄 Trial 33, Fold 1/5
    Fold 1 best total loss: 0.315966
  🔄 Trial 33, Fold 2/5
    Fold 2 best total loss: 0.016699
  🔄 Trial 33, Fold 3/5
    Fold 3 best total loss: 0.021844
  🔄 Trial 33, Fold 4/5
    Fold 4 best total loss: 0.027650
  🔄 Trial 33, Fold 5/5


[I 2025-10-10 02:01:30,804] Trial 33 finished with value: 0.08495545428013429 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 4, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 4, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.042619
  📊 Trial 33 - Average CV Loss: 0.084955 (±0.115831)
  🔄 Trial 34, Fold 1/5
    Fold 1 best total loss: 0.014936
  🔄 Trial 34, Fold 2/5
    Fold 2 best total loss: 0.014545
  🔄 Trial 34, Fold 3/5
    Fold 3 best total loss: 0.225517
  🔄 Trial 34, Fold 4/5
    Fold 4 best total loss: 0.012014
  🔄 Trial 34, Fold 5/5


[I 2025-10-10 02:09:26,345] Trial 34 finished with value: 0.0563773411209695 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 6, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 72, 'decoder_n_layers': 5, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.014874
  📊 Trial 34 - Average CV Loss: 0.056377 (±0.084577)
  🔄 Trial 35, Fold 1/5
    Fold 1 best total loss: 0.011426
  🔄 Trial 35, Fold 2/5
    Fold 2 best total loss: 0.019440
  🔄 Trial 35, Fold 3/5
    Fold 3 best total loss: 0.023739
  🔄 Trial 35, Fold 4/5
    Fold 4 best total loss: 0.199914
  🔄 Trial 35, Fold 5/5


[I 2025-10-10 02:15:58,474] Trial 35 finished with value: 0.05381861531641334 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 4, 'decoder_dropout': 0.6, 'current_hidden_size': 96, 'current_n_layers': 4, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.014574
  📊 Trial 35 - Average CV Loss: 0.053819 (±0.073168)
  🔄 Trial 36, Fold 1/5
    Fold 1 best total loss: 0.010180
  🔄 Trial 36, Fold 2/5
    Fold 2 best total loss: 0.014628
  🔄 Trial 36, Fold 3/5
    Fold 3 best total loss: 0.019684
  🔄 Trial 36, Fold 4/5
    Fold 4 best total loss: 0.019065
  🔄 Trial 36, Fold 5/5


[I 2025-10-10 02:23:53,761] Trial 36 finished with value: 0.015376304415985942 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 6, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.4, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.013325
  📊 Trial 36 - Average CV Loss: 0.015376 (±0.003576)
  🔄 Trial 37, Fold 1/5
    Fold 1 best total loss: 0.008755
  🔄 Trial 37, Fold 2/5
    Fold 2 best total loss: 0.013089
  🔄 Trial 37, Fold 3/5
    Fold 3 best total loss: 0.018160
  🔄 Trial 37, Fold 4/5
    Fold 4 best total loss: 0.201445
  🔄 Trial 37, Fold 5/5


[I 2025-10-10 02:30:37,531] Trial 37 finished with value: 0.11773528123740107 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 4, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 4, 'current_dropout': 0.1}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.347226
  📊 Trial 37 - Average CV Loss: 0.117735 (±0.135953)
  🔄 Trial 38, Fold 1/5
    Fold 1 best total loss: 0.018483
  🔄 Trial 38, Fold 2/5
    Fold 2 best total loss: 0.023589
  🔄 Trial 38, Fold 3/5
    Fold 3 best total loss: 0.023850
  🔄 Trial 38, Fold 4/5
    Fold 4 best total loss: 0.202960
  🔄 Trial 38, Fold 5/5


[I 2025-10-10 02:38:35,731] Trial 38 finished with value: 0.05713206280488521 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 6, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 5, 'decoder_dropout': 0.4, 'current_hidden_size': 16, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.016778
  📊 Trial 38 - Average CV Loss: 0.057132 (±0.072967)
  🔄 Trial 39, Fold 1/5
    Fold 1 best total loss: 0.014397
  🔄 Trial 39, Fold 2/5
    Fold 2 best total loss: 0.021087
  🔄 Trial 39, Fold 3/5
    Fold 3 best total loss: 0.021632
  🔄 Trial 39, Fold 4/5
    Fold 4 best total loss: 0.018613
  🔄 Trial 39, Fold 5/5


[I 2025-10-10 02:45:16,967] Trial 39 finished with value: 0.01850024755112827 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 16, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 4, 'current_dropout': 0.4}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.016773
  📊 Trial 39 - Average CV Loss: 0.018500 (±0.002696)
  🔄 Trial 40, Fold 1/5
    Fold 1 best total loss: 0.012637
  🔄 Trial 40, Fold 2/5
    Fold 2 best total loss: 0.016681
  🔄 Trial 40, Fold 3/5
    Fold 3 best total loss: 0.019485
  🔄 Trial 40, Fold 4/5
    Fold 4 best total loss: 0.017545
  🔄 Trial 40, Fold 5/5


[I 2025-10-10 02:51:52,740] Trial 40 finished with value: 0.016370690334588288 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.6, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.015505
  📊 Trial 40 - Average CV Loss: 0.016371 (±0.002274)
  🔄 Trial 41, Fold 1/5
    Fold 1 best total loss: 0.010704
  🔄 Trial 41, Fold 2/5
    Fold 2 best total loss: 0.016223
  🔄 Trial 41, Fold 3/5
    Fold 3 best total loss: 0.223530
  🔄 Trial 41, Fold 4/5
    Fold 4 best total loss: 0.017667
  🔄 Trial 41, Fold 5/5


[I 2025-10-10 03:00:03,825] Trial 41 finished with value: 0.05684002712368965 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 6, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.4, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.016076
  📊 Trial 41 - Average CV Loss: 0.056840 (±0.083379)
  🔄 Trial 42, Fold 1/5
    Fold 1 best total loss: 0.011121
  🔄 Trial 42, Fold 2/5
    Fold 2 best total loss: 0.017351
  🔄 Trial 42, Fold 3/5
    Fold 3 best total loss: 0.021814
  🔄 Trial 42, Fold 4/5
    Fold 4 best total loss: 0.021553
  🔄 Trial 42, Fold 5/5


[I 2025-10-10 03:06:53,115] Trial 42 finished with value: 0.017006628471426664 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 6, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.5, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.013194
  📊 Trial 42 - Average CV Loss: 0.017007 (±0.004314)
  🔄 Trial 43, Fold 1/5
    Fold 1 best total loss: 0.009085
  🔄 Trial 43, Fold 2/5
    Fold 2 best total loss: 0.013254
  🔄 Trial 43, Fold 3/5
    Fold 3 best total loss: 0.017606
  🔄 Trial 43, Fold 4/5
    Fold 4 best total loss: 0.201457
  🔄 Trial 43, Fold 5/5


[I 2025-10-10 03:13:44,182] Trial 43 finished with value: 0.050695474608801305 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 6, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 48, 'decoder_n_layers': 4, 'decoder_dropout': 0.4, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.012076
  📊 Trial 43 - Average CV Loss: 0.050695 (±0.075430)
  🔄 Trial 44, Fold 1/5
    Fold 1 best total loss: 0.013225
  🔄 Trial 44, Fold 2/5
    Fold 2 best total loss: 0.021165
  🔄 Trial 44, Fold 3/5
    Fold 3 best total loss: 0.228376
  🔄 Trial 44, Fold 4/5
    Fold 4 best total loss: 0.019834
  🔄 Trial 44, Fold 5/5


[I 2025-10-10 03:20:16,407] Trial 44 finished with value: 0.060921439458616077 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 6, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.4, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.4}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.022008
  📊 Trial 44 - Average CV Loss: 0.060921 (±0.083784)
  🔄 Trial 45, Fold 1/5
    Fold 1 best total loss: 0.021338
  🔄 Trial 45, Fold 2/5
    Fold 2 best total loss: 0.024638
  🔄 Trial 45, Fold 3/5
    Fold 3 best total loss: 0.025472
  🔄 Trial 45, Fold 4/5
    Fold 4 best total loss: 0.026242
  🔄 Trial 45, Fold 5/5


[I 2025-10-10 03:24:53,552] Trial 45 finished with value: 0.02508127922192216 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.1, 'decoder_hidden_size': 72, 'decoder_n_layers': 5, 'decoder_dropout': 0.5, 'current_hidden_size': 16, 'current_n_layers': 4, 'current_dropout': 0.2}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.027715
  📊 Trial 45 - Average CV Loss: 0.025081 (±0.002128)
  🔄 Trial 46, Fold 1/5
    Fold 1 best total loss: 0.009669
  🔄 Trial 46, Fold 2/5
    Fold 2 best total loss: 0.014242
  🔄 Trial 46, Fold 3/5
    Fold 3 best total loss: 0.020575
  🔄 Trial 46, Fold 4/5
    Fold 4 best total loss: 0.013750
  🔄 Trial 46, Fold 5/5


[I 2025-10-10 03:30:28,858] Trial 46 finished with value: 0.014080356736667455 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.012167
  📊 Trial 46 - Average CV Loss: 0.014080 (±0.003618)
  🔄 Trial 47, Fold 1/5
    Fold 1 best total loss: 0.018116
  🔄 Trial 47, Fold 2/5
    Fold 2 best total loss: 0.024858
  🔄 Trial 47, Fold 3/5
    Fold 3 best total loss: 0.028514
  🔄 Trial 47, Fold 4/5
    Fold 4 best total loss: 0.018629
  🔄 Trial 47, Fold 5/5


[I 2025-10-10 03:35:55,129] Trial 47 finished with value: 0.023989258101209998 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 5, 'lstm_dropout': 0.1, 'decoder_hidden_size': 32, 'decoder_n_layers': 4, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 64, 'current_n_layers': 5, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.029829
  📊 Trial 47 - Average CV Loss: 0.023989 (±0.004870)
  🔄 Trial 48, Fold 1/5
    Fold 1 best total loss: 0.011203
  🔄 Trial 48, Fold 2/5
    Fold 2 best total loss: 0.319646
  🔄 Trial 48, Fold 3/5
    Fold 3 best total loss: 0.022018
  🔄 Trial 48, Fold 4/5
    Fold 4 best total loss: 0.022524
  🔄 Trial 48, Fold 5/5


[I 2025-10-10 03:41:34,750] Trial 48 finished with value: 0.07861798752564937 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 96, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 32, 'current_n_layers': 4, 'current_dropout': 0.2}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.017699
  📊 Trial 48 - Average CV Loss: 0.078618 (±0.120582)
  🔄 Trial 49, Fold 1/5
    Fold 1 best total loss: 0.022825
  🔄 Trial 49, Fold 2/5
    Fold 2 best total loss: 0.026256
  🔄 Trial 49, Fold 3/5
    Fold 3 best total loss: 0.024870
  🔄 Trial 49, Fold 4/5
    Fold 4 best total loss: 0.025004
  🔄 Trial 49, Fold 5/5


[I 2025-10-10 03:46:57,250] Trial 49 finished with value: 0.027244634088128807 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 4, 'decoder_dropout': 0.6, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.4}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.037268
  📊 Trial 49 - Average CV Loss: 0.027245 (±0.005131)
  🔄 Trial 50, Fold 1/5
    Fold 1 best total loss: 0.010953
  🔄 Trial 50, Fold 2/5
    Fold 2 best total loss: 0.013254
  🔄 Trial 50, Fold 3/5
    Fold 3 best total loss: 0.019987
  🔄 Trial 50, Fold 4/5
    Fold 4 best total loss: 0.012239
  🔄 Trial 50, Fold 5/5


[I 2025-10-10 03:51:33,444] Trial 50 finished with value: 0.014043116068933159 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.013783
  📊 Trial 50 - Average CV Loss: 0.014043 (±0.003125)
  🔄 Trial 51, Fold 1/5
    Fold 1 best total loss: 0.010517
  🔄 Trial 51, Fold 2/5
    Fold 2 best total loss: 0.014496
  🔄 Trial 51, Fold 3/5
    Fold 3 best total loss: 0.021183
  🔄 Trial 51, Fold 4/5
    Fold 4 best total loss: 0.014180
  🔄 Trial 51, Fold 5/5


[I 2025-10-10 03:56:00,377] Trial 51 finished with value: 0.015348013932816684 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.016364
  📊 Trial 51 - Average CV Loss: 0.015348 (±0.003479)
  🔄 Trial 52, Fold 1/5
    Fold 1 best total loss: 0.010516
  🔄 Trial 52, Fold 2/5
    Fold 2 best total loss: 0.013201
  🔄 Trial 52, Fold 3/5
    Fold 3 best total loss: 0.018938
  🔄 Trial 52, Fold 4/5
    Fold 4 best total loss: 0.014652
  🔄 Trial 52, Fold 5/5


[I 2025-10-10 04:00:34,570] Trial 52 finished with value: 0.014318390423431993 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.014285
  📊 Trial 52 - Average CV Loss: 0.014318 (±0.002726)
  🔄 Trial 53, Fold 1/5
    Fold 1 best total loss: 0.010514
  🔄 Trial 53, Fold 2/5
    Fold 2 best total loss: 0.018267
  🔄 Trial 53, Fold 3/5
    Fold 3 best total loss: 0.022729
  🔄 Trial 53, Fold 4/5
    Fold 4 best total loss: 0.015839
  🔄 Trial 53, Fold 5/5


[I 2025-10-10 04:04:00,031] Trial 53 finished with value: 0.01610900752712041 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 16, 'decoder_n_layers': 6, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.4}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.013197
  📊 Trial 53 - Average CV Loss: 0.016109 (±0.004203)
  🔄 Trial 54, Fold 1/5
    Fold 1 best total loss: 0.011886
  🔄 Trial 54, Fold 2/5
    Fold 2 best total loss: 0.321010
  🔄 Trial 54, Fold 3/5
    Fold 3 best total loss: 0.022710
  🔄 Trial 54, Fold 4/5
    Fold 4 best total loss: 0.015988
  🔄 Trial 54, Fold 5/5


[I 2025-10-10 04:08:38,192] Trial 54 finished with value: 0.14339394122362137 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 5, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.345377
  📊 Trial 54 - Average CV Loss: 0.143394 (±0.155200)
  🔄 Trial 55, Fold 1/5
    Fold 1 best total loss: 0.012201
  🔄 Trial 55, Fold 2/5
    Fold 2 best total loss: 0.021629
  🔄 Trial 55, Fold 3/5
    Fold 3 best total loss: 0.225955
  🔄 Trial 55, Fold 4/5
    Fold 4 best total loss: 0.199733
  🔄 Trial 55, Fold 5/5


[I 2025-10-10 04:11:57,379] Trial 55 finished with value: 0.09481996302492916 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 3, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 2, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 64, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.014582
  📊 Trial 55 - Average CV Loss: 0.094820 (±0.096772)
  🔄 Trial 56, Fold 1/5
    Fold 1 best total loss: 0.014174
  🔄 Trial 56, Fold 2/5
    Fold 2 best total loss: 0.018715
  🔄 Trial 56, Fold 3/5
    Fold 3 best total loss: 0.023076
  🔄 Trial 56, Fold 4/5
    Fold 4 best total loss: 0.022403
  🔄 Trial 56, Fold 5/5


[I 2025-10-10 04:16:22,692] Trial 56 finished with value: 0.019466557702980936 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.1, 'decoder_hidden_size': 16, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 2, 'current_dropout': 0.4}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.018965
  📊 Trial 56 - Average CV Loss: 0.019467 (±0.003177)
  🔄 Trial 57, Fold 1/5
    Fold 1 best total loss: 0.316117
  🔄 Trial 57, Fold 2/5
    Fold 2 best total loss: 0.021003
  🔄 Trial 57, Fold 3/5
    Fold 3 best total loss: 0.025056
  🔄 Trial 57, Fold 4/5
    Fold 4 best total loss: 0.020436
  🔄 Trial 57, Fold 5/5


[I 2025-10-10 04:20:54,368] Trial 57 finished with value: 0.08205506387166679 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.027664
  📊 Trial 57 - Average CV Loss: 0.082055 (±0.117061)
  🔄 Trial 58, Fold 1/5
    Fold 1 best total loss: 0.009708
  🔄 Trial 58, Fold 2/5
    Fold 2 best total loss: 0.014125
  🔄 Trial 58, Fold 3/5
    Fold 3 best total loss: 0.019867
  🔄 Trial 58, Fold 4/5
    Fold 4 best total loss: 0.013534
  🔄 Trial 58, Fold 5/5


[I 2025-10-10 04:25:29,661] Trial 58 finished with value: 0.014896432845853269 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 5, 'decoder_dropout': 0.4, 'current_hidden_size': 96, 'current_n_layers': 4, 'current_dropout': 0.2}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.017248
  📊 Trial 58 - Average CV Loss: 0.014896 (±0.003453)
  🔄 Trial 59, Fold 1/5
    Fold 1 best total loss: 0.024980
  🔄 Trial 59, Fold 2/5
    Fold 2 best total loss: 0.026252
  🔄 Trial 59, Fold 3/5
    Fold 3 best total loss: 0.031841
  🔄 Trial 59, Fold 4/5
    Fold 4 best total loss: 0.030966
  🔄 Trial 59, Fold 5/5


[I 2025-10-10 04:28:55,624] Trial 59 finished with value: 0.028123172605410217 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 3, 'lstm_dropout': 0.4, 'decoder_hidden_size': 72, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 16, 'current_n_layers': 2, 'current_dropout': 0.30000000000000004}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.026576
  📊 Trial 59 - Average CV Loss: 0.028123 (±0.002745)
  🔄 Trial 60, Fold 1/5
    Fold 1 best total loss: 0.013021
  🔄 Trial 60, Fold 2/5
    Fold 2 best total loss: 0.020317
  🔄 Trial 60, Fold 3/5
    Fold 3 best total loss: 0.022765
  🔄 Trial 60, Fold 4/5
    Fold 4 best total loss: 0.019705
  🔄 Trial 60, Fold 5/5


[I 2025-10-10 04:33:24,526] Trial 60 finished with value: 0.019061443046666682 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.1, 'decoder_hidden_size': 16, 'decoder_n_layers': 4, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.4}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.019499
  📊 Trial 60 - Average CV Loss: 0.019061 (±0.003237)
  🔄 Trial 61, Fold 1/5
    Fold 1 best total loss: 0.009622
  🔄 Trial 61, Fold 2/5
    Fold 2 best total loss: 0.017654
  🔄 Trial 61, Fold 3/5
    Fold 3 best total loss: 0.224307
  🔄 Trial 61, Fold 4/5
    Fold 4 best total loss: 0.013073
  🔄 Trial 61, Fold 5/5


[I 2025-10-10 04:37:57,968] Trial 61 finished with value: 0.055371083074714986 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 5, 'decoder_dropout': 0.4, 'current_hidden_size': 96, 'current_n_layers': 4, 'current_dropout': 0.2}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.012199
  📊 Trial 61 - Average CV Loss: 0.055371 (±0.084508)
  🔄 Trial 62, Fold 1/5
    Fold 1 best total loss: 0.012489
  🔄 Trial 62, Fold 2/5
    Fold 2 best total loss: 0.319426
  🔄 Trial 62, Fold 3/5
    Fold 3 best total loss: 0.019254
  🔄 Trial 62, Fold 4/5
    Fold 4 best total loss: 0.011830
  🔄 Trial 62, Fold 5/5


[I 2025-10-10 04:42:33,388] Trial 62 finished with value: 0.14248327368404717 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 6, 'decoder_dropout': 0.4, 'current_hidden_size': 96, 'current_n_layers': 4, 'current_dropout': 0.2}. Best is trial 31 with value: 0.013910780136939138.


    Fold 5 best total loss: 0.349416
  📊 Trial 62 - Average CV Loss: 0.142483 (±0.157025)
  🔄 Trial 63, Fold 1/5
    Fold 1 best total loss: 0.007983
  🔄 Trial 63, Fold 2/5
    Fold 2 best total loss: 0.012642
  🔄 Trial 63, Fold 3/5
    Fold 3 best total loss: 0.018990
  🔄 Trial 63, Fold 4/5
    Fold 4 best total loss: 0.009801
  🔄 Trial 63, Fold 5/5


[I 2025-10-10 04:47:08,603] Trial 63 finished with value: 0.01233225737232715 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 5, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 63 with value: 0.01233225737232715.


    Fold 5 best total loss: 0.012244
  📊 Trial 63 - Average CV Loss: 0.012332 (±0.003735)
  🔄 Trial 64, Fold 1/5
    Fold 1 best total loss: 0.008864
  🔄 Trial 64, Fold 2/5
    Fold 2 best total loss: 0.014144
  🔄 Trial 64, Fold 3/5
    Fold 3 best total loss: 0.016008
  🔄 Trial 64, Fold 4/5
    Fold 4 best total loss: 0.199501
  🔄 Trial 64, Fold 5/5


[I 2025-10-10 04:51:39,553] Trial 64 finished with value: 0.050520474556833506 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 16, 'decoder_n_layers': 4, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 63 with value: 0.01233225737232715.


    Fold 5 best total loss: 0.014086
  📊 Trial 64 - Average CV Loss: 0.050520 (±0.074528)
  🔄 Trial 65, Fold 1/5
    Fold 1 best total loss: 0.007583
  🔄 Trial 65, Fold 2/5
    Fold 2 best total loss: 0.012520
  🔄 Trial 65, Fold 3/5
    Fold 3 best total loss: 0.016949
  🔄 Trial 65, Fold 4/5
    Fold 4 best total loss: 0.008358
  🔄 Trial 65, Fold 5/5


[I 2025-10-10 04:57:15,878] Trial 65 finished with value: 0.010987767169717699 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.009529
  📊 Trial 65 - Average CV Loss: 0.010988 (±0.003421)
  🔄 Trial 66, Fold 1/5
    Fold 1 best total loss: 0.007831
  🔄 Trial 66, Fold 2/5
    Fold 2 best total loss: 0.012333
  🔄 Trial 66, Fold 3/5
    Fold 3 best total loss: 0.225198
  🔄 Trial 66, Fold 4/5
    Fold 4 best total loss: 0.012515
  🔄 Trial 66, Fold 5/5


[I 2025-10-10 05:01:52,745] Trial 66 finished with value: 0.05355148912640288 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.009880
  📊 Trial 66 - Average CV Loss: 0.053551 (±0.085841)
  🔄 Trial 67, Fold 1/5
    Fold 1 best total loss: 0.006730
  🔄 Trial 67, Fold 2/5
    Fold 2 best total loss: 0.018364
  🔄 Trial 67, Fold 3/5
    Fold 3 best total loss: 0.227011
  🔄 Trial 67, Fold 4/5
    Fold 4 best total loss: 0.013103
  🔄 Trial 67, Fold 5/5


[I 2025-10-10 05:06:23,228] Trial 67 finished with value: 0.05494946867693216 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 96, 'current_n_layers': 2, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.009539
  📊 Trial 67 - Average CV Loss: 0.054949 (±0.086119)
  🔄 Trial 68, Fold 1/5
    Fold 1 best total loss: 0.318166
  🔄 Trial 68, Fold 2/5
    Fold 2 best total loss: 0.317909
  🔄 Trial 68, Fold 3/5
    Fold 3 best total loss: 0.222151
  🔄 Trial 68, Fold 4/5
    Fold 4 best total loss: 0.010949
  🔄 Trial 68, Fold 5/5


[I 2025-10-10 05:11:47,901] Trial 68 finished with value: 0.2432169772335328 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 48, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 64, 'current_n_layers': 6, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.346910
  📊 Trial 68 - Average CV Loss: 0.243217 (±0.123564)
  🔄 Trial 69, Fold 1/5
    Fold 1 best total loss: 0.008262
  🔄 Trial 69, Fold 2/5
    Fold 2 best total loss: 0.014474
  🔄 Trial 69, Fold 3/5
    Fold 3 best total loss: 0.225435
  🔄 Trial 69, Fold 4/5
    Fold 4 best total loss: 0.014625
  🔄 Trial 69, Fold 5/5


[I 2025-10-10 05:15:10,420] Trial 69 finished with value: 0.05505283065140247 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 48, 'decoder_n_layers': 3, 'decoder_dropout': 0.30000000000000004, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.012468
  📊 Trial 69 - Average CV Loss: 0.055053 (±0.085222)
  🔄 Trial 70, Fold 1/5
    Fold 1 best total loss: 0.016933
  🔄 Trial 70, Fold 2/5
    Fold 2 best total loss: 0.013267
  🔄 Trial 70, Fold 3/5
    Fold 3 best total loss: 0.014353
  🔄 Trial 70, Fold 4/5
    Fold 4 best total loss: 0.008735
  🔄 Trial 70, Fold 5/5


[I 2025-10-10 05:19:42,198] Trial 70 finished with value: 0.012706608301959932 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.010245
  📊 Trial 70 - Average CV Loss: 0.012707 (±0.002923)
  🔄 Trial 71, Fold 1/5
    Fold 1 best total loss: 0.006952
  🔄 Trial 71, Fold 2/5
    Fold 2 best total loss: 0.014554
  🔄 Trial 71, Fold 3/5
    Fold 3 best total loss: 0.015340
  🔄 Trial 71, Fold 4/5
    Fold 4 best total loss: 0.012147
  🔄 Trial 71, Fold 5/5


[I 2025-10-10 05:24:13,331] Trial 71 finished with value: 0.011748734686989337 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.009751
  📊 Trial 71 - Average CV Loss: 0.011749 (±0.003096)
  🔄 Trial 72, Fold 1/5
    Fold 1 best total loss: 0.007163
  🔄 Trial 72, Fold 2/5
    Fold 2 best total loss: 0.320350
  🔄 Trial 72, Fold 3/5
    Fold 3 best total loss: 0.014370
  🔄 Trial 72, Fold 4/5
    Fold 4 best total loss: 0.011315
  🔄 Trial 72, Fold 5/5


[I 2025-10-10 05:28:40,478] Trial 72 finished with value: 0.0723947043530643 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.008775
  📊 Trial 72 - Average CV Loss: 0.072395 (±0.124002)
  🔄 Trial 73, Fold 1/5
    Fold 1 best total loss: 0.317592
  🔄 Trial 73, Fold 2/5
    Fold 2 best total loss: 0.321549
  🔄 Trial 73, Fold 3/5
    Fold 3 best total loss: 0.013667
  🔄 Trial 73, Fold 4/5
    Fold 4 best total loss: 0.011067
  🔄 Trial 73, Fold 5/5


[I 2025-10-10 05:34:13,456] Trial 73 finished with value: 0.20198029876919463 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.346027
  📊 Trial 73 - Average CV Loss: 0.201980 (±0.155127)
  🔄 Trial 74, Fold 1/5
    Fold 1 best total loss: 0.006886
  🔄 Trial 74, Fold 2/5
    Fold 2 best total loss: 0.318086
  🔄 Trial 74, Fold 3/5
    Fold 3 best total loss: 0.016199
  🔄 Trial 74, Fold 4/5
    Fold 4 best total loss: 0.011401
  🔄 Trial 74, Fold 5/5


[I 2025-10-10 05:38:45,761] Trial 74 finished with value: 0.07267411489738151 and parameters: {'lstm_hidden_size': 72, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 65 with value: 0.010987767169717699.


    Fold 5 best total loss: 0.010799
  📊 Trial 74 - Average CV Loss: 0.072674 (±0.122741)
  🔄 Trial 75, Fold 1/5
    Fold 1 best total loss: 0.007445
  🔄 Trial 75, Fold 2/5
    Fold 2 best total loss: 0.014458
  🔄 Trial 75, Fold 3/5
    Fold 3 best total loss: 0.013575
  🔄 Trial 75, Fold 4/5
    Fold 4 best total loss: 0.009009
  🔄 Trial 75, Fold 5/5


[I 2025-10-10 05:43:18,483] Trial 75 finished with value: 0.010700262093450874 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 75 with value: 0.010700262093450874.


    Fold 5 best total loss: 0.009014
  📊 Trial 75 - Average CV Loss: 0.010700 (±0.002781)
  🔄 Trial 76, Fold 1/5
    Fold 1 best total loss: 0.315833
  🔄 Trial 76, Fold 2/5
    Fold 2 best total loss: 0.014465
  🔄 Trial 76, Fold 3/5
    Fold 3 best total loss: 0.223078
  🔄 Trial 76, Fold 4/5
    Fold 4 best total loss: 0.010293
  🔄 Trial 76, Fold 5/5


[I 2025-10-10 05:48:47,709] Trial 76 finished with value: 0.11487422230420634 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 5, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 75 with value: 0.010700262093450874.


    Fold 5 best total loss: 0.010702
  📊 Trial 76 - Average CV Loss: 0.114874 (±0.129587)
  🔄 Trial 77, Fold 1/5
    Fold 1 best total loss: 0.009778
  🔄 Trial 77, Fold 2/5
    Fold 2 best total loss: 0.319176
  🔄 Trial 77, Fold 3/5
    Fold 3 best total loss: 0.022978
  🔄 Trial 77, Fold 4/5
    Fold 4 best total loss: 0.010948
  🔄 Trial 77, Fold 5/5


[I 2025-10-10 05:53:12,594] Trial 77 finished with value: 0.07509504902409389 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 75 with value: 0.010700262093450874.


    Fold 5 best total loss: 0.012595
  📊 Trial 77 - Average CV Loss: 0.075095 (±0.122131)
  🔄 Trial 78, Fold 1/5
    Fold 1 best total loss: 0.007834
  🔄 Trial 78, Fold 2/5
    Fold 2 best total loss: 0.012345
  🔄 Trial 78, Fold 3/5
    Fold 3 best total loss: 0.015092
  🔄 Trial 78, Fold 4/5
    Fold 4 best total loss: 0.008806
  🔄 Trial 78, Fold 5/5


[I 2025-10-10 05:57:48,234] Trial 78 finished with value: 0.01047220446052961 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.008284
  📊 Trial 78 - Average CV Loss: 0.010472 (±0.002806)
  🔄 Trial 79, Fold 1/5
    Fold 1 best total loss: 0.317160
  🔄 Trial 79, Fold 2/5
    Fold 2 best total loss: 0.014557
  🔄 Trial 79, Fold 3/5
    Fold 3 best total loss: 0.014306
  🔄 Trial 79, Fold 4/5
    Fold 4 best total loss: 0.010820
  🔄 Trial 79, Fold 5/5


[I 2025-10-10 06:01:12,280] Trial 79 finished with value: 0.07296772074187174 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 3, 'lstm_dropout': 0.1, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.007996
  📊 Trial 79 - Average CV Loss: 0.072968 (±0.122120)
  🔄 Trial 80, Fold 1/5
    Fold 1 best total loss: 0.014292
  🔄 Trial 80, Fold 2/5
    Fold 2 best total loss: 0.014393
  🔄 Trial 80, Fold 3/5
    Fold 3 best total loss: 0.223632
  🔄 Trial 80, Fold 4/5
    Fold 4 best total loss: 0.014989
  🔄 Trial 80, Fold 5/5


[I 2025-10-10 06:05:45,059] Trial 80 finished with value: 0.1225443810923025 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 2, 'decoder_dropout': 0.1, 'current_hidden_size': 16, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.345416
  📊 Trial 80 - Average CV Loss: 0.122544 (±0.137749)
  🔄 Trial 81, Fold 1/5
    Fold 1 best total loss: 0.007063
  🔄 Trial 81, Fold 2/5
    Fold 2 best total loss: 0.318017
  🔄 Trial 81, Fold 3/5
    Fold 3 best total loss: 0.017889
  🔄 Trial 81, Fold 4/5
    Fold 4 best total loss: 0.203776
  🔄 Trial 81, Fold 5/5


[I 2025-10-10 06:10:12,526] Trial 81 finished with value: 0.11137107646791264 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.010110
  📊 Trial 81 - Average CV Loss: 0.111371 (±0.127369)
  🔄 Trial 82, Fold 1/5
    Fold 1 best total loss: 0.007168
  🔄 Trial 82, Fold 2/5
    Fold 2 best total loss: 0.012520
  🔄 Trial 82, Fold 3/5
    Fold 3 best total loss: 0.015164
  🔄 Trial 82, Fold 4/5
    Fold 4 best total loss: 0.010974
  🔄 Trial 82, Fold 5/5


[I 2025-10-10 06:14:45,951] Trial 82 finished with value: 0.011198591743595897 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.010167
  📊 Trial 82 - Average CV Loss: 0.011199 (±0.002639)
  🔄 Trial 83, Fold 1/5
    Fold 1 best total loss: 0.008458
  🔄 Trial 83, Fold 2/5
    Fold 2 best total loss: 0.013383
  🔄 Trial 83, Fold 3/5
    Fold 3 best total loss: 0.013652
  🔄 Trial 83, Fold 4/5
    Fold 4 best total loss: 0.010532
  🔄 Trial 83, Fold 5/5


[I 2025-10-10 06:19:12,576] Trial 83 finished with value: 0.07844388668891042 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.346195
  📊 Trial 83 - Average CV Loss: 0.078444 (±0.133889)
  🔄 Trial 84, Fold 1/5
    Fold 1 best total loss: 0.317096
  🔄 Trial 84, Fold 2/5
    Fold 2 best total loss: 0.014002
  🔄 Trial 84, Fold 3/5
    Fold 3 best total loss: 0.224799
  🔄 Trial 84, Fold 4/5
    Fold 4 best total loss: 0.021265
  🔄 Trial 84, Fold 5/5


[I 2025-10-10 06:23:43,081] Trial 84 finished with value: 0.11729741001036018 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.009325
  📊 Trial 84 - Average CV Loss: 0.117297 (±0.128862)
  🔄 Trial 85, Fold 1/5
    Fold 1 best total loss: 0.007131
  🔄 Trial 85, Fold 2/5
    Fold 2 best total loss: 0.320695
  🔄 Trial 85, Fold 3/5
    Fold 3 best total loss: 0.019077
  🔄 Trial 85, Fold 4/5
    Fold 4 best total loss: 0.012451
  🔄 Trial 85, Fold 5/5


[I 2025-10-10 06:28:10,023] Trial 85 finished with value: 0.07389358913060277 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.010115
  📊 Trial 85 - Average CV Loss: 0.073894 (±0.123463)
  🔄 Trial 86, Fold 1/5
    Fold 1 best total loss: 0.007072
  🔄 Trial 86, Fold 2/5
    Fold 2 best total loss: 0.012203
  🔄 Trial 86, Fold 3/5
    Fold 3 best total loss: 0.225867
  🔄 Trial 86, Fold 4/5
    Fold 4 best total loss: 0.013135
  🔄 Trial 86, Fold 5/5


[I 2025-10-10 06:32:40,147] Trial 86 finished with value: 0.053199829341610896 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.007722
  📊 Trial 86 - Average CV Loss: 0.053200 (±0.086367)
  🔄 Trial 87, Fold 1/5
    Fold 1 best total loss: 0.008806
  🔄 Trial 87, Fold 2/5
    Fold 2 best total loss: 0.022388
  🔄 Trial 87, Fold 3/5
    Fold 3 best total loss: 0.018612
  🔄 Trial 87, Fold 4/5
    Fold 4 best total loss: 0.202069
  🔄 Trial 87, Fold 5/5


[I 2025-10-10 06:37:03,373] Trial 87 finished with value: 0.05918559983838349 and parameters: {'lstm_hidden_size': 16, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.044054
  📊 Trial 87 - Average CV Loss: 0.059186 (±0.072364)
  🔄 Trial 88, Fold 1/5
    Fold 1 best total loss: 0.317876
  🔄 Trial 88, Fold 2/5
    Fold 2 best total loss: 0.015827
  🔄 Trial 88, Fold 3/5
    Fold 3 best total loss: 0.019392
  🔄 Trial 88, Fold 4/5
    Fold 4 best total loss: 0.015742
  🔄 Trial 88, Fold 5/5


[I 2025-10-10 06:40:26,818] Trial 88 finished with value: 0.07642195001244545 and parameters: {'lstm_hidden_size': 48, 'lstm_n_layers': 3, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 3, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.2}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.013273
  📊 Trial 88 - Average CV Loss: 0.076422 (±0.120743)
  🔄 Trial 89, Fold 1/5
    Fold 1 best total loss: 0.007130
  🔄 Trial 89, Fold 2/5
    Fold 2 best total loss: 0.017477
  🔄 Trial 89, Fold 3/5
    Fold 3 best total loss: 0.015955
  🔄 Trial 89, Fold 4/5
    Fold 4 best total loss: 0.011688
  🔄 Trial 89, Fold 5/5


[I 2025-10-10 06:44:53,959] Trial 89 finished with value: 0.01227777972817421 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 4, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.009139
  📊 Trial 89 - Average CV Loss: 0.012278 (±0.003931)
  🔄 Trial 90, Fold 1/5
    Fold 1 best total loss: 0.008291
  🔄 Trial 90, Fold 2/5
    Fold 2 best total loss: 0.013661
  🔄 Trial 90, Fold 3/5
    Fold 3 best total loss: 0.019955
  🔄 Trial 90, Fold 4/5
    Fold 4 best total loss: 0.011991
  🔄 Trial 90, Fold 5/5


[I 2025-10-10 06:49:18,355] Trial 90 finished with value: 0.013307246717158706 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 4, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.012639
  📊 Trial 90 - Average CV Loss: 0.013307 (±0.003786)
  🔄 Trial 91, Fold 1/5
    Fold 1 best total loss: 0.008303
  🔄 Trial 91, Fold 2/5
    Fold 2 best total loss: 0.012369
  🔄 Trial 91, Fold 3/5
    Fold 3 best total loss: 0.015867
  🔄 Trial 91, Fold 4/5
    Fold 4 best total loss: 0.014088
  🔄 Trial 91, Fold 5/5


[I 2025-10-10 06:53:49,795] Trial 91 finished with value: 0.07901732748141513 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 5, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.344460
  📊 Trial 91 - Average CV Loss: 0.079017 (±0.132745)
  🔄 Trial 92, Fold 1/5
    Fold 1 best total loss: 0.007366
  🔄 Trial 92, Fold 2/5
    Fold 2 best total loss: 0.013302
  🔄 Trial 92, Fold 3/5
    Fold 3 best total loss: 0.016312
  🔄 Trial 92, Fold 4/5
    Fold 4 best total loss: 0.203023
  🔄 Trial 92, Fold 5/5


[I 2025-10-10 06:58:15,278] Trial 92 finished with value: 0.11690378971397877 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 4, 'decoder_dropout': 0.2, 'current_hidden_size': 64, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.344516
  📊 Trial 92 - Average CV Loss: 0.116904 (±0.135702)
  🔄 Trial 93, Fold 1/5
    Fold 1 best total loss: 0.007782
  🔄 Trial 93, Fold 2/5
    Fold 2 best total loss: 0.014637
  🔄 Trial 93, Fold 3/5
    Fold 3 best total loss: 0.025106
  🔄 Trial 93, Fold 4/5
    Fold 4 best total loss: 0.014110
  🔄 Trial 93, Fold 5/5


[I 2025-10-10 07:02:40,929] Trial 93 finished with value: 0.014970642235130071 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 4, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.013218
  📊 Trial 93 - Average CV Loss: 0.014971 (±0.005627)
  🔄 Trial 94, Fold 1/5
    Fold 1 best total loss: 0.316903
  🔄 Trial 94, Fold 2/5
    Fold 2 best total loss: 0.018333
  🔄 Trial 94, Fold 3/5
    Fold 3 best total loss: 0.015346
  🔄 Trial 94, Fold 4/5
    Fold 4 best total loss: 0.012104
  🔄 Trial 94, Fold 5/5


[I 2025-10-10 07:07:02,012] Trial 94 finished with value: 0.07487166506471113 and parameters: {'lstm_hidden_size': 64, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 5, 'decoder_dropout': 0.2, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.011672
  📊 Trial 94 - Average CV Loss: 0.074872 (±0.121039)
  🔄 Trial 95, Fold 1/5
    Fold 1 best total loss: 0.009777
  🔄 Trial 95, Fold 2/5
    Fold 2 best total loss: 0.011303
  🔄 Trial 95, Fold 3/5
    Fold 3 best total loss: 0.018728
  🔄 Trial 95, Fold 4/5
    Fold 4 best total loss: 0.015736
  🔄 Trial 95, Fold 5/5


[I 2025-10-10 07:11:33,668] Trial 95 finished with value: 0.08031374369747937 and parameters: {'lstm_hidden_size': 32, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 64, 'decoder_n_layers': 4, 'decoder_dropout': 0.1, 'current_hidden_size': 48, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.346024
  📊 Trial 95 - Average CV Loss: 0.080314 (±0.132893)
  🔄 Trial 96, Fold 1/5
    Fold 1 best total loss: 0.007738
  🔄 Trial 96, Fold 2/5
    Fold 2 best total loss: 0.318304
  🔄 Trial 96, Fold 3/5
    Fold 3 best total loss: 0.014809
  🔄 Trial 96, Fold 4/5
    Fold 4 best total loss: 0.011728
  🔄 Trial 96, Fold 5/5


[I 2025-10-10 07:16:02,843] Trial 96 finished with value: 0.07269281900953502 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 64, 'decoder_n_layers': 4, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.010885
  📊 Trial 96 - Average CV Loss: 0.072693 (±0.122826)
  🔄 Trial 97, Fold 1/5
    Fold 1 best total loss: 0.009809
  🔄 Trial 97, Fold 2/5
    Fold 2 best total loss: 0.318293
  🔄 Trial 97, Fold 3/5
    Fold 3 best total loss: 0.226188
  🔄 Trial 97, Fold 4/5
    Fold 4 best total loss: 0.021662
  🔄 Trial 97, Fold 5/5


[I 2025-10-10 07:20:37,323] Trial 97 finished with value: 0.11784809022210538 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.2, 'decoder_hidden_size': 32, 'decoder_n_layers': 6, 'decoder_dropout': 0.2, 'current_hidden_size': 32, 'current_n_layers': 4, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.013288
  📊 Trial 97 - Average CV Loss: 0.117848 (±0.129440)
  🔄 Trial 98, Fold 1/5
    Fold 1 best total loss: 0.177718
  🔄 Trial 98, Fold 2/5
    Fold 2 best total loss: 0.012387
  🔄 Trial 98, Fold 3/5
    Fold 3 best total loss: 0.013366
  🔄 Trial 98, Fold 4/5
    Fold 4 best total loss: 0.013142
  🔄 Trial 98, Fold 5/5


[I 2025-10-10 07:25:02,191] Trial 98 finished with value: 0.045036126428749414 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.1, 'decoder_hidden_size': 64, 'decoder_n_layers': 3, 'decoder_dropout': 0.1, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.008566
  📊 Trial 98 - Average CV Loss: 0.045036 (±0.066364)
  🔄 Trial 99, Fold 1/5
    Fold 1 best total loss: 0.008258
  🔄 Trial 99, Fold 2/5
    Fold 2 best total loss: 0.321320
  🔄 Trial 99, Fold 3/5
    Fold 3 best total loss: 0.015255
  🔄 Trial 99, Fold 4/5
    Fold 4 best total loss: 0.008749
  🔄 Trial 99, Fold 5/5


[I 2025-10-10 07:29:36,774] Trial 99 finished with value: 0.07288989116204903 and parameters: {'lstm_hidden_size': 96, 'lstm_n_layers': 4, 'lstm_dropout': 0.30000000000000004, 'decoder_hidden_size': 32, 'decoder_n_layers': 5, 'decoder_dropout': 0.2, 'current_hidden_size': 96, 'current_n_layers': 3, 'current_dropout': 0.1}. Best is trial 78 with value: 0.01047220446052961.


    Fold 5 best total loss: 0.010867
  📊 Trial 99 - Average CV Loss: 0.072890 (±0.124240)

📊 OPTIMIZATION RESULTS
✅ 완료된 trials: 100
🏆 최고 성능 trial: 78
💯 최고 성능 값: 0.010472

🎯 최적 하이퍼파라미터:
   lstm_hidden_size: 96
   lstm_n_layers: 4
   lstm_dropout: 0.2
   decoder_hidden_size: 32
   decoder_n_layers: 3
   decoder_dropout: 0.1
   current_hidden_size: 96
   current_n_layers: 3
   current_dropout: 0.1

📈 상위 5개 Trials:
   1. Trial 78: 0.010472
   2. Trial 75: 0.010700
   3. Trial 65: 0.010988
   4. Trial 82: 0.011199
   5. Trial 71: 0.011749
💾 모든 trials 결과가 저장되었습니다: bmed_optuna_trials_20251010_072936.csv
💾 SQLite 데이터베이스에 실시간 저장됨: sqlite:///bmed_hpopt_study.db
   - 중단 후 재시작 시 자동으로 기존 결과를 불러옵니다
   - 다른 프로세스에서 진행상황 모니터링 가능합니다
🎉 하이퍼파라미터 최적화 완료!
