In [1]:
import pandas as pd
import numpy as np
import re

import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report, accuracy_score

import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchmetrics.regression import MeanSquaredError, MeanAbsoluteError
from torchmetrics.classification import BinaryAUROC, BinaryAveragePrecision
from torchmetrics.classification import BinaryAccuracy, BinaryPrecision, BinaryRecall, BinaryF1Score

import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from lightning.pytorch.callbacks import Callback

import optuna
from optuna.integration import PyTorchLightningPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


### 데이터셋 정의

In [2]:
DATA_MAKER = "jiseock"
if DATA_MAKER == "jiseock":
    DATA_PATH = "../dataset/jiseock"
else:
    DATA_PATH = "../dataset/yeonseo"

X_train = pd.read_csv(f"{DATA_PATH}/X_train.csv")
y_train = pd.read_csv(f"{DATA_PATH}/y_train.csv")

In [3]:
X_train

Unnamed: 0,나이,"성별 (M:1,F:2)","Rt:1,Lt:2",Height,Weight,"Tearsize (AP,cm)",Tearsize (ML),Tearsize (retraction),"흡연여부 (비흡연:1,흡연:2)","흡연여부 (비흡연:1,흡연:2) Missing flag",...,6M Goutallier (ISP),6M Goutallier (TM),Pre Goutallier (SSP) Missing flag,Pre Goutallier (SSC) Missing flag,Pre Goutallier (ISP) Missing flag,Pre Goutallier (TM) Missing flag,6M Goutallier (SSP) Missing flag,6M Goutallier (SSC) Missing flag,6M Goutallier (ISP) Missing flag,6M Goutallier (TM) Missing flag
0,70,2,1,-0.112260,-0.239010,0.529347,1.299314,1.236586,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,61,1,1,0.429514,-0.072467,-0.673177,-0.693529,-0.671659,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,73,2,1,-0.344449,-0.488825,-0.913682,-0.927981,-0.896158,1.0,0.0,...,-0.229042,-0.201304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,60,1,2,0.584307,0.268947,-0.071915,-0.693529,-0.671659,1.0,1.0,...,-0.229042,-0.201304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,75,2,2,-0.050343,-0.209865,0.529347,0.478732,0.450838,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7173,72,2,2,-0.371571,-1.414487,0.014274,-0.777214,-0.751792,1.0,1.0,...,-0.229042,-0.201304,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
7174,80,1,2,0.038328,0.032052,0.070864,-0.498668,-0.485071,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7175,83,2,1,0.778539,0.094077,-0.174047,-0.206960,-0.205745,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7176,65,2,1,0.048063,-0.588316,-1.114365,-1.123613,-1.083486,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [4]:
y_train

Unnamed: 0,POD 6M retear
0,0
1,0
2,0
3,0
4,0
...,...
7173,1
7174,1
7175,1
7176,1


In [5]:
columns = list(X_train.columns)
columns

['나이',
 '성별 (M:1,F:2)',
 'Rt:1,Lt:2',
 'Height',
 'Weight',
 'Tearsize (AP,cm)',
 'Tearsize (ML)',
 'Tearsize (retraction)',
 '흡연여부 (비흡연:1,흡연:2)',
 '흡연여부 (비흡연:1,흡연:2) Missing flag',
 'Hospital 0',
 'Hospital 1',
 'Hospital 2',
 'Hospital 3',
 'Hospital 4',
 'Hospital 5',
 'Hospital 6',
 'Disease 0',
 'Disease 1',
 'Disease 2',
 'Disease 3',
 'Disease 4',
 'Disease 5',
 'Disease 6',
 'Disease 7',
 '0M ASES',
 '0M CSS',
 '0M ERabd',
 '0M ERside',
 '0M FF',
 '0M IR',
 '0M KSS',
 '0M MMTgrade',
 '0M MMTsec',
 '0M VAS(activity)',
 '0M VAS(resting)',
 '0M add',
 '2M ERabd',
 '2M ERside',
 '2M FF',
 '2M IR',
 '2M MMTgrade',
 '2M MMTsec',
 '2M add',
 '3M ASES',
 '3M CSS',
 '3M ERabd',
 '3M ERside',
 '3M FF',
 '3M IR',
 '3M KSS',
 '3M MMTgrade',
 '3M MMTsec',
 '3M VAS(activity)',
 '3M VAS(resting)',
 '3M add',
 '4M ASES',
 '4M CSS',
 '4M ERabd',
 '4M ERside',
 '4M FF',
 '4M IR',
 '4M KSS',
 '4M MMTgrade',
 '4M MMTsec',
 '4M VAS(activity)',
 '4M VAS(resting)',
 '4M add',
 '6M ASES',
 '6M CSS',
 

In [6]:
static_columns = columns[:25]

# static 데이터 칼럼
static_columns

['나이',
 '성별 (M:1,F:2)',
 'Rt:1,Lt:2',
 'Height',
 'Weight',
 'Tearsize (AP,cm)',
 'Tearsize (ML)',
 'Tearsize (retraction)',
 '흡연여부 (비흡연:1,흡연:2)',
 '흡연여부 (비흡연:1,흡연:2) Missing flag',
 'Hospital 0',
 'Hospital 1',
 'Hospital 2',
 'Hospital 3',
 'Hospital 4',
 'Hospital 5',
 'Hospital 6',
 'Disease 0',
 'Disease 1',
 'Disease 2',
 'Disease 3',
 'Disease 4',
 'Disease 5',
 'Disease 6',
 'Disease 7']

In [7]:
seq_columns = columns[25:-16]

# 시퀀셜 데이터 관련 칼럼들
seq_columns_0M = seq_columns[:12]
seq_columns_2M = seq_columns[12:19]
seq_columns_3M = seq_columns[19:31]
seq_columns_4M = seq_columns[31:43]
seq_columns_6M = seq_columns[43:]

seq_columns_all = [seq_columns_0M, seq_columns_2M, seq_columns_3M, seq_columns_4M, seq_columns_6M]

for seq_col in seq_columns_all:
    print(seq_col)

['0M ASES', '0M CSS', '0M ERabd', '0M ERside', '0M FF', '0M IR', '0M KSS', '0M MMTgrade', '0M MMTsec', '0M VAS(activity)', '0M VAS(resting)', '0M add']
['2M ERabd', '2M ERside', '2M FF', '2M IR', '2M MMTgrade', '2M MMTsec', '2M add']
['3M ASES', '3M CSS', '3M ERabd', '3M ERside', '3M FF', '3M IR', '3M KSS', '3M MMTgrade', '3M MMTsec', '3M VAS(activity)', '3M VAS(resting)', '3M add']
['4M ASES', '4M CSS', '4M ERabd', '4M ERside', '4M FF', '4M IR', '4M KSS', '4M MMTgrade', '4M MMTsec', '4M VAS(activity)', '4M VAS(resting)', '4M add']
['6M ASES', '6M CSS', '6M ERabd', '6M ERside', '6M FF', '6M IR', '6M KSS', '6M MMTgrade', '6M MMTsec', '6M VAS(activity)', '6M VAS(resting)', '6M add']


In [8]:
goutallier_columns = columns[-16:]

# goutaliar 관련 칼럼들
goutallier_columns_0M = goutallier_columns [:4]
goutallier_columns_6M = goutallier_columns [4:8]
goutallier_columns_0M_missing = goutallier_columns [8:12]
goutallier_columns_6M_missing = goutallier_columns [12:]

print(goutallier_columns_0M)
print(goutallier_columns_6M)
print(goutallier_columns_0M_missing)
print(goutallier_columns_6M_missing)

['Pre Goutallier (SSP)', 'Pre Goutallier (SSC)', 'Pre Goutallier (ISP)', 'Pre Goutallier (TM)']
['6M Goutallier (SSP)', '6M Goutallier (SSC)', '6M Goutallier (ISP)', '6M Goutallier (TM)']
['Pre Goutallier (SSP) Missing flag', 'Pre Goutallier (SSC) Missing flag', 'Pre Goutallier (ISP) Missing flag', 'Pre Goutallier (TM) Missing flag']
['6M Goutallier (SSP) Missing flag', '6M Goutallier (SSC) Missing flag', '6M Goutallier (ISP) Missing flag', '6M Goutallier (TM) Missing flag']


In [9]:
len(columns) == len(static_columns) + len(seq_columns) + len(goutallier_columns)

True

In [10]:
label_column = "POD 6M retear"
output_columns = ["6M ASES", "6M CSS", "6M KSS", "6M VAS(activity)", "6M VAS(resting)"]
input_columns = static_columns + [column for column in seq_columns if column not in output_columns] + goutallier_columns

In [11]:
output_columns

['6M ASES', '6M CSS', '6M KSS', '6M VAS(activity)', '6M VAS(resting)']

In [12]:
input_columns

['나이',
 '성별 (M:1,F:2)',
 'Rt:1,Lt:2',
 'Height',
 'Weight',
 'Tearsize (AP,cm)',
 'Tearsize (ML)',
 'Tearsize (retraction)',
 '흡연여부 (비흡연:1,흡연:2)',
 '흡연여부 (비흡연:1,흡연:2) Missing flag',
 'Hospital 0',
 'Hospital 1',
 'Hospital 2',
 'Hospital 3',
 'Hospital 4',
 'Hospital 5',
 'Hospital 6',
 'Disease 0',
 'Disease 1',
 'Disease 2',
 'Disease 3',
 'Disease 4',
 'Disease 5',
 'Disease 6',
 'Disease 7',
 '0M ASES',
 '0M CSS',
 '0M ERabd',
 '0M ERside',
 '0M FF',
 '0M IR',
 '0M KSS',
 '0M MMTgrade',
 '0M MMTsec',
 '0M VAS(activity)',
 '0M VAS(resting)',
 '0M add',
 '2M ERabd',
 '2M ERside',
 '2M FF',
 '2M IR',
 '2M MMTgrade',
 '2M MMTsec',
 '2M add',
 '3M ASES',
 '3M CSS',
 '3M ERabd',
 '3M ERside',
 '3M FF',
 '3M IR',
 '3M KSS',
 '3M MMTgrade',
 '3M MMTsec',
 '3M VAS(activity)',
 '3M VAS(resting)',
 '3M add',
 '4M ASES',
 '4M CSS',
 '4M ERabd',
 '4M ERside',
 '4M FF',
 '4M IR',
 '4M KSS',
 '4M MMTgrade',
 '4M MMTsec',
 '4M VAS(activity)',
 '4M VAS(resting)',
 '4M add',
 '6M ERabd',
 '6M ERside

In [13]:
X_train[input_columns]

Unnamed: 0,나이,"성별 (M:1,F:2)","Rt:1,Lt:2",Height,Weight,"Tearsize (AP,cm)",Tearsize (ML),Tearsize (retraction),"흡연여부 (비흡연:1,흡연:2)","흡연여부 (비흡연:1,흡연:2) Missing flag",...,6M Goutallier (ISP),6M Goutallier (TM),Pre Goutallier (SSP) Missing flag,Pre Goutallier (SSC) Missing flag,Pre Goutallier (ISP) Missing flag,Pre Goutallier (TM) Missing flag,6M Goutallier (SSP) Missing flag,6M Goutallier (SSC) Missing flag,6M Goutallier (ISP) Missing flag,6M Goutallier (TM) Missing flag
0,70,2,1,-0.112260,-0.239010,0.529347,1.299314,1.236586,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,61,1,1,0.429514,-0.072467,-0.673177,-0.693529,-0.671659,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,73,2,1,-0.344449,-0.488825,-0.913682,-0.927981,-0.896158,1.0,0.0,...,-0.229042,-0.201304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,60,1,2,0.584307,0.268947,-0.071915,-0.693529,-0.671659,1.0,1.0,...,-0.229042,-0.201304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,75,2,2,-0.050343,-0.209865,0.529347,0.478732,0.450838,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7173,72,2,2,-0.371571,-1.414487,0.014274,-0.777214,-0.751792,1.0,1.0,...,-0.229042,-0.201304,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
7174,80,1,2,0.038328,0.032052,0.070864,-0.498668,-0.485071,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7175,83,2,1,0.778539,0.094077,-0.174047,-0.206960,-0.205745,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7176,65,2,1,0.048063,-0.588316,-1.114365,-1.123613,-1.083486,1.0,1.0,...,-0.229042,-0.201304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
pd.concat([y_train, X_train[output_columns]], axis=1)

Unnamed: 0,POD 6M retear,6M ASES,6M CSS,6M KSS,6M VAS(activity),6M VAS(resting)
0,0,1.377047,1.448052,0.201114,1.201822,1.202527
1,0,0.756417,0.258163,1.336235,-1.788307,-1.761167
2,0,-0.655014,1.087954,-1.004221,1.201822,1.202527
3,0,-0.594953,-0.054966,-0.699962,1.201822,1.202527
4,0,0.566225,-1.526670,-0.126550,0.205112,0.214629
...,...,...,...,...,...,...
7173,1,0.576435,-0.284644,0.671297,0.560877,0.567248
7174,1,-0.788472,0.328121,-0.568735,0.205112,0.214629
7175,1,0.678394,-0.617836,-0.355947,2.198532,2.190425
7176,1,0.932377,0.515679,0.898310,0.205112,0.214629


In [15]:
def get_dataset(split):
  assert split in ["train", "val", "test"]
  
  X_file_name = f"{DATA_PATH}/X_{split}.csv"
  y_file_name = f"{DATA_PATH}/y_{split}.csv"

  X = pd.read_csv(X_file_name)
  y = pd.read_csv(y_file_name)
  
  # static 데이터
  X_static_tensor = torch.tensor(X[static_columns].to_numpy(), dtype=torch.float32)

  # 시기별 sequential 데이터
  X_seq_tensor_0M = torch.tensor(X[seq_columns_0M].to_numpy(), dtype=torch.float32)
  X_seq_tensor_2M = torch.tensor(X[seq_columns_2M].to_numpy(), dtype=torch.float32)
  X_seq_tensor_3M = torch.tensor(X[seq_columns_3M].to_numpy(), dtype=torch.float32)
  X_seq_tensor_4M = torch.tensor(X[seq_columns_4M].to_numpy(), dtype=torch.float32)
  X_seq_tensor_6M = torch.tensor(X[seq_columns_6M].to_numpy(), dtype=torch.float32)
  
  #0M, 6M goutalier 데이터
  X_goutalier_tensor_0M = torch.tensor(X[goutallier_columns_0M + goutallier_columns_0M_missing].to_numpy(), dtype=torch.float32)
  X_goutalier_tensor_6M = torch.tensor(X[goutallier_columns_6M + goutallier_columns_6M_missing].to_numpy(), dtype=torch.float32)
  
  # 전체 인풋 데이터
  X_tensor = torch.tensor(X[input_columns].to_numpy(), dtype=torch.float32)
  
  # 6M 예측 데이터
  y_tensor = torch.tensor(pd.concat([y, X[output_columns]], axis=1).to_numpy(), dtype=torch.float32)

  return TensorDataset(X_tensor, X_static_tensor, X_seq_tensor_0M, X_seq_tensor_2M, X_seq_tensor_3M, X_seq_tensor_4M, X_seq_tensor_6M, X_goutalier_tensor_0M, X_goutalier_tensor_6M, y_tensor)

In [16]:
trainset = get_dataset("train")
valset = get_dataset("val")
testset = get_dataset("test")

print("기존 데이터셋 구조 확인")
print(f"Trainset size: {len(trainset)}")
print(f"Validset size: {len(valset)}") # jiseock
print(f"Testset size: {len(testset)}")

기존 데이터셋 구조 확인
Trainset size: 7178
Validset size: 647
Testset size: 100


In [17]:
# 특징 크기 확인
static_features = len(static_columns)
seq_features_0M = len(seq_columns_0M)
seq_features_2M = len(seq_columns_2M)
seq_features_3M = len(seq_columns_3M)
seq_features_4M = len(seq_columns_4M)
seq_features_6M = len(seq_columns_6M)
goutallier_features_0M = len(goutallier_columns_0M) + len(goutallier_columns_0M_missing)
goutallier_features_6M = len(goutallier_columns_6M) + len(goutallier_columns_6M_missing)

print(f"Static features: {static_features}")
print(f"0M features: {seq_features_0M}, 2M features: {seq_features_2M}, 3M features: {seq_features_3M}")
print(f"4M features: {seq_features_4M}, 6M features: {seq_features_6M}")
print(f"0M Goutallier features: {goutallier_features_0M}, 6M Goutallier features: {goutallier_features_6M}")

Static features: 25
0M features: 12, 2M features: 7, 3M features: 12
4M features: 12, 6M features: 12
0M Goutallier features: 8, 6M Goutallier features: 8


In [18]:
# 각 모델별 데이터셋 생성 함수
def get_dataset_model1(split):
    """Model 1: static + 0M + 0M_goutallier → 2M"""
    assert split in ["train", "val", "test"]
    
    X_file_name = f"{DATA_PATH}/X_{split}.csv"
    y_file_name = f"{DATA_PATH}/y_{split}.csv"
    
    X = pd.read_csv(X_file_name)
    y = pd.read_csv(y_file_name)
    
    # 입력: static + 0M + 0M_goutallier
    X_static = torch.tensor(X[static_columns].to_numpy(), dtype=torch.float32)
    X_0M = torch.tensor(X[seq_columns_0M].to_numpy(), dtype=torch.float32)
    X_0M_goutallier = torch.tensor(X[goutallier_columns_0M + goutallier_columns_0M_missing].to_numpy(), dtype=torch.float32)
    
    # 출력: 2M
    y_2M = torch.tensor(X[seq_columns_2M].to_numpy(), dtype=torch.float32)
    
    return TensorDataset(X_static, X_0M, X_0M_goutallier, y_2M)

def get_dataset_model2(split):
    """Model 2: static + 0M + 2M + 0M_goutallier → 3M"""
    assert split in ["train", "val", "test"]
    
    X_file_name = f"{DATA_PATH}/X_{split}.csv"
    y_file_name = f"{DATA_PATH}/y_{split}.csv"
    
    X = pd.read_csv(X_file_name)
    y = pd.read_csv(y_file_name)
    
    # 입력: static + 0M + 2M + 0M_goutallier
    X_static = torch.tensor(X[static_columns].to_numpy(), dtype=torch.float32)
    X_0M = torch.tensor(X[seq_columns_0M].to_numpy(), dtype=torch.float32)
    X_2M = torch.tensor(X[seq_columns_2M].to_numpy(), dtype=torch.float32)
    X_0M_goutallier = torch.tensor(X[goutallier_columns_0M + goutallier_columns_0M_missing].to_numpy(), dtype=torch.float32)
    
    # 출력: 3M
    y_3M = torch.tensor(X[seq_columns_3M].to_numpy(), dtype=torch.float32)
    
    return TensorDataset(X_static, X_0M, X_2M, X_0M_goutallier, y_3M)

def get_dataset_model3(split):
    """Model 3: static + 0M + 2M + 3M + 0M_goutallier → 4M"""
    assert split in ["train", "val", "test"]
    
    X_file_name = f"{DATA_PATH}/X_{split}.csv"
    y_file_name = f"{DATA_PATH}/y_{split}.csv"
    
    X = pd.read_csv(X_file_name)
    y = pd.read_csv(y_file_name)
    
    # 입력: static + 0M + 2M + 3M + 0M_goutallier
    X_static = torch.tensor(X[static_columns].to_numpy(), dtype=torch.float32)
    X_0M = torch.tensor(X[seq_columns_0M].to_numpy(), dtype=torch.float32)
    X_2M = torch.tensor(X[seq_columns_2M].to_numpy(), dtype=torch.float32)
    X_3M = torch.tensor(X[seq_columns_3M].to_numpy(), dtype=torch.float32)
    X_0M_goutallier = torch.tensor(X[goutallier_columns_0M + goutallier_columns_0M_missing].to_numpy(), dtype=torch.float32)
    
    # 출력: 4M
    y_4M = torch.tensor(X[seq_columns_4M].to_numpy(), dtype=torch.float32)
    
    return TensorDataset(X_static, X_0M, X_2M, X_3M, X_0M_goutallier, y_4M)

def get_dataset_model4(split):
    """Model 4: static + 0M + 2M + 3M + 4M + 0M_goutallier → 6M + y + 6M_goutallier"""
    assert split in ["train", "val", "test"]
    
    X_file_name = f"{DATA_PATH}/X_{split}.csv"
    y_file_name = f"{DATA_PATH}/y_{split}.csv"
    
    X = pd.read_csv(X_file_name)
    y = pd.read_csv(y_file_name)
    
    # 입력: static + 0M + 2M + 3M + 4M + 0M_goutallier
    X_static = torch.tensor(X[static_columns].to_numpy(), dtype=torch.float32)
    X_0M = torch.tensor(X[seq_columns_0M].to_numpy(), dtype=torch.float32)
    X_2M = torch.tensor(X[seq_columns_2M].to_numpy(), dtype=torch.float32)
    X_3M = torch.tensor(X[seq_columns_3M].to_numpy(), dtype=torch.float32)
    X_4M = torch.tensor(X[seq_columns_4M].to_numpy(), dtype=torch.float32)
    X_0M_goutallier = torch.tensor(X[goutallier_columns_0M + goutallier_columns_0M_missing].to_numpy(), dtype=torch.float32)
    
    # 출력: 6M + y + 6M_goutallier
    y_6M = torch.tensor(X[seq_columns_6M].to_numpy(), dtype=torch.float32)
    y_label = torch.tensor(y[label_column].to_numpy(), dtype=torch.float32).unsqueeze(1)
    y_6M_goutallier = torch.tensor(X[goutallier_columns_6M + goutallier_columns_6M_missing].to_numpy(), dtype=torch.float32)
    
    # 결합: [6M features (12) + y (1) + 6M_goutallier (8)] = 21
    y_combined = torch.cat([y_6M, y_label, y_6M_goutallier], dim=1)
    
    return TensorDataset(X_static, X_0M, X_2M, X_3M, X_4M, X_0M_goutallier, y_combined)

# 데이터셋 생성
trainset_model1 = get_dataset_model1("train")
testset_model1 = get_dataset_model1("test")
valset_model1 = get_dataset_model1("val")

trainset_model2 = get_dataset_model2("train")
testset_model2 = get_dataset_model2("test")
valset_model2 = get_dataset_model2("val")

trainset_model3 = get_dataset_model3("train")
testset_model3 = get_dataset_model3("test")
valset_model3 = get_dataset_model3("val")

trainset_model4 = get_dataset_model4("train")
testset_model4 = get_dataset_model4("test")
valset_model4 = get_dataset_model4("val")

print(f"Model 1 - Train: {len(trainset_model1)}, Test: {len(testset_model1)}, Valid: {len(valset_model1)}")
print(f"Model 2 - Train: {len(trainset_model2)}, Test: {len(testset_model2)}, Valid: {len(valset_model2)}")
print(f"Model 3 - Train: {len(trainset_model3)}, Test: {len(testset_model3)}, Valid: {len(valset_model3)}")
print(f"Model 4 - Train: {len(trainset_model4)}, Test: {len(testset_model4)}, Valid: {len(valset_model4)}")


Model 1 - Train: 7178, Test: 100, Valid: 647
Model 2 - Train: 7178, Test: 100, Valid: 647
Model 3 - Train: 7178, Test: 100, Valid: 647
Model 4 - Train: 7178, Test: 100, Valid: 647


In [None]:
class LossHistoryCallback(Callback):
    def __init__(self):
        super().__init__()
        self.train_losses = []
        self.test_losses = []
    
    def on_train_epoch_end(self, trainer, pl_module):
        if len(self.train_losses) == 0:
            print(f"[Train] Available metrics: {list(trainer.callback_metrics.keys())}")
        
        train_loss = trainer.callback_metrics.get('train/loss_epoch')
        if train_loss is not None:
            self.train_losses.append(train_loss.item())
        else:
            print(f"Warning: train/loss_epoch not found!")
    
    def on_validation_epoch_end(self, trainer, pl_module):
        if len(self.test_losses) == 0:
            print(f"[Val] Available metrics: {list(trainer.callback_metrics.keys())}")
        
        val_loss = trainer.callback_metrics.get('val/loss')
        if val_loss is not None:
            self.test_losses.append(val_loss.item())
        else:
            print(f"Warning: val/loss not found!")

class CleanupCheckpointCallback(Callback):
    """학습 종료 후 최고 성능 체크포인트만 남기고 나머지 제거"""
    def __init__(self, checkpoint_dir, filename_prefix):
        super().__init__()
        self.checkpoint_dir = checkpoint_dir
        self.filename_prefix = filename_prefix
    
    def on_train_end(self, trainer, pl_module):
        import os
        import glob
        
        # 체크포인트 디렉토리의 모든 파일 확인
        checkpoint_files = glob.glob(os.path.join(self.checkpoint_dir, f"{self.filename_prefix}*.ckpt"))
        
        if len(checkpoint_files) == 0:
            return
        
        # 최고 성능 체크포인트 찾기 (ModelCheckpoint가 저장한 best 체크포인트)
        best_checkpoint = None
        for callback in trainer.callbacks:
            if isinstance(callback, ModelCheckpoint):
                best_checkpoint = callback.best_model_path
                break
        
        # 최고 성능 체크포인트가 있으면 나머지 제거
        if best_checkpoint and os.path.exists(best_checkpoint):
            removed_count = 0
            for checkpoint_file in checkpoint_files:
                if checkpoint_file != best_checkpoint:
                    try:
                        os.remove(checkpoint_file)
                        removed_count += 1
                    except Exception as e:
                        print(f"Warning: 체크포인트 삭제 실패 {checkpoint_file}: {e}")
            
            if removed_count > 0:
                print(f"[체크포인트 정리] {removed_count}개의 체크포인트 제거 완료. 최고 성능 체크포인트만 유지: {os.path.basename(best_checkpoint)}")
        else:
            # best 체크포인트를 찾지 못한 경우, 파일명으로 최신 것만 남기기
            if checkpoint_files:
                # 파일 수정 시간 기준으로 정렬
                checkpoint_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
                best_checkpoint = checkpoint_files[0]
                
                removed_count = 0
                for checkpoint_file in checkpoint_files[1:]:
                    try:
                        os.remove(checkpoint_file)
                        removed_count += 1
                    except Exception as e:
                        print(f"Warning: 체크포인트 삭제 실패 {checkpoint_file}: {e}")
                
                if removed_count > 0:
                    print(f"[체크포인트 정리] {removed_count}개의 체크포인트 제거 완료. 최신 체크포인트만 유지: {os.path.basename(best_checkpoint)}")


### Optuna 모델 최적화

In [20]:
# ============================================================================
# NAS 기반 Optuna 하이퍼파라미터 최적화
# ============================================================================

def create_encoder(trial, input_dim, name_prefix, min_units=32, max_units=256, min_layers=1, max_layers=3):
    """동적으로 인코더 생성 - NAS 기반"""
    n_layers = trial.suggest_int(f'{name_prefix}_n_layers', min_layers, max_layers)
    layers = []
    prev_dim = input_dim
    
    for i in range(n_layers):
        units = trial.suggest_int(f'{name_prefix}_units_{i}', min_units, max_units, step=32)
        dropout = trial.suggest_float(f'{name_prefix}_dropout_{i}', 0.0, 0.5, step=0.05)
        use_batch_norm = trial.suggest_categorical(f'{name_prefix}_batch_norm_{i}', [True, False])
        
        layers.append(nn.Linear(prev_dim, units))
        if use_batch_norm:
            layers.append(nn.BatchNorm1d(units))
        else:
            layers.append(nn.LayerNorm(units))
        layers.append(nn.LeakyReLU())
        if dropout > 0:
            layers.append(nn.Dropout(dropout))
        
        prev_dim = units
    
    return nn.Sequential(*layers), prev_dim

def create_output_head(trial, input_dim, output_dim, name_prefix, min_units=64, max_units=512, min_layers=1, max_layers=4):
    """동적으로 출력 헤드 생성 - NAS 기반"""
    n_layers = trial.suggest_int(f'{name_prefix}_n_layers', min_layers, max_layers)
    layers = []
    prev_dim = input_dim
    
    for i in range(n_layers - 1):
        units = trial.suggest_int(f'{name_prefix}_units_{i}', min_units, max_units, step=32)
        dropout = trial.suggest_float(f'{name_prefix}_dropout_{i}', 0.0, 0.5, step=0.05)
        use_batch_norm = trial.suggest_categorical(f'{name_prefix}_batch_norm_{i}', [True, False])
        
        layers.append(nn.Linear(prev_dim, units))
        if use_batch_norm:
            layers.append(nn.BatchNorm1d(units))
        else:
            layers.append(nn.LayerNorm(units))
        layers.append(nn.LeakyReLU())
        if dropout > 0:
            layers.append(nn.Dropout(dropout))
        
        prev_dim = units
    
    # 마지막 레이어 (출력)
    layers.append(nn.Linear(prev_dim, output_dim))
    
    return nn.Sequential(*layers)

print("NAS 기반 인코더/헤드 생성 함수 정의 완료")


NAS 기반 인코더/헤드 생성 함수 정의 완료


In [21]:
# Model 1 최적화 버전
class OptimizedSequentialMLP1(L.LightningModule):
    def __init__(self, trial, static_features, seq_0M_features, goutallier_0M_features, out_features_2M):
        super().__init__()
        
        # 인코더 생성
        self.static_encoder, static_out = create_encoder(
            trial, static_features, 'static_encoder', min_units=64, max_units=256
        )
        
        self.seq_0M_encoder, seq_out = create_encoder(
            trial, seq_0M_features, 'seq_0M_encoder', min_units=64, max_units=256
        )
        
        self.goutallier_0M_encoder, goutalier_out = create_encoder(
            trial, goutallier_0M_features, 'goutallier_0M_encoder', min_units=32, max_units=128
        )
        
        # 특징 결합 후 출력 헤드
        feat_dim = static_out + seq_out + goutalier_out
        self.output_head = create_output_head(
            trial, feat_dim, out_features_2M, 'output_head', min_units=128, max_units=512
        )
        
        # 학습 파라미터
        self.lr = trial.suggest_float('lr', 1e-7, 1e-4, log=True)
        self.weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
        
        self.train_mse = MeanSquaredError()
        self.val_mse = MeanSquaredError()
        self.test_mse = MeanSquaredError()
        
    def forward(self, x_static, x_0M, x_0M_goutallier):
        static_feat = self.static_encoder(x_static)
        seq_0M_feat = self.seq_0M_encoder(x_0M)
        goutallier_0M_feat = self.goutallier_0M_encoder(x_0M_goutallier)
        
        combined = torch.cat([static_feat, seq_0M_feat, goutallier_0M_feat], dim=1)
        output = self.output_head(combined)
        return output
    
    def training_step(self, batch, batch_idx):
        x_static, x_0M, x_0M_goutallier, y_2M = batch
        pred_2M = self.forward(x_static, x_0M, x_0M_goutallier)
        loss = F.mse_loss(pred_2M, y_2M)
        
        self.log("train/loss", loss, on_epoch=True, prog_bar=True)
        self.train_mse.update(pred_2M, y_2M)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x_static, x_0M, x_0M_goutallier, y_2M = batch
        pred_2M = self.forward(x_static, x_0M, x_0M_goutallier)
        loss = F.mse_loss(pred_2M, y_2M)
        
        self.log("val/loss", loss, on_epoch=True, prog_bar=True)
        self.val_mse.update(pred_2M, y_2M)
        return loss
    
    def on_train_epoch_end(self):
        self.log("train/mse", self.train_mse.compute())
        self.train_mse.reset()
    
    def on_validation_epoch_end(self):
        self.log("val/mse", self.val_mse.compute())
        self.val_mse.reset()
    
    def test_step(self, batch, batch_idx):
        x_static, x_0M, x_0M_goutallier, y_2M = batch
        pred_2M = self.forward(x_static, x_0M, x_0M_goutallier)
        loss = F.mse_loss(pred_2M, y_2M)
        
        self.log("test/loss", loss, on_epoch=True, prog_bar=True)
        self.test_mse.update(pred_2M, y_2M)
        return loss
    
    def on_test_epoch_end(self):
        self.log("test/mse", self.test_mse.compute())
        self.test_mse.reset()
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=5
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val/loss"
            }
        }

# Model 2 최적화 버전
class OptimizedSequentialMLP2(L.LightningModule):
    def __init__(self, trial, static_features, seq_0M_features, seq_2M_features, goutallier_0M_features, out_features_3M):
        super().__init__()
        
        self.static_encoder, static_out = create_encoder(
            trial, static_features, 'static_encoder', min_units=32, max_units=128
        )
        
        self.seq_0M_encoder, seq_0M_out = create_encoder(
            trial, seq_0M_features, 'seq_0M_encoder', min_units=64, max_units=256
        )
        
        self.seq_2M_encoder, seq_2M_out = create_encoder(
            trial, seq_2M_features, 'seq_2M_encoder', min_units=32, max_units=128
        )
        
        self.goutallier_0M_encoder, goutalier_out = create_encoder(
            trial, goutallier_0M_features, 'goutallier_0M_encoder', min_units=32, max_units=128
        )
        
        feat_dim = static_out + seq_0M_out + seq_2M_out + goutalier_out
        self.output_head = create_output_head(
            trial, feat_dim, out_features_3M, 'output_head', min_units=64, max_units=256
        )
        
        self.lr = trial.suggest_float('lr', 1e-7, 1e-4, log=True)
        self.weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
        
        self.train_mse = MeanSquaredError()
        self.val_mse = MeanSquaredError()
        self.test_mse = MeanSquaredError()
        
    def forward(self, x_static, x_0M, x_2M, x_0M_goutallier):
        static_feat = self.static_encoder(x_static)
        seq_0M_feat = self.seq_0M_encoder(x_0M)
        seq_2M_feat = self.seq_2M_encoder(x_2M)
        goutallier_0M_feat = self.goutallier_0M_encoder(x_0M_goutallier)
        
        combined = torch.cat([static_feat, seq_0M_feat, seq_2M_feat, goutallier_0M_feat], dim=1)
        output = self.output_head(combined)
        return output
    
    def training_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_0M_goutallier, y_3M = batch
        pred_3M = self.forward(x_static, x_0M, x_2M, x_0M_goutallier)
        loss = F.mse_loss(pred_3M, y_3M)
        
        self.log("train/loss", loss, on_epoch=True, prog_bar=True)
        self.train_mse.update(pred_3M, y_3M)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_0M_goutallier, y_3M = batch
        pred_3M = self.forward(x_static, x_0M, x_2M, x_0M_goutallier)
        loss = F.mse_loss(pred_3M, y_3M)
        
        self.log("val/loss", loss, on_epoch=True, prog_bar=True)
        self.val_mse.update(pred_3M, y_3M)
        return loss
    
    def on_train_epoch_end(self):
        self.log("train/mse", self.train_mse.compute())
        self.train_mse.reset()
    
    def on_validation_epoch_end(self):
        self.log("val/mse", self.val_mse.compute())
        self.val_mse.reset()
    
    def test_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_0M_goutallier, y_3M = batch
        pred_3M = self.forward(x_static, x_0M, x_2M, x_0M_goutallier)
        loss = F.mse_loss(pred_3M, y_3M)
        
        self.log("test/loss", loss, on_epoch=True, prog_bar=True)
        self.test_mse.update(pred_3M, y_3M)
        return loss
    
    def on_test_epoch_end(self):
        self.log("test/mse", self.test_mse.compute())
        self.test_mse.reset()
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=5
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val/loss"
            }
        }

# Model 3 최적화 버전
class OptimizedSequentialMLP3(L.LightningModule):
    def __init__(self, trial, static_features, seq_0M_features, seq_2M_features, seq_3M_features, goutallier_0M_features, out_features_4M):
        super().__init__()
        
        self.static_encoder, static_out = create_encoder(
            trial, static_features, 'static_encoder', min_units=32, max_units=128
        )
        
        self.seq_0M_encoder, seq_0M_out = create_encoder(
            trial, seq_0M_features, 'seq_0M_encoder', min_units=64, max_units=256
        )
        
        self.seq_2M_encoder, seq_2M_out = create_encoder(
            trial, seq_2M_features, 'seq_2M_encoder', min_units=32, max_units=128
        )
        
        self.seq_3M_encoder, seq_3M_out = create_encoder(
            trial, seq_3M_features, 'seq_3M_encoder', min_units=64, max_units=256
        )
        
        self.goutallier_0M_encoder, goutalier_out = create_encoder(
            trial, goutallier_0M_features, 'goutallier_0M_encoder', min_units=32, max_units=128
        )
        
        feat_dim = static_out + seq_0M_out + seq_2M_out + seq_3M_out + goutalier_out
        self.output_head = create_output_head(
            trial, feat_dim, out_features_4M, 'output_head', min_units=64, max_units=256
        )
        
        self.lr = trial.suggest_float('lr', 1e-7, 1e-4, log=True)
        self.weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
        
        self.train_mse = MeanSquaredError()
        self.val_mse = MeanSquaredError()
        self.test_mse = MeanSquaredError()
        
    def forward(self, x_static, x_0M, x_2M, x_3M, x_0M_goutallier):
        static_feat = self.static_encoder(x_static)
        seq_0M_feat = self.seq_0M_encoder(x_0M)
        seq_2M_feat = self.seq_2M_encoder(x_2M)
        seq_3M_feat = self.seq_3M_encoder(x_3M)
        goutallier_0M_feat = self.goutallier_0M_encoder(x_0M_goutallier)
        
        combined = torch.cat([static_feat, seq_0M_feat, seq_2M_feat, seq_3M_feat, goutallier_0M_feat], dim=1)
        output = self.output_head(combined)
        return output
    
    def training_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_3M, x_0M_goutallier, y_4M = batch
        pred_4M = self.forward(x_static, x_0M, x_2M, x_3M, x_0M_goutallier)
        loss = F.mse_loss(pred_4M, y_4M)
        
        self.log("train/loss", loss, on_epoch=True, prog_bar=True)
        self.train_mse.update(pred_4M, y_4M)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_3M, x_0M_goutallier, y_4M = batch
        pred_4M = self.forward(x_static, x_0M, x_2M, x_3M, x_0M_goutallier)
        loss = F.mse_loss(pred_4M, y_4M)
        
        self.log("val/loss", loss, on_epoch=True, prog_bar=True)
        self.val_mse.update(pred_4M, y_4M)
        return loss
    
    def on_train_epoch_end(self):
        self.log("train/mse", self.train_mse.compute())
        self.train_mse.reset()
    
    def on_validation_epoch_end(self):
        self.log("val/mse", self.val_mse.compute())
        self.val_mse.reset()
    
    def test_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_3M, x_0M_goutallier, y_4M = batch
        pred_4M = self.forward(x_static, x_0M, x_2M, x_3M, x_0M_goutallier)
        loss = F.mse_loss(pred_4M, y_4M)
        
        self.log("test/loss", loss, on_epoch=True, prog_bar=True)
        self.test_mse.update(pred_4M, y_4M)
        return loss
    
    def on_test_epoch_end(self):
        self.log("test/mse", self.test_mse.compute())
        self.test_mse.reset()
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=5
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val/loss"
            }
        }

# Model 4 최적화 버전 (분류 + 회귀)
class OptimizedSequentialMLP4(L.LightningModule):
    def __init__(self, trial, static_features, seq_0M_features, seq_2M_features, seq_3M_features, seq_4M_features, goutallier_0M_features, out_features_total):
        super().__init__()
        self.register_buffer('pos_weight', torch.tensor([1.0]))
        
        # 회귀 손실 가중치 최적화
        self.reg_loss_weight = trial.suggest_float('reg_loss_weight', 0.1, 0.5, step=0.05)
        
        self.static_encoder, static_out = create_encoder(
            trial, static_features, 'static_encoder', min_units=32, max_units=128
        )
        
        self.seq_0M_encoder, seq_0M_out = create_encoder(
            trial, seq_0M_features, 'seq_0M_encoder', min_units=64, max_units=256
        )
        
        self.seq_2M_encoder, seq_2M_out = create_encoder(
            trial, seq_2M_features, 'seq_2M_encoder', min_units=32, max_units=128
        )
        
        self.seq_3M_encoder, seq_3M_out = create_encoder(
            trial, seq_3M_features, 'seq_3M_encoder', min_units=64, max_units=256
        )
        
        self.seq_4M_encoder, seq_4M_out = create_encoder(
            trial, seq_4M_features, 'seq_4M_encoder', min_units=64, max_units=256
        )
        
        self.goutallier_0M_encoder, goutalier_out = create_encoder(
            trial, goutallier_0M_features, 'goutallier_0M_encoder', min_units=32, max_units=128
        )
        
        feat_dim = static_out + seq_0M_out + seq_2M_out + seq_3M_out + seq_4M_out + goutalier_out
        
        # 분류 헤드
        self.clshead = create_output_head(
            trial, feat_dim, 1, 'clshead', min_units=64, max_units=256
        )
        
        # 회귀 헤드
        self.reghead = create_output_head(
            trial, feat_dim, out_features_total - 1, 'reghead', min_units=128, max_units=512
        )
        
        self.lr = trial.suggest_float('lr', 1e-7, 1e-4, log=True)
        self.weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
        
        self.train_roc = BinaryAUROC()
        self.val_roc = BinaryAUROC()
        self.test_roc = BinaryAUROC()
        self.val_ap = BinaryAveragePrecision()
        self.test_ap = BinaryAveragePrecision()
        self.train_mse = MeanSquaredError()
        self.val_mse = MeanSquaredError()
        self.test_mse = MeanSquaredError()
        
    def forward(self, x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier):
        static_feat = self.static_encoder(x_static)
        seq_0M_feat = self.seq_0M_encoder(x_0M)
        seq_2M_feat = self.seq_2M_encoder(x_2M)
        seq_3M_feat = self.seq_3M_encoder(x_3M)
        seq_4M_feat = self.seq_4M_encoder(x_4M)
        goutallier_0M_feat = self.goutallier_0M_encoder(x_0M_goutallier)
        
        combined = torch.cat([static_feat, seq_0M_feat, seq_2M_feat, seq_3M_feat, seq_4M_feat, goutallier_0M_feat], dim=1)
        
        logits = self.clshead(combined)
        regs = self.reghead(combined)
        
        output = torch.cat([logits, regs], dim=1)
        return logits, regs, output
    
    def training_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier, y_combined = batch
        y_label = y_combined[:, seq_features_6M:seq_features_6M+1]
        y_reg = torch.cat([y_combined[:, :seq_features_6M], y_combined[:, seq_features_6M+1:]], dim=1)
        
        logits, regs, _ = self.forward(x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier)
        
        clf_loss = F.binary_cross_entropy_with_logits(logits, y_label, pos_weight=self.pos_weight)
        reg_loss = F.smooth_l1_loss(regs, y_reg)
        loss = clf_loss + self.reg_loss_weight * reg_loss
        
        self.log("train/loss", loss, on_epoch=True, prog_bar=True)
        self.log("train/clf_loss", clf_loss)
        self.log("train/reg_loss", reg_loss)
        
        probs = logits.sigmoid().flatten()
        targets = torch.clamp(y_label.flatten().to(torch.int), 0, 1)
        self.train_roc.update(probs, targets)
        self.train_mse.update(regs, y_reg)
        
        return loss
    
    def validation_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier, y_combined = batch
        y_label = y_combined[:, seq_features_6M:seq_features_6M+1]
        y_reg = torch.cat([y_combined[:, :seq_features_6M], y_combined[:, seq_features_6M+1:]], dim=1)
        
        logits, regs, _ = self.forward(x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier)
        
        clf_loss = F.binary_cross_entropy_with_logits(logits, y_label, pos_weight=self.pos_weight)
        reg_loss = F.smooth_l1_loss(regs, y_reg)
        loss = clf_loss + self.reg_loss_weight * reg_loss
        
        self.log("val/loss", loss, on_epoch=True, prog_bar=True)
        self.log("val/clf_loss", clf_loss)
        self.log("val/reg_loss", reg_loss)
        
        probs = logits.sigmoid().flatten()
        targets = torch.clamp(y_label.flatten().to(torch.int), 0, 1)
        self.val_roc.update(probs, targets)
        self.val_ap.update(probs, targets)
        self.val_mse.update(regs, y_reg)
        
        return loss
    
    def on_train_epoch_end(self):
        self.log("train/roc", self.train_roc.compute())
        self.log("train/mse", self.train_mse.compute())
        self.train_roc.reset()
        self.train_mse.reset()
    
    def on_validation_epoch_end(self):
        self.log("val/roc", self.val_roc.compute())
        self.log("val/ap", self.val_ap.compute())
        self.log("val/mse", self.val_mse.compute())
        self.val_roc.reset()
        self.val_ap.reset()
        self.val_mse.reset()
    
    def test_step(self, batch, batch_idx):
        x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier, y_combined = batch
        y_label = y_combined[:, seq_features_6M:seq_features_6M+1]
        y_reg = torch.cat([y_combined[:, :seq_features_6M], y_combined[:, seq_features_6M+1:]], dim=1)
        
        logits, regs, _ = self.forward(x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier)
        
        clf_loss = F.binary_cross_entropy_with_logits(logits, y_label, pos_weight=self.pos_weight)
        reg_loss = F.smooth_l1_loss(regs, y_reg)
        loss = clf_loss + self.reg_loss_weight * reg_loss
        
        self.log("test/loss", loss, on_epoch=True, prog_bar=True)
        self.log("test/clf_loss", clf_loss)
        self.log("test/reg_loss", reg_loss)
        
        probs = logits.sigmoid().flatten()
        targets = torch.clamp(y_label.flatten().to(torch.int), 0, 1)
        self.test_roc.update(probs, targets)
        self.test_ap.update(probs, targets)
        self.test_mse.update(regs, y_reg)
        
        return loss
    
    def on_test_epoch_end(self):
        self.log("test/roc", self.test_roc.compute())
        self.log("test/ap", self.test_ap.compute())
        self.log("test/mse", self.test_mse.compute())
        self.test_roc.reset()
        self.test_ap.reset()
        self.test_mse.reset()
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val/roc"
            }
        }

print("최적화된 모델 클래스 정의 완료")


최적화된 모델 클래스 정의 완료


In [22]:
# 연결된 시계열 모델 클래스 (4개 모델을 순차적으로 실행)
class SequentialModel(nn.Module):
    """0M 입력만으로 6M 예측하는 연결된 시계열 모델"""
    def __init__(self, model1, model2, model3, model4):
        super().__init__()
        self.model1 = model1
        self.model2 = model2
        self.model3 = model3
        self.model4 = model4
        
        # 평가 모드로 설정
        self.model1.eval()
        self.model2.eval()
        self.model3.eval()
        self.model4.eval()
    
    @torch.no_grad()
    def forward(self, x_static, x_0M, x_0M_goutallier):
        """
        입력: static, 0M, 0M_goutallier
        출력: 6M features, y (logits), 6M_goutallier
        """
        # Model 1: static + 0M + 0M_goutallier → 2M
        pred_2M = self.model1(x_static, x_0M, x_0M_goutallier)
        
        # Model 2: static + 0M + 2M + 0M_goutallier → 3M
        pred_3M = self.model2(x_static, x_0M, pred_2M, x_0M_goutallier)
        
        # Model 3: static + 0M + 2M + 3M + 0M_goutallier → 4M
        pred_4M = self.model3(x_static, x_0M, pred_2M, pred_3M, x_0M_goutallier)
        
        # Model 4: static + 0M + 2M + 3M + 4M + 0M_goutallier → 6M + y + 6M_goutallier
        logits, regs, output = self.model4(x_static, x_0M, pred_2M, pred_3M, pred_4M, x_0M_goutallier)
        
        # 출력 분리: [6M (12) + y (1) + 6M_goutallier (8)]
        pred_6M = regs[:, :seq_features_6M]
        pred_y_logits = logits
        pred_6M_goutallier = regs[:, seq_features_6M:]
        
        return {
            'pred_2M': pred_2M,
            'pred_3M': pred_3M,
            'pred_4M': pred_4M,
            'pred_6M': pred_6M,
            'pred_y_logits': pred_y_logits,
            'pred_6M_goutallier': pred_6M_goutallier,
            'output': output
        }

In [None]:
# ============================================================================
# Optuna 최적화 함수들
# ============================================================================
CHECKPOINT_DIR = "../checkpoint"

def optimize_model1(trial):
    """Model 1 최적화"""
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    
    model = OptimizedSequentialMLP1(
        trial=trial,
        static_features=static_features,
        seq_0M_features=seq_features_0M,
        goutallier_0M_features=goutallier_features_0M,
        out_features_2M=seq_features_2M
    )
    
    trainloader = DataLoader(trainset_model1, batch_size=batch_size, shuffle=True, pin_memory=True)
    valloader = DataLoader(valset_model1, batch_size=batch_size)
    
    trainer = L.Trainer(
        max_epochs=50,
        gradient_clip_val=1.0,
        callbacks=[
            PyTorchLightningPruningCallback(trial, monitor="val/loss"),
            ModelCheckpoint(monitor='val/loss', mode='min', save_top_k=1, save_last=False, filename='optuna-model1-best', dirpath=CHECKPOINT_DIR),
            CleanupCheckpointCallback(checkpoint_dir=CHECKPOINT_DIR, filename_prefix='optuna-model1-best'),
            EarlyStopping(monitor='val/loss', mode='min', patience=10)
        ],
        enable_progress_bar=False,
        logger=False
    )
    
    trainer.fit(model, trainloader, valloader)
    return trainer.callback_metrics["val/loss"].item()

def optimize_model2(trial):
    """Model 2 최적화"""
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    
    model = OptimizedSequentialMLP2(
        trial=trial,
        static_features=static_features,
        seq_0M_features=seq_features_0M,
        seq_2M_features=seq_features_2M,
        goutallier_0M_features=goutallier_features_0M,
        out_features_3M=seq_features_3M
    )
    
    trainloader = DataLoader(trainset_model2, batch_size=batch_size, shuffle=True, pin_memory=True)
    valloader = DataLoader(valset_model2, batch_size=batch_size)
    
    trainer = L.Trainer(
        max_epochs=50,
        gradient_clip_val=1.0,
        callbacks=[
            PyTorchLightningPruningCallback(trial, monitor="val/loss"),
            ModelCheckpoint(monitor='val/loss', mode='min', save_top_k=1, save_last=False, filename='optuna-model2-best', dirpath=CHECKPOINT_DIR),
            CleanupCheckpointCallback(checkpoint_dir=CHECKPOINT_DIR, filename_prefix='optuna-model2-best'),
            EarlyStopping(monitor='val/loss', mode='min', patience=10)
        ],
        enable_progress_bar=False,
        logger=False
    )
    
    trainer.fit(model, trainloader, valloader)
    return trainer.callback_metrics["val/loss"].item()

def optimize_model3(trial):
    """Model 3 최적화"""
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    
    model = OptimizedSequentialMLP3(
        trial=trial,
        static_features=static_features,
        seq_0M_features=seq_features_0M,
        seq_2M_features=seq_features_2M,
        seq_3M_features=seq_features_3M,
        goutallier_0M_features=goutallier_features_0M,
        out_features_4M=seq_features_4M
    )
    
    trainloader = DataLoader(trainset_model3, batch_size=batch_size, shuffle=True, pin_memory=True)
    valloader = DataLoader(valset_model3, batch_size=batch_size)
    
    trainer = L.Trainer(
        max_epochs=50,
        gradient_clip_val=1.0,
        callbacks=[
            PyTorchLightningPruningCallback(trial, monitor="val/loss"),
            ModelCheckpoint(monitor='val/loss', mode='min', save_top_k=1, save_last=False, filename='optuna-model3-best', dirpath=CHECKPOINT_DIR),
            CleanupCheckpointCallback(checkpoint_dir=CHECKPOINT_DIR, filename_prefix='optuna-model3-best'),
            EarlyStopping(monitor='val/loss', mode='min', patience=10)
        ],
        enable_progress_bar=False,
        logger=False
    )
    
    trainer.fit(model, trainloader, valloader)
    return trainer.callback_metrics["val/loss"].item()

def optimize_model4(trial):
    """Model 4 최적화 (ROC AUC 최대화)"""
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    
    model = OptimizedSequentialMLP4(
        trial=trial,
        static_features=static_features,
        seq_0M_features=seq_features_0M,
        seq_2M_features=seq_features_2M,
        seq_3M_features=seq_features_3M,
        seq_4M_features=seq_features_4M,
        goutallier_0M_features=goutallier_features_0M,
        out_features_total=seq_features_6M + 1 + goutallier_features_6M
    )
    
    trainloader = DataLoader(trainset_model4, batch_size=batch_size, shuffle=True, pin_memory=True)
    valloader = DataLoader(valset_model4, batch_size=batch_size)
    
    trainer = L.Trainer(
        max_epochs=50,
        gradient_clip_val=1.0,
        callbacks=[
            PyTorchLightningPruningCallback(trial, monitor="val/roc"),
            ModelCheckpoint(monitor='val/roc', mode='max', save_top_k=1, save_last=False, filename='optuna-model4-best', dirpath=CHECKPOINT_DIR),
            CleanupCheckpointCallback(checkpoint_dir=CHECKPOINT_DIR, filename_prefix='optuna-model4-best'),
            EarlyStopping(monitor='val/roc', mode='max', patience=10)
        ],
        enable_progress_bar=False,
        logger=False
    )
    
    trainer.fit(model, trainloader, valloader)
    # ROC AUC를 최대화 (음수로 반환하여 최소화 문제로 변환)
    return -trainer.callback_metrics["val/roc"].item()

print("Optuna 최적화 함수 정의 완료")


Optuna 최적화 함수 정의 완료


### Optuna 실행 및 파라미터 최적화

In [None]:
n_trials = 20  # 각 모델당 시도 횟수 (필요에 따라 조정 가능)

print("=" * 80)
print("NAS 기반 Optuna 하이퍼파라미터 최적화 시작")
print("=" * 80)

# Model 1 최적화
print("\n[Model 1] 최적화 시작...")
study1 = optuna.create_study(
    direction='minimize',
    study_name='sequential_mlp1_optimization',
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)
)
study1.optimize(optimize_model1, n_trials=n_trials, show_progress_bar=True)

print(f"\n[Model 1] 최적화 완료!")
print(f"최고 성능: {study1.best_value:.6f}")
print(f"최적 파라미터:")
for key, value in study1.best_params.items():
    print(f"  {key}: {value}")

# Model 2 최적화
print("\n[Model 2] 최적화 시작...")
study2 = optuna.create_study(
    direction='minimize',
    study_name='sequential_mlp2_optimization',
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)
)
study2.optimize(optimize_model2, n_trials=n_trials, show_progress_bar=True)

print(f"\n[Model 2] 최적화 완료!")
print(f"최고 성능: {study2.best_value:.6f}")
print(f"최적 파라미터:")
for key, value in study2.best_params.items():
    print(f"  {key}: {value}")

# Model 3 최적화
print("\n[Model 3] 최적화 시작...")
study3 = optuna.create_study(
    direction='minimize',
    study_name='sequential_mlp3_optimization',
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)
)
study3.optimize(optimize_model3, n_trials=n_trials, show_progress_bar=True)

print(f"\n[Model 3] 최적화 완료!")
print(f"최고 성능: {study3.best_value:.6f}")
print(f"최적 파라미터:")
for key, value in study3.best_params.items():
    print(f"  {key}: {value}")

# Model 4 최적화
print("\n[Model 4] 최적화 시작...")
study4 = optuna.create_study(
    direction='minimize',  # 음수 ROC AUC를 최소화 = ROC AUC 최대화
    study_name='sequential_mlp4_optimization',
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)
)
study4.optimize(optimize_model4, n_trials=n_trials, show_progress_bar=True)

print(f"\n[Model 4] 최적화 완료!")
print(f"최고 성능 (음수 ROC AUC): {study4.best_value:.6f}")
print(f"실제 최고 ROC AUC: {-study4.best_value:.6f}")
print(f"최적 파라미터:")
for key, value in study4.best_params.items():
    print(f"  {key}: {value}")

print("\n" + "=" * 80)
print("모든 모델 최적화 완료!")
print("=" * 80)

[I 2025-11-09 12:04:04,784] A new study created in memory with name: sequential_mlp1_optimization


NAS 기반 Optuna 하이퍼파라미터 최적화 시작

[Model 1] 최적화 시작...


  0%|          | 0/100 [00:00<?, ?it/s]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
  return _C._get_float32_matmul_precision()
You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/data/miniconda3/envs/arcr/lib/python3.10/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:751: Checkpoint directory /data/mulsoap0504/ARCR/checkpoint exists and is not empty.
LOCAL_

[I 2025-11-09 12:04:34,781] Trial 0 finished with value: 1.0964479446411133 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 64, 'seq_0M_encoder_dropout_1': 0.15000000000000002, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 32, 'goutallier_0M_encoder_dropout_1': 0.2, 'goutallier_0M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_units_2': 96, 'goutallier_0M_encoder_dropout_2': 0.1, 'goutallier_0M_encoder_batch_norm_2'

Best trial: 1. Best value: 1.06607:   2%|▏         | 2/100 [00:58<47:09, 28.87s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 47.1 K | train
1 | seq_0M_encoder        | Sequential       | 91.0 K | train
2 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
3 | output_head           | Sequential       | 427 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | trai

[I 2025-11-09 12:05:02,857] Trial 1 finished with value: 1.0660723447799683 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 160, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 224, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': False, 'output_head_n_layers': 2, 'output_head_units_0': 512, 'output_head_dropout_0': 0.15000000000000002, 'output_head_batch_norm_0': True, 'lr': 4.374304819333366e-06, 'weight_decay': 2.217722707912821e-05}. Best is trial 1 with value: 1.0660723447799683.


Best trial: 1. Best value: 1.06607:   3%|▎         | 3/100 [01:10<34:41, 21.46s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 88.4 K | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | goutallier_0M_encoder | Sequential       | 5.6 K  | train
3 | output_head           | Sequential       | 393 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | trai

[I 2025-11-09 12:05:15,493] Trial 2 finished with value: 1.125120997428894 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 224, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 192, 'static_encoder_dropout_2': 0.45, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': False, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.4, 'seq_0M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': False, 'output_head_n_layers': 4, 'output_head_uni

Best trial: 3. Best value: 1.05814:   4%|▍         | 4/100 [01:34<35:47, 22.37s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 69.2 K | train
1 | seq_0M_encoder        | Sequential       | 2.4 K  | train
2 | goutallier_0M_encoder | Sequential       | 23.2 K | train
3 | output_head           | Sequential       | 161 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | trai

[I 2025-11-09 12:05:39,270] Trial 3 finished with value: 1.0581392049789429 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 192, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.35000000000000003, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 32, 'goutallier_0M_encoder_dropout_1': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 4, 'output_head_units_0': 448, 'output_head_dropout_0'

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: 1.05814:   5%|▌         | 5/100 [03:00<1:11:52, 45.39s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 46.6 K | train
2 | goutallier_0M_encoder | Sequential       | 22.4 K | train
3 | output_head           | Sequential       | 104 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_ms

[I 2025-11-09 12:07:05,485] Trial 4 finished with value: 1.0611248016357422 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 160, 'static_encoder_dropout_2': 0.4, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.0, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_units_2': 128, 'goutallier_0M_encoder_dropout_2': 0.0, 'goutallier_0M_encoder_batch_no

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 5. Best value: 1.03997:   6%|▌         | 6/100 [03:47<1:11:58, 45.95s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 20.4 K | train
1 | seq_0M_encoder        | Sequential       | 89.4 K | train
2 | goutallier_0M_encoder | Sequential       | 13.2 K | train
3 | output_head           | Sequential       | 431 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_ms

[I 2025-11-09 12:07:52,504] Trial 5 finished with value: 1.0399723052978516 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 128, 'goutallier_0M_encoder_dropout_1': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_units_2': 32, 'goutallier_0M_encoder_dropout_2': 0.45, 'goutallier_0M_encoder_batch_norm_2': True, 'output_head_n_layers': 2, 'output_head_units_0': 288, 'output_head_dropout_0': 0.45, 'out

Best trial: 5. Best value: 1.03997:   7%|▋         | 7/100 [04:01<54:54, 35.43s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 104 K  | train
1 | seq_0M_encoder        | Sequential       | 105 K  | train
2 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
3 | output_head           | Sequential       | 3.4 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:08:06,272] Trial 6 finished with value: 1.078871250152588 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 256, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.0, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 128, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_un

Best trial: 5. Best value: 1.03997:   8%|▊         | 8/100 [04:09<40:50, 26.63s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 67.1 K | train
1 | seq_0M_encoder        | Sequential       | 61.8 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 225 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | trai

[I 2025-11-09 12:08:14,076] Trial 7 finished with value: 1.0667885541915894 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 160, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 224, 'static_encoder_dropout_2': 0.4, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 224, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': False, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 32, 'goutallier_0M_encoder_d

Best trial: 5. Best value: 1.03997:   9%|▉         | 9/100 [04:17<31:53, 21.02s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 84.3 K | train
1 | seq_0M_encoder        | Sequential       | 20.4 K | train
2 | goutallier_0M_encoder | Sequential       | 34.9 K | train
3 | output_head           | Sequential       | 190 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | trai

[I 2025-11-09 12:08:22,770] Trial 8 pruned. Trial was pruned at epoch 14.


Best trial: 9. Best value: 1.03344:  10%|█         | 10/100 [05:18<49:47, 33.19s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 7.2 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | goutallier_0M_encoder | Sequential       | 10.6 K | train
3 | output_head           | Sequential       | 3.1 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:09:23,214] Trial 9 finished with value: 1.0334439277648926 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 224, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 160, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 256, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 64, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 128, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': False, 'goutal

Best trial: 9. Best value: 1.03344:  11%|█         | 11/100 [05:33<40:57, 27.62s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 1.8 K  | train
1 | seq_0M_encoder        | Sequential       | 15.4 K | train
2 | goutallier_0M_encoder | Sequential       | 22.4 K | train
3 | output_head           | Sequential       | 54.4 K | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:09:38,182] Trial 10 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 9. Best value: 1.03344:  12%|█▏        | 12/100 [07:00<1:06:52, 45.60s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 1.8 K  | train
1 | seq_0M_encoder        | Sequential       | 12.8 K | train
2 | goutallier_0M_encoder | Sequential       | 22.4 K | train
3 | output_head           | Sequential       | 1.1 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_m

[I 2025-11-09 12:11:04,910] Trial 11 finished with value: 1.0392720699310303 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.0, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 64, 'seq_0M_encoder_dropout_1': 0.5, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 128, 'goutallier_0M_encoder_dropout_1': 0.4, 'goutallier_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_units_2': 32, 'goutallier_0M_encoder_dropout_2': 0.5, 'goutallier_0M_encoder_batch_norm_2': True, 'output_head_n_layers': 2, 'output_head_units_0': 320, 'output_head_dropout_0': 0.45, 'output_head_batch_norm

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 9. Best value: 1.03344:  13%|█▎        | 13/100 [08:20<1:21:31, 56.22s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 56.9 K | train
1 | seq_0M_encoder        | Sequential       | 21.6 K | train
2 | goutallier_0M_encoder | Sequential       | 10.6 K | train
3 | output_head           | Sequential       | 350 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_m

[I 2025-11-09 12:12:25,574] Trial 12 finished with value: 1.0397266149520874 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.0, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 64, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 128, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_units_2': 32, 'goutallier_0M_encoder_dropout_2': 0.5, 'goutallier_0M_encoder_batch_norm_2': True, 'output_head_n_layers': 1, 'lr': 6.787712136030673e-06, 'weight_decay': 6.226266895444918e-06}. Best is tr

Best trial: 9. Best value: 1.03344:  14%|█▍        | 14/100 [08:40<1:04:57, 45.32s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.3 K  | train
1 | seq_0M_encoder        | Sequential       | 21.6 K | train
2 | goutallier_0M_encoder | Sequential       | 34.9 K | train
3 | output_head           | Sequential       | 102 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | t

[I 2025-11-09 12:12:45,713] Trial 13 pruned. Trial was pruned at epoch 10.


Best trial: 9. Best value: 1.03344:  15%|█▌        | 15/100 [10:03<1:20:17, 56.67s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 36.6 K | train
1 | seq_0M_encoder        | Sequential       | 37.0 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 202 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | t

[I 2025-11-09 12:14:08,687] Trial 14 finished with value: 1.0394556522369385 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 224, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 96, 'seq_0M_encoder_dropout_1': 0.5, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 128, 'goutallier_0M_encoder_dropout_1': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_units_2': 128, 'goutallier_0M_encoder_dropout_2': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_2': True, 'output_head_n_layers': 2, 'output_head_units_0': 224, 'output_head_dropou

Best trial: 9. Best value: 1.03344:  16%|█▌        | 16/100 [10:42<1:11:47, 51.28s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | goutallier_0M_encoder | Sequential       | 9.0 K  | train
3 | output_head           | Sequential       | 2.2 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | t

[I 2025-11-09 12:14:47,452] Trial 15 finished with value: 1.0548932552337646 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 192, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 160, 'static_encoder_dropout_1': 0.30000000000000004, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 3, 'output_head_units_0': 416, 'output_head_dropout_0': 0.4, 'output_head_batch_norm_0': True, 'output

Best trial: 9. Best value: 1.03344:  17%|█▋        | 17/100 [11:00<56:49, 41.08s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 20.8 K | train
1 | seq_0M_encoder        | Sequential       | 12.8 K | train
2 | goutallier_0M_encoder | Sequential       | 20.3 K | train
3 | output_head           | Sequential       | 64.6 K | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | t

[I 2025-11-09 12:15:04,817] Trial 16 pruned. Trial was pruned at epoch 10.


Best trial: 9. Best value: 1.03344:  18%|█▊        | 18/100 [11:20<47:47, 34.97s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.4 K  | train
1 | seq_0M_encoder        | Sequential       | 31.5 K | train
2 | goutallier_0M_encoder | Sequential       | 17.9 K | train
3 | output_head           | Sequential       | 133 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:15:25,568] Trial 17 pruned. Trial was pruned at epoch 10.


Best trial: 9. Best value: 1.03344:  19%|█▉        | 19/100 [11:41<41:34, 30.80s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 41.2 K | train
1 | seq_0M_encoder        | Sequential       | 2.9 K  | train
2 | goutallier_0M_encoder | Sequential       | 18.2 K | train
3 | output_head           | Sequential       | 2.7 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:15:46,640] Trial 18 pruned. Trial was pruned at epoch 10.


Best trial: 9. Best value: 1.03344:  20%|██        | 20/100 [11:59<35:45, 26.82s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 21.7 K | train
1 | seq_0M_encoder        | Sequential       | 18.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 176 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:16:04,171] Trial 19 pruned. Trial was pruned at epoch 10.


Best trial: 20. Best value: 1.03079:  21%|██        | 21/100 [12:21<33:18, 25.29s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 21.7 K | train
1 | seq_0M_encoder        | Sequential       | 18.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 176 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:16:25,917] Trial 20 finished with value: 1.0307931900024414 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 192, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0': True, 'lr': 8.

Best trial: 20. Best value: 1.03079:  22%|██▏       | 22/100 [12:40<30:28, 23.44s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 21.7 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 190 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:16:45,036] Trial 21 finished with value: 1.0386048555374146 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 192, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0': True, 'lr': 1.

Best trial: 20. Best value: 1.03079:  23%|██▎       | 23/100 [12:59<28:26, 22.16s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 205 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:17:04,215] Trial 22 finished with value: 1.0352648496627808 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 192, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0': True, 'lr': 1.3

Best trial: 20. Best value: 1.03079:  24%|██▍       | 24/100 [13:22<28:32, 22.53s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 13.8 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 273 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:17:27,616] Trial 23 finished with value: 1.0314348936080933 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0

Best trial: 20. Best value: 1.03079:  25%|██▌       | 25/100 [13:30<22:40, 18.14s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 20.4 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 235 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:17:35,496] Trial 24 finished with value: 1.0469639301300049 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.25, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 3, 'output_head_units_0': 384, 'output_head_dropout_0': 0.25, 'output_head_batch_norm_

Best trial: 25. Best value: 1.0268:  26%|██▌       | 26/100 [13:42<20:00, 16.22s/it] Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 51.6 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 2.7 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:17:47,258] Trial 25 finished with value: 1.026798129081726 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.25, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_0': True, 'output_head_n_layers': 2, 'output_head_units_0': 480, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0': True, 'lr': 1.9789002829749232e-05, 'weight_decay': 1.6799873346782648e-06}. Best is trial 25 with value: 1.02679812908

Best trial: 25. Best value: 1.0268:  27%|██▋       | 27/100 [13:48<15:50, 13.02s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | goutallier_0M_encoder | Sequential       | 352    | train
3 | output_head           | Sequential       | 173 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:17:52,808] Trial 26 pruned. Trial was pruned at epoch 10.


Best trial: 25. Best value: 1.0268:  28%|██▊       | 28/100 [13:57<14:18, 11.92s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 37.1 K | train
1 | seq_0M_encoder        | Sequential       | 22.9 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 435 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:18:02,148] Trial 27 finished with value: 1.0491828918457031 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.30000000000000004, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_0': True, 'output_head_n_layers': 2, 'output_head_units_0': 480, 'output_head_dropout_0': 0.15000000000000002, 'output_head_batch_norm_0': True, 'lr': 1.896482407290215e-05, 'weight_decay': 2.008482101336782e-06}. Best is trial 25 with value: 1.026798129081726.


Best trial: 25. Best value: 1.0268:  29%|██▉       | 29/100 [14:03<12:05, 10.22s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.8 K | train
1 | seq_0M_encoder        | Sequential       | 13.8 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 188 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:18:08,401] Trial 28 pruned. Trial was pruned at epoch 10.


Best trial: 25. Best value: 1.0268:  30%|███       | 30/100 [14:26<16:30, 14.14s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 13.8 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 3.1 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:18:31,703] Trial 29 finished with value: 1.0294679403305054 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm_0': False, 'output_head_n_layers': 2, 'output_head_units_0': 384, 'output_head_dropout_0': 0.1, 'output_head_batch_norm_0': True, 'lr': 5.832462935424327e-06, 'weight_decay': 4.389148167684484e-06}. Best is trial 25 with value: 1.0267981290817

Best trial: 25. Best value: 1.0268:  31%|███       | 31/100 [14:32<13:10, 11.46s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.8 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 175 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:18:36,912] Trial 30 pruned. Trial was pruned at epoch 10.


Best trial: 25. Best value: 1.0268:  32%|███▏      | 32/100 [14:38<11:09,  9.85s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 13.8 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 203 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:18:42,992] Trial 31 pruned. Trial was pruned at epoch 10.


Best trial: 25. Best value: 1.0268:  33%|███▎      | 33/100 [14:57<14:00, 12.55s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 46.2 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 352    | train
3 | output_head           | Sequential       | 219 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:19:01,851] Trial 32 finished with value: 1.03571617603302 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': False, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0': True, 'lr': 1.4715022502643441e-05, 'weight_decay': 3.0819206143336345e-06}. Best is trial 25 with value: 1.026798129081

Best trial: 25. Best value: 1.0268:  34%|███▍      | 34/100 [15:02<11:33, 10.50s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 9.5 K  | train
2 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
3 | output_head           | Sequential       | 162 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:19:07,566] Trial 33 pruned. Trial was pruned at epoch 10.


Best trial: 25. Best value: 1.0268:  35%|███▌      | 35/100 [15:26<15:31, 14.33s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 9.5 K  | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 261 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:19:30,821] Trial 34 finished with value: 1.0286498069763184 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 192, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 32, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.05, 'output

Best trial: 25. Best value: 1.0268:  36%|███▌      | 36/100 [15:32<12:44, 11.94s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 137 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:19:37,201] Trial 35 pruned. Trial was pruned at epoch 10.


Best trial: 25. Best value: 1.0268:  37%|███▋      | 37/100 [15:49<14:04, 13.41s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 29.0 K | train
1 | seq_0M_encoder        | Sequential       | 52.0 K | train
2 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
3 | output_head           | Sequential       | 112 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:19:54,024] Trial 36 finished with value: 1.0319463014602661 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 192, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.1, 'goutallier_0M_encoder_batch_norm_0': False, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.05, 'output_head_batch_norm_0': True, 'lr': 2.680823399686258e-05, 'weight_decay': 1.0065413248250668e-06}. Best is trial 25 with value: 1.026798129081726.


Best trial: 25. Best value: 1.0268:  38%|███▊      | 38/100 [16:00<13:15, 12.84s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 10.4 K | train
1 | seq_0M_encoder        | Sequential       | 9.5 K  | train
2 | goutallier_0M_encoder | Sequential       | 1.5 K  | train
3 | output_head           | Sequential       | 104 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:20:05,529] Trial 37 pruned. Trial was pruned at epoch 10.


Best trial: 25. Best value: 1.0268:  39%|███▉      | 39/100 [16:06<10:57, 10.77s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 18.5 K | train
1 | seq_0M_encoder        | Sequential       | 59.6 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 432 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:20:11,481] Trial 38 pruned. Trial was pruned at epoch 10.


Best trial: 25. Best value: 1.0268:  40%|████      | 40/100 [16:22<12:13, 12.23s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 27.1 K | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 2.2 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:20:27,113] Trial 39 finished with value: 1.0389573574066162 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 160, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.25, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 160, 'seq_0M_encoder_dropout_2': 0.35000000000000003, 'seq_0M_encoder_batch_norm_2': True, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm_0': False, 'output_head_n_layers': 3, 'output_head_units_0': 512, 'output_head_dropout_0': 0.15000000000000002, 'output_head_batch_norm_0': True, 'output_head

Best trial: 25. Best value: 1.0268:  41%|████      | 41/100 [16:27<09:56, 10.11s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 205 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:20:32,278] Trial 40 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  42%|████▏     | 42/100 [16:52<14:08, 14.62s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 41.0 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
3 | output_head           | Sequential       | 190 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:20:57,424] Trial 41 finished with value: 1.0239739418029785 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.1, 'output_head_batch_norm_0

Best trial: 41. Best value: 1.02397:  43%|████▎     | 43/100 [17:08<14:22, 15.13s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 28.0 K | train
1 | seq_0M_encoder        | Sequential       | 20.4 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 250 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:21:13,731] Trial 42 finished with value: 1.0331535339355469 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 160, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 32, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.05, 'output_head_batch_nor

Best trial: 41. Best value: 1.02397:  44%|████▍     | 44/100 [17:18<12:35, 13.49s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 16.0 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 217 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:21:23,391] Trial 43 finished with value: 1.0318149328231812 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 480, 'output_head_dropout_0': 0.1, 'output_head_batch_norm_0'

Best trial: 41. Best value: 1.02397:  45%|████▌     | 45/100 [17:24<10:18, 11.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 21.7 K | train
1 | seq_0M_encoder        | Sequential       | 18.7 K | train
2 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
3 | output_head           | Sequential       | 162 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:21:29,403] Trial 44 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  46%|████▌     | 46/100 [17:38<10:56, 12.15s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 10.9 K | train
2 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
3 | output_head           | Sequential       | 185 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:21:43,671] Trial 45 pruned. Trial was pruned at epoch 26.


Best trial: 41. Best value: 1.02397:  47%|████▋     | 47/100 [17:48<10:11, 11.53s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 46.2 K | train
1 | seq_0M_encoder        | Sequential       | 16.0 K | train
2 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
3 | output_head           | Sequential       | 225 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:21:53,752] Trial 46 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  48%|████▊     | 48/100 [17:58<09:22, 10.82s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 1.8 K  | train
1 | seq_0M_encoder        | Sequential       | 20.4 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 217 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:22:02,922] Trial 47 finished with value: 1.034509539604187 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 160, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': False, 'output_head_n_layers': 2, 'output_head_units_0': 384, 'output_head_dropout_0': 0.1, 'output_head_batch_norm_0': True, 'lr': 4.507996239841631e-05, 'weight_decay': 1.3476693705434573e-05}. Best is trial 41 with value: 1.023973941802

Best trial: 41. Best value: 1.02397:  49%|████▉     | 49/100 [18:03<07:52,  9.26s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 35.8 K | train
1 | seq_0M_encoder        | Sequential       | 18.7 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 2.9 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:22:08,540] Trial 48 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  50%|█████     | 50/100 [18:09<06:48,  8.17s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 46.7 K | train
1 | seq_0M_encoder        | Sequential       | 11.7 K | train
2 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
3 | output_head           | Sequential       | 152 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:22:14,158] Trial 49 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  51%|█████     | 51/100 [18:23<08:07,  9.94s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 205 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:22:28,242] Trial 50 finished with value: 1.0357917547225952 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.30000000000000004, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.5, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.15000000000000002, 'seq_0M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 32, 'goutallier_0M_encoder_dropout_1': 0.2, 'goutallier_0M_encoder_batch_norm_1': 

Best trial: 41. Best value: 1.02397:  52%|█████▏    | 52/100 [18:37<08:57, 11.19s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 37.1 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 203 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:22:42,343] Trial 51 finished with value: 1.0351401567459106 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0

Best trial: 41. Best value: 1.02397:  53%|█████▎    | 53/100 [18:55<10:16, 13.12s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 37.1 K | train
1 | seq_0M_encoder        | Sequential       | 33.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 217 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:22:59,956] Trial 52 finished with value: 1.029471755027771 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0': True, 'lr': 1.7

Best trial: 41. Best value: 1.02397:  54%|█████▍    | 54/100 [19:05<09:23, 12.25s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 18.9 K | train
1 | seq_0M_encoder        | Sequential       | 31.3 K | train
2 | goutallier_0M_encoder | Sequential       | 10.6 K | train
3 | output_head           | Sequential       | 225 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:23:10,180] Trial 53 finished with value: 1.0355019569396973 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.15000000000000002, 'output_

Best trial: 41. Best value: 1.02397:  55%|█████▌    | 55/100 [19:16<08:51, 11.82s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 28.0 K | train
1 | seq_0M_encoder        | Sequential       | 23.3 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 205 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:23:20,990] Trial 54 finished with value: 1.0322692394256592 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 384, 'output_head_dropout_0': 0.25, 'output_head_batch_norm_0'

Best trial: 41. Best value: 1.02397:  56%|█████▌    | 56/100 [19:26<08:21, 11.40s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 28.7 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
3 | output_head           | Sequential       | 163 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:23:31,424] Trial 55 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  57%|█████▋    | 57/100 [19:32<06:59,  9.75s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 35.8 K | train
1 | seq_0M_encoder        | Sequential       | 7.4 K  | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 338 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:23:37,331] Trial 56 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  58%|█████▊    | 58/100 [19:39<06:08,  8.78s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 20.4 K | train
2 | goutallier_0M_encoder | Sequential       | 16.6 K | train
3 | output_head           | Sequential       | 298 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:23:43,854] Trial 57 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  59%|█████▉    | 59/100 [19:45<05:32,  8.11s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 17.9 K | train
1 | seq_0M_encoder        | Sequential       | 18.7 K | train
2 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
3 | output_head           | Sequential       | 1.6 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:23:50,375] Trial 58 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  60%|██████    | 60/100 [20:03<07:20, 11.01s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.5 K | train
1 | seq_0M_encoder        | Sequential       | 11.7 K | train
2 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
3 | output_head           | Sequential       | 219 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:24:08,156] Trial 59 finished with value: 1.035826325416565 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 192, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.0, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 32, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': False, 'output_head_n_layers': 1, 'lr': 2.752062312214239e-05, 'weight_decay': 3.122389298495108e-06}.

Best trial: 41. Best value: 1.02397:  61%|██████    | 61/100 [20:09<06:07,  9.41s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 32.9 K | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 205 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:24:13,854] Trial 60 pruned. Trial was pruned at epoch 10.


Best trial: 41. Best value: 1.02397:  62%|██████▏   | 62/100 [20:23<06:58, 11.00s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 41.0 K | train
1 | seq_0M_encoder        | Sequential       | 20.4 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 219 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:24:28,553] Trial 61 finished with value: 1.0383245944976807 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0

Best trial: 62. Best value: 1.0168:  63%|██████▎   | 63/100 [20:38<07:32, 12.23s/it] Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 41.0 K | train
1 | seq_0M_encoder        | Sequential       | 23.6 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 217 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:24:43,638] Trial 62 finished with value: 1.0167971849441528 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 160, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0': True, 'lr': 

Best trial: 62. Best value: 1.0168:  64%|██████▍   | 64/100 [20:54<07:54, 13.18s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 46.2 K | train
1 | seq_0M_encoder        | Sequential       | 13.8 K | train
2 | goutallier_0M_encoder | Sequential       | 10.6 K | train
3 | output_head           | Sequential       | 70.9 K | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:24:59,050] Trial 63 finished with value: 1.0348387956619263 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 160, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.2, 'output_h

Best trial: 62. Best value: 1.0168:  65%|██████▌   | 65/100 [21:00<06:27, 11.06s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.8 K | train
1 | seq_0M_encoder        | Sequential       | 23.6 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 233 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:25:05,159] Trial 64 pruned. Trial was pruned at epoch 10.


Best trial: 62. Best value: 1.0168:  66%|██████▌   | 66/100 [21:16<07:03, 12.44s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 49.1 K | train
1 | seq_0M_encoder        | Sequential       | 23.6 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 233 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:25:20,834] Trial 65 finished with value: 1.0265378952026367 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.15000000000000002, 'output_head_batch_norm_0

Best trial: 62. Best value: 1.0168:  67%|██████▋   | 67/100 [21:23<06:01, 10.94s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.8 K | train
1 | seq_0M_encoder        | Sequential       | 16.0 K | train
2 | goutallier_0M_encoder | Sequential       | 5.1 K  | train
3 | output_head           | Sequential       | 250 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:25:28,268] Trial 66 finished with value: 1.0354974269866943 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 192, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.05, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.1, 'output_head_batch_norm_0': True, 'lr':

Best trial: 62. Best value: 1.0168:  68%|██████▊   | 68/100 [21:32<05:31, 10.35s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.8 K | train
1 | seq_0M_encoder        | Sequential       | 26.8 K | train
2 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
3 | output_head           | Sequential       | 297 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:25:37,235] Trial 67 finished with value: 1.0351076126098633 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 32, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_units_2': 64, 'goutallier_0M_encoder_dropout_2': 0.25, 'goutallier_0M_encoder_batch_norm_2': False, 'ou

Best trial: 62. Best value: 1.0168:  69%|██████▉   | 69/100 [21:43<05:31, 10.68s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 73.5 K | train
1 | seq_0M_encoder        | Sequential       | 13.8 K | train
2 | goutallier_0M_encoder | Sequential       | 704    | train
3 | output_head           | Sequential       | 217 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:25:48,685] Trial 68 pruned. Trial was pruned at epoch 10.


Best trial: 62. Best value: 1.0168:  70%|███████   | 70/100 [22:10<07:39, 15.33s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 49.1 K | train
1 | seq_0M_encoder        | Sequential       | 32.9 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 2.5 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:26:14,853] Trial 69 finished with value: 1.038084864616394 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 256, 'static_encoder_dropout_0': 0.30000000000000004, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_0': False, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.1, 'output_head_batch_norm_0': True, 'lr': 1.4852032393847132e-05, 'weight_decay': 0.00022648563120368536}. Best is trial 62 with value: 1.016797184944

Best trial: 62. Best value: 1.0168:  71%|███████   | 71/100 [22:15<06:02, 12.50s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 18.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
3 | output_head           | Sequential       | 176 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:26:20,767] Trial 70 pruned. Trial was pruned at epoch 10.


Best trial: 62. Best value: 1.0168:  72%|███████▏  | 72/100 [22:32<06:23, 13.70s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 22.9 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 190 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tra

[I 2025-11-09 12:26:37,245] Trial 71 finished with value: 1.0188870429992676 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 192, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.0, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0'

Best trial: 72. Best value: 1.01424:  73%|███████▎  | 73/100 [22:53<07:10, 15.94s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 33.7 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 219 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:26:58,429] Trial 72 finished with value: 1.0142449140548706 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 192, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0

Best trial: 72. Best value: 1.01424:  74%|███████▍  | 74/100 [23:14<07:35, 17.52s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 18.0 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 173 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:27:19,628] Trial 73 finished with value: 1.0279945135116577 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 192, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 480, 'output_head_dropout_0': 0.15000000000000002, 'output_head_batch_norm

Best trial: 72. Best value: 1.01424:  75%|███████▌  | 75/100 [23:20<05:51, 14.06s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.5 K | train
1 | seq_0M_encoder        | Sequential       | 23.3 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 185 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:27:25,613] Trial 74 pruned. Trial was pruned at epoch 10.


Best trial: 72. Best value: 1.01424:  76%|███████▌  | 76/100 [23:26<04:40, 11.69s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 34.1 K | train
2 | goutallier_0M_encoder | Sequential       | 2.6 K  | train
3 | output_head           | Sequential       | 95.4 K | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:27:31,779] Trial 75 pruned. Trial was pruned at epoch 10.


Best trial: 72. Best value: 1.01424:  77%|███████▋  | 77/100 [23:32<03:49,  9.98s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 31.3 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 219 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:27:37,767] Trial 76 pruned. Trial was pruned at epoch 10.


Best trial: 72. Best value: 1.01424:  78%|███████▊  | 78/100 [23:39<03:14,  8.82s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.5 K | train
1 | seq_0M_encoder        | Sequential       | 27.1 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 204 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:27:43,885] Trial 77 pruned. Trial was pruned at epoch 10.


Best trial: 72. Best value: 1.01424:  79%|███████▉  | 79/100 [23:53<03:39, 10.47s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 28.5 K | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 218 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:27:58,200] Trial 78 finished with value: 1.0304043292999268 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 160, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 480, 'output_head_dropout_0': 0.2, 'output_head_batch_norm_0

Best trial: 72. Best value: 1.01424:  80%|████████  | 80/100 [24:22<05:20, 16.05s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 14.7 K | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
3 | output_head           | Sequential       | 189 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:28:27,266] Trial 79 pruned. Trial was pruned at epoch 16.


Best trial: 72. Best value: 1.01424:  81%|████████  | 81/100 [24:28<04:06, 12.99s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.8 K | train
1 | seq_0M_encoder        | Sequential       | 40.3 K | train
2 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
3 | output_head           | Sequential       | 233 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:28:33,113] Trial 80 pruned. Trial was pruned at epoch 10.


Best trial: 72. Best value: 1.01424:  82%|████████▏ | 82/100 [24:42<04:01, 13.43s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.8 K | train
1 | seq_0M_encoder        | Sequential       | 33.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
3 | output_head           | Sequential       | 233 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:28:47,561] Trial 81 finished with value: 1.0287643671035767 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.1, 'output_head_batch_norm_0': True, 'lr':

Best trial: 72. Best value: 1.01424:  83%|████████▎ | 83/100 [24:57<03:54, 13.77s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.8 K | train
1 | seq_0M_encoder        | Sequential       | 33.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
3 | output_head           | Sequential       | 233 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:29:02,128] Trial 82 finished with value: 1.0154244899749756 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.05, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.05, 'output_head_batch_norm_0': True, 'lr':

Best trial: 72. Best value: 1.01424:  84%|████████▍ | 84/100 [25:11<03:41, 13.85s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 33.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
3 | output_head           | Sequential       | 233 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:29:16,153] Trial 83 finished with value: 1.025421380996704 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.05, 'output_head_batch_norm_0': True, 'lr': 1.548033527721849e

Best trial: 72. Best value: 1.01424:  85%|████████▌ | 85/100 [25:24<03:25, 13.70s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 33.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
3 | output_head           | Sequential       | 233 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:29:29,505] Trial 84 finished with value: 1.018728494644165 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.05, 'output_head_batch_norm_0': True, 'lr': 1.47333774147504

Best trial: 72. Best value: 1.01424:  86%|████████▌ | 86/100 [25:36<03:04, 13.16s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 38.9 K | train
2 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
3 | output_head           | Sequential       | 230 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:29:41,426] Trial 85 finished with value: 1.0185856819152832 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.05, 'output_head_batch_norm_0': True, 'lr': 1.553136739585643

Best trial: 72. Best value: 1.01424:  87%|████████▋ | 87/100 [25:49<02:51, 13.22s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 33.7 K | train
2 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
3 | output_head           | Sequential       | 217 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:29:54,780] Trial 86 finished with value: 1.0152710676193237 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.05, 'output_head_batch_norm_0': True, 'lr': 1.544149209047743e

Best trial: 72. Best value: 1.01424:  88%|████████▊ | 88/100 [26:06<02:52, 14.34s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 33.7 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 217 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:30:11,747] Trial 87 finished with value: 1.024205207824707 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.0, 'output_head_batch_norm_0': True, 'lr': 1.569754806056948e-

Best trial: 88. Best value: 1.0137:  89%|████████▉ | 89/100 [26:20<02:35, 14.17s/it] Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 34.1 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 203 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:30:25,495] Trial 88 finished with value: 1.0136961936950684 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.0, 'output_head_batch_norm_0': True, 'lr': 2.6531008279299003

Best trial: 89. Best value: 1.00992:  90%|█████████ | 90/100 [26:30<02:09, 12.95s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 40.3 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 217 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:30:35,598] Trial 89 finished with value: 1.009922742843628 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.0, 'output_head_batch_norm_0': True, 'lr': 2.7

Best trial: 89. Best value: 1.00992:  91%|█████████ | 91/100 [26:43<01:55, 12.88s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 28.5 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 203 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:30:48,318] Trial 90 finished with value: 1.0277363061904907 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.5, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.0, 'output_head_batch_norm_0': 

Best trial: 89. Best value: 1.00992:  92%|█████████▏| 92/100 [26:52<01:34, 11.84s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 39.7 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 219 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:30:57,737] Trial 91 finished with value: 1.0316901206970215 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.0, 'output_head_batch_norm_0': True, 'lr': 3.9074478464119946

Best trial: 89. Best value: 1.00992:  93%|█████████▎| 93/100 [26:59<01:11, 10.16s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 44.1 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 243 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:31:03,982] Trial 92 pruned. Trial was pruned at epoch 10.


Best trial: 89. Best value: 1.00992:  94%|█████████▍| 94/100 [27:07<00:56,  9.48s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 28.5 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 219 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:31:11,873] Trial 93 finished with value: 1.0289939641952515 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 416, 'output_head_dropout_0': 0.0, 'output_head_batch_norm_0': True, 'lr': 6

Best trial: 89. Best value: 1.00992:  95%|█████████▌| 95/100 [27:16<00:46,  9.38s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 28.5 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 188 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:31:21,031] Trial 94 finished with value: 1.0233538150787354 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.1, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 448, 'output_head_dropout_0': 0.05, 'output_head_batch_norm_0': True, 'lr': 2.

Best trial: 89. Best value: 1.00992:  96%|█████████▌| 96/100 [27:26<00:39,  9.77s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 40.3 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 233 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:31:31,688] Trial 95 finished with value: 1.0260977745056152 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 224, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 384, 'output_head_dropout_0': 0.05, 'output_head_batch_norm_0'

Best trial: 89. Best value: 1.00992:  97%|█████████▋| 97/100 [27:32<00:25,  8.55s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 24.9 K | train
1 | seq_0M_encoder        | Sequential       | 38.9 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 248 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:31:37,401] Trial 96 pruned. Trial was pruned at epoch 10.


Best trial: 89. Best value: 1.00992:  98%|█████████▊| 98/100 [27:38<00:15,  7.72s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 64.4 K | train
1 | seq_0M_encoder        | Sequential       | 34.1 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 200 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:31:43,186] Trial 97 pruned. Trial was pruned at epoch 10.


Best trial: 89. Best value: 1.00992:  99%|█████████▉| 99/100 [28:21<00:18, 18.27s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 64.4 K | train
1 | seq_0M_encoder        | Sequential       | 46.9 K | train
2 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
3 | output_head           | Sequential       | 212 K  | train
4 | train_mse             | MeanSquaredError | 0      | train
5 | val_mse               | MeanSquaredError | 0      | tr

[I 2025-11-09 12:32:26,067] Trial 98 finished with value: 1.010851263999939 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 224, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 384, 'output_head_dropout_0': 0.5, 'output_head_batch_norm_0': 

Best trial: 89. Best value: 1.00992: 100%|██████████| 100/100 [29:07<00:00, 17.47s/it]
[I 2025-11-09 12:33:12,285] A new study created in memory with name: sequential_mlp2_optimization


[I 2025-11-09 12:33:12,278] Trial 99 finished with value: 1.0233595371246338 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 224, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 256, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 224, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.2, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 2, 'output_head_units_0': 384, 'output_head_dropout_0': 0.45, 'output_head_batch_norm_0': True, 'lr': 1.84917730242617e

  0%|          | 0/100 [00:00<?, ?it/s]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 17.3 K | train
2 | seq_2M_encoder        | Sequential       | 8.8 K  | train
3 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
4 | output_head           | Sequential       | 193 K  | train
5 | train_mse             | MeanSquaredError | 0      | train
6 | val_mse               | MeanSquaredErr

[I 2025-11-09 12:33:49,701] Trial 0 finished with value: 0.9360555410385132 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.5, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 32, 'seq_2M_encoder_dropout_1': 0.15000000000000002, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.1, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.15000000000000002, 'goutallier_0M_encoder_batch_norm_0': True, 'o

Best trial: 0. Best value: 0.936056:   2%|▏         | 2/100 [01:07<53:58, 33.05s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 13.8 K | train
1 | seq_0M_encoder        | Sequential       | 70.1 K | train
2 | seq_2M_encoder        | Sequential       | 13.4 K | train
3 | goutallier_0M_encoder | Sequential       | 352    | train
4 | output_head           | Sequential       | 88.7 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 12:34:19,693] Trial 1 finished with value: 1.008135199546814 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.0, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.35000000000000003, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': False, 'seq_0M_encoder_units_2': 160, 'seq_0M_encoder_dropout_2': 0.4, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 32, 'seq_2M_encoder_dropout_1': 0

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 2. Best value: 0.900552:   3%|▎         | 3/100 [01:42<54:40, 33.82s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 4.3 K  | train
1 | seq_0M_encoder        | Sequential       | 81.8 K | train
2 | seq_2M_encoder        | Sequential       | 960    | train
3 | goutallier_0M_encoder | Sequential       | 13.7 K | train
4 | output_head           | Sequential       | 119 K  | train
5 | train_m

[I 2025-11-09 12:34:54,423] Trial 2 finished with value: 0.9005518555641174 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.30000000000000004, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.2, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.25, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2':

Best trial: 2. Best value: 0.900552:   4%|▍         | 4/100 [02:49<1:15:17, 47.06s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 27.9 K | train
1 | seq_0M_encoder        | Sequential       | 21.8 K | train
2 | seq_2M_encoder        | Sequential       | 1.4 K  | train
3 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
4 | output_head           | Sequential       | 4.2 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 12:36:01,789] Trial 3 finished with value: 1.0516129732131958 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.35000000000000003, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.30000000000000004, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.2, 'seq_2M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_u

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 4. Best value: 0.881974:   5%|▌         | 5/100 [03:20<1:05:24, 41.32s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 46.6 K | train
2 | seq_2M_encoder        | Sequential       | 7.1 K  | train
3 | goutallier_0M_encoder | Sequential       | 13.7 K | train
4 | output_head           | Sequential       | 55.2 K | train
5 | train

[I 2025-11-09 12:36:32,917] Trial 4 finished with value: 0.8819741606712341 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.25, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 64, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 96, 'seq_0M_encoder_dropout_2': 0.4, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 32, 'seq_2M_encoder

Best trial: 4. Best value: 0.881974:   6%|▌         | 6/100 [03:27<46:28, 29.66s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 17.1 K | train
2 | seq_2M_encoder        | Sequential       | 9.7 K  | train
3 | goutallier_0M_encoder | Sequential       | 352    | train
4 | output_head           | Sequential       | 61.2 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 12:36:39,955] Trial 5 pruned. Trial was pruned at epoch 10.


Best trial: 4. Best value: 0.881974:   7%|▋         | 7/100 [04:25<1:00:05, 38.77s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 21.6 K | train
2 | seq_2M_encoder        | Sequential       | 13.2 K | train
3 | goutallier_0M_encoder | Sequential       | 9.3 K  | train
4 | output_head           | Sequential       | 3.5 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 12:37:37,476] Trial 6 pruned. Trial was pruned at epoch 29.


Best trial: 4. Best value: 0.881974:   8%|▊         | 8/100 [04:32<44:01, 28.72s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 13.8 K | train
1 | seq_0M_encoder        | Sequential       | 53.0 K | train
2 | seq_2M_encoder        | Sequential       | 960    | train
3 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
4 | output_head           | Sequential       | 25.5 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 12:37:44,669] Trial 7 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 8. Best value: 0.880378:   9%|▉         | 9/100 [05:27<56:04, 36.97s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 33.5 K | train
2 | seq_2M_encoder        | Sequential       | 26.5 K | train
3 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
4 | output_head           | Sequential       | 143 K  | train
5 | train_m

[I 2025-11-09 12:38:39,802] Trial 8 finished with value: 0.8803783655166626 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.30000000000000004, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.0, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.30000000000000004, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.1, 'seq_2M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_n_layers': 2, 'goutal

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 9. Best value: 0.871476:  10%|█         | 10/100 [07:08<1:25:08, 56.77s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | trai

[I 2025-11-09 12:40:20,882] Trial 9 finished with value: 0.8714764714241028 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 96, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': False, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.25, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.1500000000

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 10. Best value: 0.821219:  11%|█         | 11/100 [08:42<1:41:11, 68.22s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | tra

[I 2025-11-09 12:41:55,071] Trial 10 finished with value: 0.8212189674377441 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.45, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'output_

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 10. Best value: 0.821219:  12%|█▏        | 12/100 [10:16<1:51:34, 76.08s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | tra

[I 2025-11-09 12:43:29,123] Trial 11 finished with value: 0.832429051399231 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'output_hea

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 10. Best value: 0.821219:  13%|█▎        | 13/100 [11:50<1:57:57, 81.35s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 26.0 K | train
3 | goutallier_0M_encoder | Sequential       | 26.7 K | train
4 | output_head           | Sequential       | 99.9 K | train
5 | tra

[I 2025-11-09 12:45:02,590] Trial 12 finished with value: 0.8227124810218811 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'output_hea

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 10. Best value: 0.821219:  14%|█▍        | 14/100 [13:29<2:04:14, 86.68s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 26.2 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 113 K  | train
5 | tra

[I 2025-11-09 12:46:41,601] Trial 13 finished with value: 0.8367344737052917 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.1, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'goutallier

Best trial: 10. Best value: 0.821219:  15%|█▌        | 15/100 [14:36<1:54:23, 80.74s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 7.3 K  | train
3 | goutallier_0M_encoder | Sequential       | 10.6 K | train
4 | output_head           | Sequential       | 73.1 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:47:48,587] Trial 14 pruned. Trial was pruned at epoch 34.


Best trial: 10. Best value: 0.821219:  16%|█▌        | 16/100 [15:19<1:37:22, 69.55s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 10.2 K | train
3 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
4 | output_head           | Sequential       | 130 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:48:32,138] Trial 15 finished with value: 0.8881729245185852 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.1, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.4, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head_n_layers': 3, 'output_head_units_0': 160, 'output_head_dropout_0': 0.1, 'output_head_batch_norm_0': Fa

Best trial: 10. Best value: 0.821219:  17%|█▋        | 17/100 [15:42<1:16:37, 55.39s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 1.8 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | goutallier_0M_encoder | Sequential       | 30.5 K | train
4 | output_head           | Sequential       | 71.5 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:48:54,597] Trial 16 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 10. Best value: 0.821219:  18%|█▊        | 18/100 [17:15<1:31:15, 66.78s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.9 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | goutallier_0M_encoder | Sequential       | 17.2 K | train
4 | output_head           | Sequential       | 60.7 K | train
5 | tra

[I 2025-11-09 12:50:27,880] Trial 17 finished with value: 0.8557336330413818 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 128, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'goutallier_0M_encoder_units_2': 128, 'goutallier_0M_encoder_dropout_2': 0.5, 'goutallier_0M_encoder_batch_norm_

Best trial: 10. Best value: 0.821219:  19%|█▉        | 19/100 [18:18<1:28:38, 65.67s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 16.0 K | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
4 | output_head           | Sequential       | 181 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:51:30,958] Trial 18 pruned. Trial was pruned at epoch 34.


Best trial: 10. Best value: 0.821219:  20%|██        | 20/100 [18:31<1:06:15, 49.69s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 7.3 K  | train
3 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
4 | output_head           | Sequential       | 55.1 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:51:43,421] Trial 19 pruned. Trial was pruned at epoch 10.


Best trial: 10. Best value: 0.821219:  21%|██        | 21/100 [18:51<53:39, 40.75s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:52:03,320] Trial 20 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 21. Best value: 0.820599:  22%|██▏       | 22/100 [20:25<1:13:47, 56.77s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 26.2 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 87.6 K | train
5 | tra

[I 2025-11-09 12:53:37,439] Trial 21 finished with value: 0.8205987215042114 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'output_he

Best trial: 21. Best value: 0.820599:  23%|██▎       | 23/100 [21:31<1:16:43, 59.78s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.0 K | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 23.1 K | train
3 | goutallier_0M_encoder | Sequential       | 18.2 K | train
4 | output_head           | Sequential       | 83.9 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:54:44,259] Trial 22 pruned. Trial was pruned at epoch 34.


Best trial: 21. Best value: 0.820599:  24%|██▍       | 24/100 [21:54<1:01:30, 48.57s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 26.0 K | train
3 | goutallier_0M_encoder | Sequential       | 10.6 K | train
4 | output_head           | Sequential       | 96.6 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:55:06,656] Trial 23 pruned. Trial was pruned at epoch 10.


Best trial: 21. Best value: 0.820599:  25%|██▌       | 25/100 [22:48<1:02:53, 50.32s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 31.3 K | train
2 | seq_2M_encoder        | Sequential       | 20.0 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 125 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:56:01,067] Trial 24 pruned. Trial was pruned at epoch 29.


Best trial: 21. Best value: 0.820599:  26%|██▌       | 26/100 [23:48<1:05:37, 53.20s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 13.8 K | train
4 | output_head           | Sequential       | 97.8 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:57:00,998] Trial 25 pruned. Trial was pruned at epoch 29.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 21. Best value: 0.820599:  27%|██▋       | 27/100 [25:32<1:23:10, 68.36s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.9 K  | train
1 | seq_0M_encoder        | Sequential       | 26.8 K | train
2 | seq_2M_encoder        | Sequential       | 18.0 K | train
3 | goutallier_0M_encoder | Sequential       | 11.4 K | train
4 | output_head           | Sequential       | 119 K  | train
5 | tra

[I 2025-11-09 12:58:44,709] Trial 26 finished with value: 0.8211435675621033 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.1, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.45, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.35000000000000003, 'gout

Best trial: 21. Best value: 0.820599:  28%|██▊       | 28/100 [26:01<1:07:53, 56.57s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 23.4 K | train
3 | goutallier_0M_encoder | Sequential       | 7.6 K  | train
4 | output_head           | Sequential       | 97.7 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:59:13,778] Trial 27 pruned. Trial was pruned at epoch 24.


Best trial: 21. Best value: 0.820599:  29%|██▉       | 29/100 [26:08<49:29, 41.82s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 1.8 K  | train
1 | seq_0M_encoder        | Sequential       | 27.1 K | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
4 | output_head           | Sequential       | 95.8 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 12:59:21,174] Trial 28 pruned. Trial was pruned at epoch 10.


Best trial: 21. Best value: 0.820599:  30%|███       | 30/100 [26:36<43:53, 37.61s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.0 K | train
1 | seq_0M_encoder        | Sequential       | 3.4 K  | train
2 | seq_2M_encoder        | Sequential       | 23.4 K | train
3 | goutallier_0M_encoder | Sequential       | 9.3 K  | train
4 | output_head           | Sequential       | 97.4 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 12:59:48,983] Trial 29 pruned. Trial was pruned at epoch 13.


Best trial: 21. Best value: 0.820599:  31%|███       | 31/100 [26:49<34:49, 30.28s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:00:02,148] Trial 30 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  32%|███▏      | 32/100 [28:25<56:25, 49.78s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
4 | output_head           | Sequential       | 101 K  | train
5 | train

[I 2025-11-09 13:01:37,432] Trial 31 finished with value: 0.8167008757591248 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.45, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_encode

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  33%|███▎      | 33/100 [30:01<1:11:09, 63.73s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 10.6 K | train
4 | output_head           | Sequential       | 112 K  | train
5 | tra

[I 2025-11-09 13:03:13,695] Trial 32 finished with value: 0.8304494023323059 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.45, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_en

Best trial: 31. Best value: 0.816701:  34%|███▍      | 34/100 [30:23<56:27, 51.32s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 18.2 K | train
4 | output_head           | Sequential       | 99.9 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:03:36,081] Trial 33 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  35%|███▌      | 35/100 [30:45<45:56, 42.41s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 26.0 K | train
3 | goutallier_0M_encoder | Sequential       | 352    | train
4 | output_head           | Sequential       | 58.7 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:03:57,695] Trial 34 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  36%|███▌      | 36/100 [31:17<41:53, 39.27s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | goutallier_0M_encoder | Sequential       | 23.5 K | train
4 | output_head           | Sequential       | 5.0 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:04:29,636] Trial 35 pruned. Trial was pruned at epoch 18.


Best trial: 31. Best value: 0.816701:  37%|███▋      | 37/100 [31:23<30:42, 29.25s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 23.3 K | train
2 | seq_2M_encoder        | Sequential       | 6.6 K  | train
3 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
4 | output_head           | Sequential       | 107 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:04:35,496] Trial 36 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  38%|███▊      | 38/100 [31:45<28:11, 27.28s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 960    | train
3 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
4 | output_head           | Sequential       | 41.2 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:04:58,184] Trial 37 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  39%|███▉      | 39/100 [32:13<27:42, 27.26s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 22.1 K | train
1 | seq_0M_encoder        | Sequential       | 13.8 K | train
2 | seq_2M_encoder        | Sequential       | 13.9 K | train
3 | goutallier_0M_encoder | Sequential       | 16.0 K | train
4 | output_head           | Sequential       | 162 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:05:25,389] Trial 38 pruned. Trial was pruned at epoch 27.


Best trial: 31. Best value: 0.816701:  40%|████      | 40/100 [32:21<21:33, 21.55s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 25.2 K | train
2 | seq_2M_encoder        | Sequential       | 21.8 K | train
3 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
4 | output_head           | Sequential       | 51.1 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:05:33,630] Trial 39 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  41%|████      | 41/100 [32:42<21:10, 21.53s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:05:55,115] Trial 40 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  42%|████▏     | 42/100 [34:17<42:07, 43.58s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train

[I 2025-11-09 13:07:30,126] Trial 41 finished with value: 0.819562554359436 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'output_head

Best trial: 31. Best value: 0.816701:  43%|████▎     | 43/100 [35:03<41:58, 44.18s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 18.2 K | train
4 | output_head           | Sequential       | 105 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:08:15,732] Trial 42 pruned. Trial was pruned at epoch 23.


Best trial: 31. Best value: 0.816701:  44%|████▍     | 44/100 [35:37<38:31, 41.28s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 99.9 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:08:50,230] Trial 43 pruned. Trial was pruned at epoch 17.


Best trial: 31. Best value: 0.816701:  45%|████▌     | 45/100 [36:44<44:43, 48.79s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 23.1 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 133 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:09:56,556] Trial 44 pruned. Trial was pruned at epoch 34.


Best trial: 31. Best value: 0.816701:  46%|████▌     | 46/100 [37:05<36:27, 40.51s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 7.8 K  | train
1 | seq_0M_encoder        | Sequential       | 2.4 K  | train
2 | seq_2M_encoder        | Sequential       | 26.2 K | train
3 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
4 | output_head           | Sequential       | 96.6 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:10:17,740] Trial 45 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  47%|████▋     | 47/100 [37:25<30:14, 34.23s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | goutallier_0M_encoder | Sequential       | 13.7 K | train
4 | output_head           | Sequential       | 5.4 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:10:37,312] Trial 46 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  48%|████▊     | 48/100 [37:35<23:32, 27.15s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 9.2 K  | train
3 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
4 | output_head           | Sequential       | 62.9 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:10:47,960] Trial 47 pruned. Trial was pruned at epoch 20.


Best trial: 31. Best value: 0.816701:  49%|████▉     | 49/100 [37:57<21:38, 25.45s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.0 K | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
4 | output_head           | Sequential       | 88.0 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:11:09,437] Trial 48 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  50%|█████     | 50/100 [38:19<20:28, 24.56s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 18.7 K | train
2 | seq_2M_encoder        | Sequential       | 23.1 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 76.6 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:11:31,925] Trial 49 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  51%|█████     | 51/100 [38:31<16:51, 20.64s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:11:43,402] Trial 50 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  52%|█████▏    | 52/100 [40:05<34:14, 42.81s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 20.3 K | train
4 | output_head           | Sequential       | 87.6 K | train
5 | train

[I 2025-11-09 13:13:17,935] Trial 51 finished with value: 0.8312068581581116 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'output_hea

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  53%|█████▎    | 53/100 [41:46<47:03, 60.07s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 20.3 K | train
4 | output_head           | Sequential       | 73.1 K | train
5 | train

[I 2025-11-09 13:14:58,289] Trial 52 finished with value: 0.8209425806999207 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'goutallier

Best trial: 31. Best value: 0.816701:  54%|█████▍    | 54/100 [42:53<47:48, 62.36s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 20.3 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:16:05,983] Trial 53 pruned. Trial was pruned at epoch 33.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  55%|█████▌    | 55/100 [44:34<55:23, 73.86s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 63.7 K | train
2 | seq_2M_encoder        | Sequential       | 6.4 K  | train
3 | goutallier_0M_encoder | Sequential       | 23.5 K | train
4 | output_head           | Sequential       | 118 K  | train
5 | train

[I 2025-11-09 13:17:46,694] Trial 54 finished with value: 0.8274044394493103 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1': True, 'goutall

Best trial: 31. Best value: 0.816701:  56%|█████▌    | 56/100 [44:58<43:15, 58.98s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 26.0 K | train
3 | goutallier_0M_encoder | Sequential       | 17.2 K | train
4 | output_head           | Sequential       | 164 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:18:10,957] Trial 55 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  57%|█████▋    | 57/100 [45:21<34:36, 48.28s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 960    | train
3 | goutallier_0M_encoder | Sequential       | 14.1 K | train
4 | output_head           | Sequential       | 94.8 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:18:34,261] Trial 56 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  58%|█████▊    | 58/100 [46:07<33:17, 47.57s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 1.8 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 10.6 K | train
4 | output_head           | Sequential       | 88.0 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:19:20,159] Trial 57 pruned. Trial was pruned at epoch 24.


Best trial: 31. Best value: 0.816701:  59%|█████▉    | 59/100 [46:29<27:05, 39.65s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
4 | output_head           | Sequential       | 107 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:19:41,352] Trial 58 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  60%|██████    | 60/100 [46:52<23:11, 34.79s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 19.3 K | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.6 K | train
3 | goutallier_0M_encoder | Sequential       | 23.5 K | train
4 | output_head           | Sequential       | 98.4 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:20:04,790] Trial 59 pruned. Trial was pruned at epoch 12.


Best trial: 31. Best value: 0.816701:  61%|██████    | 61/100 [47:00<17:20, 26.69s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:20:12,590] Trial 60 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  62%|██████▏   | 62/100 [48:23<27:40, 43.70s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:21:35,989] Trial 61 pruned. Trial was pruned at epoch 43.


Best trial: 31. Best value: 0.816701:  63%|██████▎   | 63/100 [49:19<29:09, 47.28s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
4 | output_head           | Sequential       | 116 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:22:31,612] Trial 62 pruned. Trial was pruned at epoch 28.


Best trial: 31. Best value: 0.816701:  64%|██████▍   | 64/100 [49:38<23:21, 38.92s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 106 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:22:51,035] Trial 63 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  65%|██████▌   | 65/100 [50:00<19:40, 33.73s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 26.0 K | train
3 | goutallier_0M_encoder | Sequential       | 18.2 K | train
4 | output_head           | Sequential       | 74.1 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:23:12,654] Trial 64 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  66%|██████▌   | 66/100 [50:32<18:54, 33.36s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 23.1 K | train
3 | goutallier_0M_encoder | Sequential       | 20.3 K | train
4 | output_head           | Sequential       | 66.9 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:23:45,141] Trial 65 pruned. Trial was pruned at epoch 17.


Best trial: 31. Best value: 0.816701:  67%|██████▋   | 67/100 [50:44<14:49, 26.95s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 10.6 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:23:57,148] Trial 66 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  68%|██████▊   | 68/100 [51:06<13:28, 25.26s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 119 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:24:18,463] Trial 67 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  69%|██████▉   | 69/100 [52:45<24:30, 47.43s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
4 | output_head           | Sequential       | 119 K  | train
5 | train

[I 2025-11-09 13:25:57,625] Trial 68 finished with value: 0.8185876607894897 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.4, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_encoder_batch_norm_

Best trial: 31. Best value: 0.816701:  70%|███████   | 70/100 [53:07<19:53, 39.78s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 15.4 K | train
1 | seq_0M_encoder        | Sequential       | 59.9 K | train
2 | seq_2M_encoder        | Sequential       | 16.1 K | train
3 | goutallier_0M_encoder | Sequential       | 23.5 K | train
4 | output_head           | Sequential       | 115 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:26:19,537] Trial 69 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  71%|███████   | 71/100 [55:09<31:13, 64.60s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 100 K  | train
5 | train

[I 2025-11-09 13:28:22,063] Trial 70 finished with value: 0.8262606263160706 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 128, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.4, 'seq_2M_e

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  72%|███████▏  | 72/100 [56:48<34:58, 74.96s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 106 K  | train
5 | train

[I 2025-11-09 13:30:01,200] Trial 71 finished with value: 0.8333803415298462 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.45, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.45, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_batch_norm_1

Best trial: 31. Best value: 0.816701:  73%|███████▎  | 73/100 [57:11<26:37, 59.16s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 124 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:30:23,476] Trial 72 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  74%|███████▍  | 74/100 [57:33<20:48, 48.01s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:30:45,492] Trial 73 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  75%|███████▌  | 75/100 [59:10<26:06, 62.67s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 3.8 K  | train
2 | seq_2M_encoder        | Sequential       | 17.8 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 112 K  | train
5 | train

[I 2025-11-09 13:32:22,372] Trial 74 finished with value: 0.8230971097946167 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.25, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.45, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encode

Best trial: 31. Best value: 0.816701:  76%|███████▌  | 76/100 [59:31<20:06, 50.25s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 2.9 K  | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
4 | output_head           | Sequential       | 115 K  | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:32:43,649] Trial 75 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  77%|███████▋  | 77/100 [59:54<16:07, 42.05s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 26.2 K | train
3 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train_mse             | MeanSquaredError | 0      | t

[I 2025-11-09 13:33:06,563] Trial 76 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  78%|███████▊  | 78/100 [1:00:05<11:58, 32.66s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 23.1 K | train
3 | goutallier_0M_encoder | Sequential       | 13.7 K | train
4 | output_head           | Sequential       | 41.5 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:33:17,308] Trial 77 pruned. Trial was pruned at epoch 17.


Best trial: 31. Best value: 0.816701:  79%|███████▉  | 79/100 [1:00:16<09:09, 26.19s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 34.8 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 147 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:33:28,388] Trial 78 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  80%|████████  | 80/100 [1:00:38<08:21, 25.05s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
4 | output_head           | Sequential       | 58.7 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:33:50,792] Trial 79 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  81%|████████  | 81/100 [1:01:08<08:26, 26.67s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
4 | output_head           | Sequential       | 132 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:34:21,249] Trial 80 pruned. Trial was pruned at epoch 16.


Best trial: 31. Best value: 0.816701:  82%|████████▏ | 82/100 [1:02:14<11:28, 38.23s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:35:26,439] Trial 81 pruned. Trial was pruned at epoch 35.


Best trial: 31. Best value: 0.816701:  83%|████████▎ | 83/100 [1:02:35<09:23, 33.15s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:35:47,754] Trial 82 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  84%|████████▍ | 84/100 [1:03:41<11:28, 43.03s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 93.7 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:36:53,822] Trial 83 pruned. Trial was pruned at epoch 33.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  85%|████████▌ | 85/100 [1:05:19<14:50, 59.38s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 123 K  | train
5 | tra

[I 2025-11-09 13:38:31,353] Trial 84 finished with value: 0.8244006037712097 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 31. Best value: 0.816701:  86%|████████▌ | 86/100 [1:06:55<16:28, 70.62s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 17.2 K | train
4 | output_head           | Sequential       | 100 K  | train
5 | tra

[I 2025-11-09 13:40:08,212] Trial 85 finished with value: 0.8167228698730469 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.1, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.45, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_encode

Best trial: 31. Best value: 0.816701:  87%|████████▋ | 87/100 [1:08:20<16:14, 74.92s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 26.0 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 131 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:41:33,174] Trial 86 pruned. Trial was pruned at epoch 41.


Best trial: 31. Best value: 0.816701:  88%|████████▊ | 88/100 [1:08:42<11:46, 58.91s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.4 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 1.3 K  | train
3 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
4 | output_head           | Sequential       | 119 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:41:54,721] Trial 87 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  89%|████████▉ | 89/100 [1:09:04<08:46, 47.89s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 26.2 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 69.0 K | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:42:16,891] Trial 88 pruned. Trial was pruned at epoch 10.


Best trial: 31. Best value: 0.816701:  90%|█████████ | 90/100 [1:09:26<06:40, 40.10s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 17.9 K | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 26.7 K | train
4 | output_head           | Sequential       | 106 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:42:38,803] Trial 89 finished with value: 0.8669764995574951 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.4, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.5, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_encoder_batch_norm_1': True, 'output_hea

Best trial: 31. Best value: 0.816701:  91%|█████████ | 91/100 [1:09:40<04:50, 32.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:42:52,706] Trial 90 pruned. Trial was pruned at epoch 20.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 91. Best value: 0.815697:  92%|█████████▏| 92/100 [1:11:16<06:51, 51.48s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | tra

[I 2025-11-09 13:44:29,101] Trial 91 finished with value: 0.815697431564331 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.45, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.35000000000000003, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.5, 'goutallier_0M_encoder_

Best trial: 91. Best value: 0.815697:  93%|█████████▎| 93/100 [1:12:38<07:02, 60.42s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:45:50,360] Trial 92 finished with value: 0.81844562292099 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.35000000000000003, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutal

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 93. Best value: 0.810839:  94%|█████████▍| 94/100 [1:14:14<07:08, 71.34s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | tra

[I 2025-11-09 13:47:27,178] Trial 93 finished with value: 0.8108391165733337 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.35000000000000003, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_encod

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 93. Best value: 0.810839:  95%|█████████▌| 95/100 [1:15:51<06:33, 78.79s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 124 K  | train
5 | tra

[I 2025-11-09 13:49:03,359] Trial 94 finished with value: 0.8171094059944153 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.1, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.35000000000000003, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_encoder

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 93. Best value: 0.810839:  96%|█████████▌| 96/100 [1:17:27<05:36, 84.23s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 124 K  | train
5 | tra

[I 2025-11-09 13:50:40,266] Trial 95 finished with value: 0.8109049797058105 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.15000000000000002, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.35000000000000003, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.25, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'gout

Best trial: 93. Best value: 0.810839:  97%|█████████▋| 97/100 [1:17:49<03:16, 65.40s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 124 K  | train
5 | train_mse             | MeanSquaredError | 0      |

[I 2025-11-09 13:51:01,740] Trial 96 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 93. Best value: 0.810839:  98%|█████████▊| 98/100 [1:19:26<02:29, 74.89s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 2.4 K  | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 149 K  | train
5 | tra

[I 2025-11-09 13:52:38,788] Trial 97 finished with value: 0.8189512491226196 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.1, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.35000000000000003, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.45, 'goutallier_0M_encoder

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 93. Best value: 0.810839:  99%|█████████▉| 99/100 [1:21:04<01:21, 81.80s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 2.4 K  | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 166 K  | train
5 | tra

[I 2025-11-09 13:54:16,717] Trial 98 finished with value: 0.8154956102371216 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.1, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.35000000000000003, 'seq_2M_encoder_batch_norm_2': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 128, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_units_1': 96, 'goutallier_0M_encoder_dropout_1': 0.4, 'goutallier_0M_encoder

Best trial: 93. Best value: 0.810839: 100%|██████████| 100/100 [1:21:26<00:00, 48.86s/it]
[I 2025-11-09 13:54:38,309] A new study created in memory with name: sequential_mlp3_optimization


[I 2025-11-09 13:54:38,302] Trial 99 pruned. Trial was pruned at epoch 10.

[Model 2] 최적화 완료!
최고 성능: 0.810839
최적 파라미터:
  batch_size: 32
  static_encoder_n_layers: 1
  static_encoder_units_0: 128
  static_encoder_dropout_0: 0.05
  static_encoder_batch_norm_0: True
  seq_0M_encoder_n_layers: 1
  seq_0M_encoder_units_0: 64
  seq_0M_encoder_dropout_0: 0.35000000000000003
  seq_0M_encoder_batch_norm_0: False
  seq_2M_encoder_n_layers: 3
  seq_2M_encoder_units_0: 96
  seq_2M_encoder_dropout_0: 0.0
  seq_2M_encoder_batch_norm_0: False
  seq_2M_encoder_units_1: 128
  seq_2M_encoder_dropout_1: 0.05
  seq_2M_encoder_batch_norm_1: True
  seq_2M_encoder_units_2: 128
  seq_2M_encoder_dropout_2: 0.35000000000000003
  seq_2M_encoder_batch_norm_2: False
  goutallier_0M_encoder_n_layers: 2
  goutallier_0M_encoder_units_0: 128
  goutallier_0M_encoder_dropout_0: 0.25
  goutallier_0M_encoder_batch_norm_0: True
  goutallier_0M_encoder_units_1: 96
  goutallier_0M_encoder_dropout_1: 0.45
  goutallier_0M_enco

  0%|          | 0/100 [00:00<?, ?it/s]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.2 K | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 2.6 K  | train
3 | seq_3M_encoder        | Sequential       | 53.4 K | train
4 | goutallier_0M_encoder | Sequential       | 1.5 K  | train
5 | output_head           | Sequential       | 25.5 K | train
6 | train_mse             | MeanSquaredErr

[I 2025-11-09 13:55:11,921] Trial 0 finished with value: 0.9742297530174255 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.0, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.15000000000000002, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 160, 'seq_3M_encoder_dropout_0': 0.35000000000000003, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 224, 'seq_3M_encoder_dropout_1': 0.25, 'seq_3M_encoder_batch_norm_1': 

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 0. Best value: 0.97423:   2%|▏         | 2/100 [01:32<1:19:01, 48.39s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.4 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 13.2 K | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
5 | output

[I 2025-11-09 13:56:10,648] Trial 1 finished with value: 1.046046495437622 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 64, 'seq_3M_encoder_dropout_0': 0.2, 'seq_3M_encoder_batch_norm_0': True, 'seq_3M_encoder_units_1': 64, 'seq_3M_encoder_dropout_1': 0.0, 'seq_3M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_un

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 2. Best value: 0.912667:   3%|▎         | 3/100 [02:33<1:27:39, 54.22s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 15.4 K | train
1 | seq_0M_encoder        | Sequential       | 2.9 K  | train
2 | seq_2M_encoder        | Sequential       | 4.1 K  | train
3 | seq_3M_encoder        | Sequential       | 25.2 K | train
4 | goutallier_0M_encoder | Sequential       | 19.8 K | train
5 | outpu

[I 2025-11-09 13:57:11,804] Trial 2 finished with value: 0.9126667976379395 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.05, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.1, 'seq_2M_encoder_batch_norm_2': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 192, 'seq_3M_encoder_dropout_0': 0.15000000000000002, 'seq_3M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_n_layer

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 2. Best value: 0.912667:   4%|▍         | 4/100 [03:08<1:14:29, 46.56s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 15.4 K | train
1 | seq_0M_encoder        | Sequential       | 26.8 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 28.7 K | train
4 | goutallier_0M_encoder | Sequential       | 704    | train
5 | outpu

[I 2025-11-09 13:57:46,627] Trial 3 finished with value: 1.035427212715149 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 32, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.25, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 96, 'seq_3M_encoder_dropout_1': 0.15000000000000002, 'seq_3M_encoder_batch_norm_1': True, 'goutallier_

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 2. Best value: 0.912667:   5%|▌         | 5/100 [05:04<1:53:26, 71.65s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 11.4 K | train
1 | seq_0M_encoder        | Sequential       | 46.6 K | train
2 | seq_2M_encoder        | Sequential       | 13.2 K | train
3 | seq_3M_encoder        | Sequential       | 90.6 K | train
4 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
5 | outpu

[I 2025-11-09 13:59:42,770] Trial 4 finished with value: 0.9844875335693359 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 32, 'static_encoder_dropout_2': 0.5, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 32, 'seq_2M_encoder_dropout_2': 0.

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 2. Best value: 0.912667:   6%|▌         | 6/100 [07:11<2:21:43, 90.47s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.7 K  | train
1 | seq_0M_encoder        | Sequential       | 18.7 K | train
2 | seq_2M_encoder        | Sequential       | 23.4 K | train
3 | seq_3M_encoder        | Sequential       | 59.0 K | train
4 | goutallier_0M_encoder | Sequential       | 4.8 K  | train
5 | outpu

[I 2025-11-09 14:01:49,758] Trial 5 finished with value: 0.985063910484314 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.30000000000000004, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 32, 'static_encoder_dropout_2': 0.30000000000000004, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.15000000000000002, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder

Best trial: 2. Best value: 0.912667:   7%|▋         | 7/100 [09:03<2:31:18, 97.62s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 7.3 K  | train
1 | seq_0M_encoder        | Sequential       | 3.8 K  | train
2 | seq_2M_encoder        | Sequential       | 22.2 K | train
3 | seq_3M_encoder        | Sequential       | 17.3 K | train
4 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
5 | output_head           | Sequential       | 120 K  | t

[I 2025-11-09 14:03:42,102] Trial 6 finished with value: 0.9524717330932617 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.1, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.25, 'seq_2M_encoder_batch_norm_2': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 96, 'seq_3M_encoder_dropout_0': 0.1, 'seq_3M_encoder_batch_norm_0': True, 'seq_3M_encoder_units_1': 160, 'seq_3M_encoder_dropout_1'

Best trial: 2. Best value: 0.912667:   8%|▊         | 8/100 [09:12<1:46:03, 69.17s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.2 K | train
1 | seq_0M_encoder        | Sequential       | 66.3 K | train
2 | seq_2M_encoder        | Sequential       | 2.6 K  | train
3 | seq_3M_encoder        | Sequential       | 70.6 K | train
4 | goutallier_0M_encoder | Sequential       | 4.8 K  | train
5 | output_head           | Sequential       | 8.8 K  | t

[I 2025-11-09 14:03:50,344] Trial 7 pruned. Trial was pruned at epoch 10.


Best trial: 2. Best value: 0.912667:   9%|▉         | 9/100 [09:20<1:15:53, 50.03s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 19.6 K | train
1 | seq_0M_encoder        | Sequential       | 17.9 K | train
2 | seq_2M_encoder        | Sequential       | 3.9 K  | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 23.5 K | train
5 | output_head           | Sequential       | 112 K  | t

[I 2025-11-09 14:03:58,311] Trial 8 pruned. Trial was pruned at epoch 10.


Best trial: 2. Best value: 0.912667:  10%|█         | 10/100 [09:39<1:01:05, 40.72s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 26.3 K | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 125 K  | 

[I 2025-11-09 14:04:18,186] Trial 9 pruned. Trial was pruned at epoch 24.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 2. Best value: 0.912667:  11%|█         | 11/100 [10:35<1:06:57, 45.15s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 26.3 K | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | outp

[I 2025-11-09 14:05:13,357] Trial 10 finished with value: 0.9227583408355713 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.0, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.25, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 64, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 128, 'seq_3M_encoder_dropout_0': 0.0, 'seq_3M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 2. Best value: 0.912667:  12%|█▏        | 12/100 [11:30<1:10:39, 48.18s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 27.7 K | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | outp

[I 2025-11-09 14:06:08,472] Trial 11 finished with value: 0.9180286526679993 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.0, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.25, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 64, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 128, 'seq_3M_encoder_dropout_0': 0.0, 'seq_3M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 96, 'goutallier_0M_encoder_dropout_0': 0.30000000000000004, 'goutallier_0M_encoder_batch_norm_

Best trial: 2. Best value: 0.912667:  13%|█▎        | 13/100 [11:42<54:01, 37.26s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.4 K  | train
1 | seq_0M_encoder        | Sequential       | 18.5 K | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 123 K  | 

[I 2025-11-09 14:06:20,610] Trial 12 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 13. Best value: 0.909465:  14%|█▍        | 14/100 [12:45<1:04:47, 45.21s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 10.4 K | train
1 | seq_0M_encoder        | Sequential       | 10.3 K | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
5 | out

[I 2025-11-09 14:07:24,182] Trial 13 finished with value: 0.9094647765159607 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 64, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 160, 'seq_0M_encoder_dropout_2': 0.5, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_u

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 13. Best value: 0.909465:  15%|█▌        | 15/100 [13:49<1:12:02, 50.85s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.4 K  | train
1 | seq_0M_encoder        | Sequential       | 33.9 K | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | out

[I 2025-11-09 14:08:28,115] Trial 14 finished with value: 0.9312784671783447 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 64, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 160, 'seq_3M_encoder_dropout_0': 0.1, 'seq_3M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_n_layers'

Best trial: 13. Best value: 0.909465:  16%|█▌        | 16/100 [14:03<55:35, 39.70s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.1 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 704    | train
5 | output_head           | Sequential       | 182 K  |

[I 2025-11-09 14:08:41,930] Trial 15 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 13. Best value: 0.909465:  17%|█▋        | 17/100 [15:01<1:02:30, 45.19s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 10.4 K | train
1 | seq_0M_encoder        | Sequential       | 18.0 K | train
2 | seq_2M_encoder        | Sequential       | 15.9 K | train
3 | seq_3M_encoder        | Sequential       | 18.1 K | train
4 | goutallier_0M_encoder | Sequential       | 5.3 K  | train
5 | out

[I 2025-11-09 14:09:39,874] Trial 16 finished with value: 0.9207859635353088 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.25, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 192, 'seq_3M_encoder_dropout_0': 0.15000000000000002, 'seq_3M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_n_layers': 1, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.2, 'goutallier_0M_encoder_batch_norm

Best trial: 13. Best value: 0.909465:  18%|█▊        | 18/100 [15:15<48:53, 35.77s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 35.1 K | train
2 | seq_2M_encoder        | Sequential       | 7.1 K  | train
3 | seq_3M_encoder        | Sequential       | 3.8 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 96.0 K |

[I 2025-11-09 14:09:53,723] Trial 17 pruned. Trial was pruned at epoch 10.


Best trial: 13. Best value: 0.909465:  19%|█▉        | 19/100 [15:27<38:52, 28.80s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.9 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 2.6 K  | train
3 | seq_3M_encoder        | Sequential       | 61.5 K | train
4 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
5 | output_head           | Sequential       | 155 K  | t

[I 2025-11-09 14:10:06,284] Trial 18 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 19. Best value: 0.907841:  20%|██        | 20/100 [16:30<51:55, 38.94s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.9 K  | train
1 | seq_0M_encoder        | Sequential       | 10.3 K | train
2 | seq_2M_encoder        | Sequential       | 4.9 K  | train
3 | seq_3M_encoder        | Sequential       | 61.5 K | train
4 | goutallier_0M_encoder | Sequential       | 11.9 K | train
5 | outpu

[I 2025-11-09 14:11:08,857] Trial 19 finished with value: 0.9078408479690552 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.25, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.05, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 256, 'seq_3M_encoder_dropout_1': 0.15000000000000002, 'seq_3M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_n_layers': 2, 'gou

Best trial: 19. Best value: 0.907841:  21%|██        | 21/100 [17:01<48:13, 36.63s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.1 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 2.6 K  | train
3 | seq_3M_encoder        | Sequential       | 46.6 K | train
4 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
5 | output_head           | Sequential       | 100 K  | t

[I 2025-11-09 14:11:40,087] Trial 20 pruned. Trial was pruned at epoch 12.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 19. Best value: 0.907841:  22%|██▏       | 22/100 [18:03<57:30, 44.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.9 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 3.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
5 | outpu

[I 2025-11-09 14:12:42,083] Trial 21 finished with value: 0.91910719871521 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 192, 'seq_3M_encoder_dropout_0': 0.15000000000000002, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 224, 'seq_3M_encoder_dropout_1': 0.15000000000000002, 'seq_3M_encoder_batch_norm_1': False, 'goutallier_0

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 19. Best value: 0.907841:  23%|██▎       | 23/100 [19:03<1:02:47, 48.93s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.9 K  | train
1 | seq_0M_encoder        | Sequential       | 2.4 K  | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 3.8 K  | train
4 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
5 | out

[I 2025-11-09 14:13:41,949] Trial 22 finished with value: 0.910356342792511 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.2, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.05, 'seq_3M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_n_layers': 2, 'goutallier_0M_encoder_units_0': 64, 'goutallier_0M_encoder_dropout_0': 0.4, 'goutallier_0M_en

Best trial: 19. Best value: 0.907841:  24%|██▍       | 24/100 [19:34<55:12, 43.58s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.9 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 1.4 K  | train
3 | seq_3M_encoder        | Sequential       | 3.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 2.6 K  | train
5 | output_head           | Sequential       | 118 K  |

[I 2025-11-09 14:14:13,054] Trial 23 pruned. Trial was pruned at epoch 25.


Best trial: 19. Best value: 0.907841:  25%|██▌       | 25/100 [19:59<47:31, 38.02s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 28.5 K | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | seq_3M_encoder        | Sequential       | 20.4 K | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 183 K  | t

[I 2025-11-09 14:14:38,085] Trial 24 pruned. Trial was pruned at epoch 20.


Best trial: 19. Best value: 0.907841:  26%|██▌       | 26/100 [20:24<41:57, 34.02s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.4 K  | train
1 | seq_0M_encoder        | Sequential       | 2.9 K  | train
2 | seq_2M_encoder        | Sequential       | 2.6 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
5 | output_head           | Sequential       | 190 K  | t

[I 2025-11-09 14:15:02,787] Trial 25 pruned. Trial was pruned at epoch 21.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 19. Best value: 0.907841:  27%|██▋       | 27/100 [21:26<51:32, 42.36s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 1.8 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | seq_3M_encoder        | Sequential       | 32.4 K | train
4 | goutallier_0M_encoder | Sequential       | 6.9 K  | train
5 | outpu

[I 2025-11-09 14:16:04,598] Trial 26 finished with value: 0.918494462966919 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.25, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.2, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 160, 'seq_3M_encoder_dropout_0': 0.1, 'seq_3M_encoder_batch_norm_0': True, 'goutallier_0M_encoder_n_layer

Best trial: 19. Best value: 0.907841:  28%|██▊       | 28/100 [22:07<50:17, 41.91s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 7.4 K  | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 31.3 K | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 182 K  | t

[I 2025-11-09 14:16:45,451] Trial 27 finished with value: 0.9258278608322144 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.25, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 128, 'seq_3M_encoder_dropout_1': 0.05, 'seq_3M_encoder_batch_norm_1': False, 'goutallier_0M_encoder_n_layers': 3, 'goutallier_0M_encoder_units_0': 32, 'goutallier_0M_encoder_dropout_0': 0.05, 'goutallier_0M_encoder_batch_norm_0': False, 'goutallier_0M_encoder_units_1': 64, 'goutallier_0M_encoder_dropout_1': 0.15000000000

Best trial: 19. Best value: 0.907841:  29%|██▉       | 29/100 [22:32<43:45, 36.98s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 7.8 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 95.0 K | train
4 | goutallier_0M_encoder | Sequential       | 3.7 K  | train
5 | output_head           | Sequential       | 59.3 K | t

[I 2025-11-09 14:17:10,944] Trial 28 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  30%|███       | 30/100 [22:45<34:41, 29.74s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 2.0 K  | train
1 | seq_0M_encoder        | Sequential       | 85.6 K | train
2 | seq_2M_encoder        | Sequential       | 7.3 K  | train
3 | seq_3M_encoder        | Sequential       | 64.9 K | train
4 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
5 | output_head           | Sequential       | 161 K  | t

[I 2025-11-09 14:17:23,792] Trial 29 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 19. Best value: 0.907841:  31%|███       | 31/100 [23:26<38:02, 33.08s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.4 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 16.4 K | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
5 | outpu

[I 2025-11-09 14:18:04,651] Trial 30 finished with value: 0.9182330965995789 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.30000000000000004, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': False, 'seq_0M_encoder_units_2': 160, 'seq_0M_encoder_dropout_2': 0.30000000000000004, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encode

Best trial: 19. Best value: 0.907841:  32%|███▏      | 32/100 [23:40<30:56, 27.31s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 4.3 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 3.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
5 | output_head           | Sequential       | 162 K  | t

[I 2025-11-09 14:18:18,496] Trial 31 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  33%|███▎      | 33/100 [23:53<25:46, 23.08s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.4 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
5 | output_head           | Sequential       | 162 K  | t

[I 2025-11-09 14:18:31,708] Trial 32 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  34%|███▍      | 34/100 [24:06<22:09, 20.14s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 17.4 K | train
3 | seq_3M_encoder        | Sequential       | 3.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 5.1 K  | train
5 | output_head           | Sequential       | 141 K  | t

[I 2025-11-09 14:18:44,988] Trial 33 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  35%|███▌      | 35/100 [25:09<35:43, 32.98s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 17.6 K | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 3.7 K  | train
3 | seq_3M_encoder        | Sequential       | 3.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
5 | output_head           | Sequential       | 101 K  | t

[I 2025-11-09 14:19:47,943] Trial 34 pruned. Trial was pruned at epoch 47.


Best trial: 19. Best value: 0.907841:  36%|███▌      | 36/100 [25:22<28:47, 26.99s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.4 K  | train
1 | seq_0M_encoder        | Sequential       | 1.9 K  | train
2 | seq_2M_encoder        | Sequential       | 9.2 K  | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 59.8 K | t

[I 2025-11-09 14:20:00,948] Trial 35 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  37%|███▋      | 37/100 [25:44<26:46, 25.50s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 7.3 K  | train
1 | seq_0M_encoder        | Sequential       | 1.4 K  | train
2 | seq_2M_encoder        | Sequential       | 9.0 K  | train
3 | seq_3M_encoder        | Sequential       | 37.0 K | train
4 | goutallier_0M_encoder | Sequential       | 4.8 K  | train
5 | output_head           | Sequential       | 205 K  | t

[I 2025-11-09 14:20:22,975] Trial 36 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  38%|███▊      | 38/100 [26:00<23:19, 22.58s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 16.2 K | train
1 | seq_0M_encoder        | Sequential       | 40.3 K | train
2 | seq_2M_encoder        | Sequential       | 10.5 K | train
3 | seq_3M_encoder        | Sequential       | 3.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 5.0 K  | train
5 | output_head           | Sequential       | 182 K  | t

[I 2025-11-09 14:20:38,723] Trial 37 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  39%|███▉      | 39/100 [26:08<18:27, 18.15s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 5.9 K  | train
3 | seq_3M_encoder        | Sequential       | 7.8 K  | train
4 | goutallier_0M_encoder | Sequential       | 352    | train
5 | output_head           | Sequential       | 4.2 K  | t

[I 2025-11-09 14:20:46,545] Trial 38 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  40%|████      | 40/100 [26:59<28:11, 28.19s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 13.8 K | train
1 | seq_0M_encoder        | Sequential       | 14.5 K | train
2 | seq_2M_encoder        | Sequential       | 13.9 K | train
3 | seq_3M_encoder        | Sequential       | 68.0 K | train
4 | goutallier_0M_encoder | Sequential       | 7.1 K  | train
5 | output_head           | Sequential       | 129 K  | t

[I 2025-11-09 14:21:38,170] Trial 39 pruned. Trial was pruned at epoch 25.


Best trial: 19. Best value: 0.907841:  41%|████      | 41/100 [27:15<24:05, 24.51s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 32.5 K | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 136 K  | t

[I 2025-11-09 14:21:54,074] Trial 40 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  42%|████▏     | 42/100 [27:28<20:12, 20.90s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 74.1 K | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 183 K  | t

[I 2025-11-09 14:22:06,552] Trial 41 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  43%|████▎     | 43/100 [27:40<17:29, 18.41s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 45.0 K | train
2 | seq_2M_encoder        | Sequential       | 320    | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 121 K  | t

[I 2025-11-09 14:22:19,164] Trial 42 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  44%|████▍     | 44/100 [27:53<15:33, 16.66s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 1.8 K  | train
1 | seq_0M_encoder        | Sequential       | 53.8 K | train
2 | seq_2M_encoder        | Sequential       | 640    | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.4 K  | train
5 | output_head           | Sequential       | 78.8 K | t

[I 2025-11-09 14:22:31,753] Trial 43 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  45%|████▌     | 45/100 [28:00<12:42, 13.86s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 896    | train
1 | seq_0M_encoder        | Sequential       | 45.4 K | train
2 | seq_2M_encoder        | Sequential       | 960    | train
3 | seq_3M_encoder        | Sequential       | 1.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 1.1 K  | train
5 | output_head           | Sequential       | 112 K  | t

[I 2025-11-09 14:22:39,067] Trial 44 pruned. Trial was pruned at epoch 10.


Best trial: 19. Best value: 0.907841:  46%|████▌     | 46/100 [28:13<12:04, 13.42s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 22.1 K | train
2 | seq_2M_encoder        | Sequential       | 13.2 K | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
5 | output_head           | Sequential       | 167 K  | t

[I 2025-11-09 14:22:51,454] Trial 45 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 46. Best value: 0.900448:  47%|████▋     | 47/100 [29:17<25:22, 28.73s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 39.3 K | train
2 | seq_2M_encoder        | Sequential       | 13.2 K | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
5 | outpu

[I 2025-11-09 14:23:55,925] Trial 46 finished with value: 0.9004484415054321 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 96, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.0

Best trial: 46. Best value: 0.900448:  48%|████▊     | 48/100 [29:57<27:48, 32.09s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.9 K  | train
1 | seq_0M_encoder        | Sequential       | 12.8 K | train
2 | seq_2M_encoder        | Sequential       | 13.2 K | train
3 | seq_3M_encoder        | Sequential       | 52.8 K | train
4 | goutallier_0M_encoder | Sequential       | 4.2 K  | train
5 | output_head           | Sequential       | 105 K  | t

[I 2025-11-09 14:24:35,858] Trial 47 finished with value: 0.9287949800491333 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.05, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': False, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.0, 'seq_2M_

Best trial: 46. Best value: 0.900448:  49%|████▉     | 49/100 [30:11<22:45, 26.77s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 5.9 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 17.4 K | train
3 | seq_3M_encoder        | Sequential       | 3.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 5.6 K  | train
5 | output_head           | Sequential       | 132 K  | t

[I 2025-11-09 14:24:50,195] Trial 48 pruned. Trial was pruned at epoch 10.


Best trial: 46. Best value: 0.900448:  50%|█████     | 50/100 [30:19<17:36, 21.12s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 24.0 K | train
2 | seq_2M_encoder        | Sequential       | 9.0 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 2.8 K  | train
5 | output_head           | Sequential       | 154 K  | t

[I 2025-11-09 14:24:58,143] Trial 49 pruned. Trial was pruned at epoch 10.


Best trial: 46. Best value: 0.900448:  51%|█████     | 51/100 [30:34<15:40, 19.20s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.1 K  | train
1 | seq_0M_encoder        | Sequential       | 27.3 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 150 K  | t

[I 2025-11-09 14:25:12,847] Trial 50 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 51. Best value: 0.899208:  52%|█████▏    | 52/100 [31:41<26:55, 33.66s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 22.1 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | outpu

[I 2025-11-09 14:26:20,243] Trial 51 finished with value: 0.8992083072662354 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 96, 'seq_0M_encoder_dropout_2': 0.05, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encode

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 51. Best value: 0.899208:  53%|█████▎    | 53/100 [32:47<33:47, 43.15s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 22.1 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | outpu

[I 2025-11-09 14:27:25,537] Trial 52 finished with value: 0.9050768613815308 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 96, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 53. Best value: 0.898045:  54%|█████▍    | 54/100 [33:52<38:09, 49.76s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 22.1 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | outpu

[I 2025-11-09 14:28:30,732] Trial 53 finished with value: 0.8980445861816406 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 96, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder

Best trial: 53. Best value: 0.898045:  55%|█████▌    | 55/100 [34:07<29:24, 39.22s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 32.5 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 172 K  | t

[I 2025-11-09 14:28:45,347] Trial 54 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  56%|█████▌    | 56/100 [35:12<34:26, 46.97s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 27.3 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | outpu

[I 2025-11-09 14:29:50,412] Trial 55 finished with value: 0.896073043346405 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 128, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 160, 'seq_3M_encoder_d

Best trial: 55. Best value: 0.896073:  57%|█████▋    | 57/100 [35:26<26:41, 37.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 32.5 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 161 K  | t

[I 2025-11-09 14:30:04,931] Trial 56 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  58%|█████▊    | 58/100 [35:41<21:18, 30.44s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 45.0 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 166 K  | t

[I 2025-11-09 14:30:19,519] Trial 57 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  59%|█████▉    | 59/100 [36:08<20:03, 29.36s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 26.3 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 23.3 K | train
4 | goutallier_0M_encoder | Sequential       | 14.0 K | train
5 | output_head           | Sequential       | 175 K  | t

[I 2025-11-09 14:30:46,349] Trial 58 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  60%|██████    | 60/100 [36:23<16:46, 25.17s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 22.1 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 960    | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 128 K  | t

[I 2025-11-09 14:31:01,738] Trial 59 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  61%|██████    | 61/100 [36:38<14:23, 22.13s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 32.5 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 13.7 K | train
5 | output_head           | Sequential       | 149 K  | t

[I 2025-11-09 14:31:16,791] Trial 60 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  62%|██████▏   | 62/100 [36:53<12:37, 19.92s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 27.3 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 147 K  | t

[I 2025-11-09 14:31:31,552] Trial 61 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  63%|██████▎   | 63/100 [37:07<11:18, 18.34s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 30.9 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 1.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 181 K  | t

[I 2025-11-09 14:31:46,214] Trial 62 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  64%|██████▍   | 64/100 [37:22<10:20, 17.23s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 27.7 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 10.6 K | train
5 | output_head           | Sequential       | 147 K  | t

[I 2025-11-09 14:32:00,858] Trial 63 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  65%|██████▌   | 65/100 [37:37<09:34, 16.41s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 22.1 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.9 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 172 K  | t

[I 2025-11-09 14:32:15,360] Trial 64 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  66%|██████▌   | 66/100 [37:51<09:00, 15.89s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.0 K | train
1 | seq_0M_encoder        | Sequential       | 23.2 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 10.6 K | train
5 | output_head           | Sequential       | 179 K  | t

[I 2025-11-09 14:32:30,035] Trial 65 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  67%|██████▋   | 67/100 [38:06<08:28, 15.41s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 57.9 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 27.1 K | train
4 | goutallier_0M_encoder | Sequential       | 16.0 K | train
5 | output_head           | Sequential       | 225 K  | t

[I 2025-11-09 14:32:44,307] Trial 66 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  68%|██████▊   | 68/100 [39:16<17:05, 32.05s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 57.9 K | train
2 | seq_2M_encoder        | Sequential       | 9.2 K  | train
3 | seq_3M_encoder        | Sequential       | 33.7 K | train
4 | goutallier_0M_encoder | Sequential       | 16.0 K | train
5 | outpu

[I 2025-11-09 14:33:55,204] Trial 67 finished with value: 0.9035511612892151 and parameters: {'batch_size': 64, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.2, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 12

Best trial: 55. Best value: 0.896073:  69%|██████▉   | 69/100 [39:33<14:06, 27.29s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 43.0 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 38.9 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | output_head           | Sequential       | 225 K  | t

[I 2025-11-09 14:34:11,384] Trial 68 pruned. Trial was pruned at epoch 12.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  70%|███████   | 70/100 [40:14<15:43, 31.45s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 43.0 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 59.8 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | outpu

[I 2025-11-09 14:34:52,545] Trial 69 finished with value: 0.9018399119377136 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 160, 'seq_3M_enco

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  71%|███████   | 71/100 [40:54<16:29, 34.12s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 43.0 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 59.8 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | outpu

[I 2025-11-09 14:35:32,881] Trial 70 finished with value: 0.9076941013336182 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 160, 'seq_3M_enco

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  72%|███████▏  | 72/100 [41:34<16:46, 35.96s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 43.0 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 58.3 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | outpu

[I 2025-11-09 14:36:13,140] Trial 71 finished with value: 0.9027490019798279 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 160, 'seq_3M_enco

Best trial: 55. Best value: 0.896073:  73%|███████▎  | 73/100 [41:43<12:29, 27.74s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 57.5 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 97.0 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 231 K  | t

[I 2025-11-09 14:36:21,716] Trial 72 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  74%|███████▍  | 74/100 [42:23<13:41, 31.59s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 57.5 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 97.0 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | outpu

[I 2025-11-09 14:37:02,286] Trial 73 finished with value: 0.9019415974617004 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.05, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0'

Best trial: 55. Best value: 0.896073:  75%|███████▌  | 75/100 [42:33<10:21, 24.85s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 57.5 K | train
2 | seq_2M_encoder        | Sequential       | 18.0 K | train
3 | seq_3M_encoder        | Sequential       | 90.1 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 214 K  | t

[I 2025-11-09 14:37:11,390] Trial 74 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  76%|███████▌  | 76/100 [43:13<11:49, 29.54s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 48.2 K | train
2 | seq_2M_encoder        | Sequential       | 18.0 K | train
3 | seq_3M_encoder        | Sequential       | 90.1 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | outpu

[I 2025-11-09 14:37:51,895] Trial 75 finished with value: 0.8993211388587952 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_en

Best trial: 55. Best value: 0.896073:  77%|███████▋  | 77/100 [43:42<11:16, 29.39s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 74.1 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 90.1 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 202 K  | t

[I 2025-11-09 14:38:20,937] Trial 76 pruned. Trial was pruned at epoch 35.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  78%|███████▊  | 78/100 [44:23<12:04, 32.91s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 76.2 K | train
2 | seq_2M_encoder        | Sequential       | 18.0 K | train
3 | seq_3M_encoder        | Sequential       | 97.4 K | train
4 | goutallier_0M_encoder | Sequential       | 14.1 K | train
5 | outpu

[I 2025-11-09 14:39:02,057] Trial 77 finished with value: 0.9029095768928528 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 256, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 1

Best trial: 55. Best value: 0.896073:  79%|███████▉  | 79/100 [44:32<09:01, 25.81s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 26.3 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 71.5 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | output_head           | Sequential       | 166 K  | t

[I 2025-11-09 14:39:11,294] Trial 78 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  80%|████████  | 80/100 [44:42<06:56, 20.81s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 53.4 K | train
2 | seq_2M_encoder        | Sequential       | 18.0 K | train
3 | seq_3M_encoder        | Sequential       | 61.4 K | train
4 | goutallier_0M_encoder | Sequential       | 26.7 K | train
5 | output_head           | Sequential       | 239 K  | t

[I 2025-11-09 14:39:20,451] Trial 79 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  81%|████████  | 81/100 [45:04<06:42, 21.19s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 63.7 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 90.1 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 202 K  | t

[I 2025-11-09 14:39:42,518] Trial 80 pruned. Trial was pruned at epoch 26.


Best trial: 55. Best value: 0.896073:  82%|████████▏ | 82/100 [45:12<05:14, 17.45s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 74.1 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 82.5 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 222 K  | t

[I 2025-11-09 14:39:51,247] Trial 81 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  83%|████████▎ | 83/100 [45:53<06:55, 24.43s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 76.1 K | train
2 | seq_2M_encoder        | Sequential       | 18.0 K | train
3 | seq_3M_encoder        | Sequential       | 89.8 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | outpu

[I 2025-11-09 14:40:31,968] Trial 82 finished with value: 0.9006674885749817 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 256, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 1

Best trial: 55. Best value: 0.896073:  84%|████████▍ | 84/100 [46:07<05:39, 21.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 59.6 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 59.8 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 206 K  | t

[I 2025-11-09 14:40:45,761] Trial 83 pruned. Trial was pruned at epoch 16.


Best trial: 55. Best value: 0.896073:  85%|████████▌ | 85/100 [46:21<04:45, 19.06s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 66.8 K | train
2 | seq_2M_encoder        | Sequential       | 9.2 K  | train
3 | seq_3M_encoder        | Sequential       | 58.7 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 198 K  | t

[I 2025-11-09 14:40:59,719] Trial 84 pruned. Trial was pruned at epoch 16.


Best trial: 55. Best value: 0.896073:  86%|████████▌ | 86/100 [46:32<03:51, 16.56s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 37.8 K | train
2 | seq_2M_encoder        | Sequential       | 18.0 K | train
3 | seq_3M_encoder        | Sequential       | 85.6 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | output_head           | Sequential       | 181 K  | t

[I 2025-11-09 14:41:10,444] Trial 85 pruned. Trial was pruned at epoch 12.


Best trial: 55. Best value: 0.896073:  87%|████████▋ | 87/100 [46:41<03:07, 14.39s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 8.2 K  | train
1 | seq_0M_encoder        | Sequential       | 64.1 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 65.0 K | train
4 | goutallier_0M_encoder | Sequential       | 10.1 K | train
5 | output_head           | Sequential       | 182 K  | t

[I 2025-11-09 14:41:19,781] Trial 86 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  88%|████████▊ | 88/100 [46:50<02:31, 12.64s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.0 K | train
1 | seq_0M_encoder        | Sequential       | 34.7 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 77.3 K | train
4 | goutallier_0M_encoder | Sequential       | 16.2 K | train
5 | output_head           | Sequential       | 141 K  | t

[I 2025-11-09 14:41:28,332] Trial 87 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  89%|████████▉ | 89/100 [46:59<02:07, 11.57s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 53.4 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 90.1 K | train
4 | goutallier_0M_encoder | Sequential       | 14.1 K | train
5 | output_head           | Sequential       | 188 K  | t

[I 2025-11-09 14:41:37,423] Trial 88 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  90%|█████████ | 90/100 [47:08<01:48, 10.81s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 6.1 K  | train
1 | seq_0M_encoder        | Sequential       | 105 K  | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 43.4 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 200 K  | t

[I 2025-11-09 14:41:46,435] Trial 89 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  91%|█████████ | 91/100 [47:17<01:32, 10.28s/it]

[I 2025-11-09 14:41:55,499] Trial 90 pruned. Trial was pruned at epoch 11.


Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 74.1 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 90.1 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 231 K  | train
6 | train_mse             | MeanSquaredError | 0      | train
7 | val_mse        

[I 2025-11-09 14:42:35,728] Trial 91 finished with value: 0.897046685218811 and parameters: {'batch_size': 128, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 256, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 19

Best trial: 55. Best value: 0.896073:  93%|█████████▎| 93/100 [48:06<01:53, 16.25s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 74.1 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 102 K  | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 231 K  | t

[I 2025-11-09 14:42:44,945] Trial 92 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  94%|█████████▍| 94/100 [48:16<01:25, 14.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 12.2 K | train
1 | seq_0M_encoder        | Sequential       | 63.7 K | train
2 | seq_2M_encoder        | Sequential       | 18.0 K | train
3 | seq_3M_encoder        | Sequential       | 97.4 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 231 K  | t

[I 2025-11-09 14:42:54,501] Trial 93 pruned. Trial was pruned at epoch 11.


Best trial: 55. Best value: 0.896073:  95%|█████████▌| 95/100 [48:30<01:10, 14.20s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 32.9 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 71.1 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | output_head           | Sequential       | 173 K  | t

[I 2025-11-09 14:43:08,584] Trial 94 pruned. Trial was pruned at epoch 16.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  96%|█████████▌| 96/100 [50:43<03:19, 49.80s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 26.7 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 84.0 K | train
4 | goutallier_0M_encoder | Sequential       | 10.1 K | train
5 | outpu

[I 2025-11-09 14:45:21,450] Trial 95 finished with value: 0.8991538882255554 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.25, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 96, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 160, 'seq_3M_encoder_dropout_0': 0.

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 55. Best value: 0.896073:  97%|█████████▋| 97/100 [52:54<03:42, 74.27s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 59.9 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 70.8 K | train
4 | goutallier_0M_encoder | Sequential       | 18.4 K | train
5 | outpu

[I 2025-11-09 14:47:32,817] Trial 96 finished with value: 0.9068072438240051 and parameters: {'batch_size': 32, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.25, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 96, 'seq_0M_encoder_dropout_2': 0.05, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 192, 'seq_3M_encode

Best trial: 55. Best value: 0.896073:  98%|█████████▊| 98/100 [53:23<02:01, 60.73s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 22.1 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 82.5 K | train
4 | goutallier_0M_encoder | Sequential       | 34.9 K | train
5 | output_head           | Sequential       | 191 K  | t

[I 2025-11-09 14:48:01,955] Trial 97 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073:  99%|█████████▉| 99/100 [53:52<00:51, 51.28s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 37.8 K | train
2 | seq_2M_encoder        | Sequential       | 13.6 K | train
3 | seq_3M_encoder        | Sequential       | 93.1 K | train
4 | goutallier_0M_encoder | Sequential       | 9.8 K  | train
5 | output_head           | Sequential       | 173 K  | t

[I 2025-11-09 14:48:31,199] Trial 98 pruned. Trial was pruned at epoch 10.


Best trial: 55. Best value: 0.896073: 100%|██████████| 100/100 [54:21<00:00, 32.61s/it]
[I 2025-11-09 14:48:59,572] A new study created in memory with name: sequential_mlp4_optimization


[I 2025-11-09 14:48:59,564] Trial 99 pruned. Trial was pruned at epoch 10.

[Model 3] 최적화 완료!
최고 성능: 0.896073
최적 파라미터:
  batch_size: 64
  static_encoder_n_layers: 2
  static_encoder_units_0: 96
  static_encoder_dropout_0: 0.1
  static_encoder_batch_norm_0: True
  static_encoder_units_1: 64
  static_encoder_dropout_1: 0.1
  static_encoder_batch_norm_1: False
  seq_0M_encoder_n_layers: 3
  seq_0M_encoder_units_0: 64
  seq_0M_encoder_dropout_0: 0.2
  seq_0M_encoder_batch_norm_0: True
  seq_0M_encoder_units_1: 160
  seq_0M_encoder_dropout_1: 0.30000000000000004
  seq_0M_encoder_batch_norm_1: True
  seq_0M_encoder_units_2: 128
  seq_0M_encoder_dropout_2: 0.1
  seq_0M_encoder_batch_norm_2: True
  seq_2M_encoder_n_layers: 2
  seq_2M_encoder_units_0: 32
  seq_2M_encoder_dropout_0: 0.5
  seq_2M_encoder_batch_norm_0: True
  seq_2M_encoder_units_1: 128
  seq_2M_encoder_dropout_1: 0.4
  seq_2M_encoder_batch_norm_1: False
  seq_3M_encoder_n_layers: 1
  seq_3M_encoder_units_0: 160
  seq_3M_encoder_d

  0%|          | 0/100 [00:00<?, ?it/s]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 4.3 K  | train
1  | seq_0M_encoder        | Sequential             | 43.4 K | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 1.4 K  | train
4  | seq_4M_encoder        | Sequential             | 2.9 K  | train
5  | goutallier_0M_encoder | Sequential             | 1

[I 2025-11-09 14:49:48,930] Trial 0 finished with value: -0.7558845281600952 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 160, 'seq_0M_encoder_dropout_2': 0.05, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 96, 'seq

Best trial: 0. Best value: -0.755885:   2%|▏         | 2/100 [01:13<56:35, 34.65s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 12.2 K | train
1  | seq_0M_encoder        | Sequential             | 2.9 K  | train
2  | seq_2M_encoder        | Sequential             | 11.1 K | train
3  | seq_3M_encoder        | Sequential             | 77.3 K | train
4  | seq_4M_encoder        | Sequential             | 10.9 K | train
5  | go

[I 2025-11-09 14:50:13,285] Trial 1 finished with value: -0.6421984434127808 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.35, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.05, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 32, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.25, 'seq_2M_encoder_batch_norm_2': True, 'seq_3M_encoder_n_layers': 2, 'se

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 0. Best value: -0.755885:   3%|▎         | 3/100 [02:07<1:10:24, 43.55s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 6.5 K  | train
1  | seq_0M_encoder        | Sequential             | 70.6 K | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 11.7 K | train
4  | seq_4M_encoder        |

[I 2025-11-09 14:51:07,424] Trial 2 finished with value: -0.5821402668952942 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.35, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.0, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.25, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.25, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 160, 'seq_3M_encoder_dropout_0': 0.4, 'seq_3M_encoder_batch_norm_0': True, 'seq_3M_encoder_units_1': 256, 's

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:   4%|▍         | 4/100 [04:49<2:23:58, 89.99s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 18.8 K | train
1  | seq_0M_encoder        | Sequential             | 32.9 K | train
2  | seq_2M_encoder        | Sequential             | 18.0 K | train
3  | seq_3M_encoder        | Sequential             | 46.6 K | train
4  | seq_4M_encoder        |

[I 2025-11-09 14:53:48,599] Trial 3 finished with value: -0.7877942323684692 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.25, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.1, 'seq_2M_encoder_batch_norm_0': False, 'seq_3M_encoder_n_layers': 2, 'se

Best trial: 3. Best value: -0.787794:   5%|▌         | 5/100 [05:30<1:54:33, 72.35s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 12.0 K | train
1  | seq_0M_encoder        | Sequential             | 60.3 K | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 1.9 K  | train
4  | seq_4M_encoder        | Sequential             | 22.9 K | train
5  | go

[I 2025-11-09 14:54:29,674] Trial 4 finished with value: -0.5332443714141846 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.5, 'seq_0M_encoder_batch_norm_1': False, 'seq_0M_encoder_units_2': 64, 'seq_0M_encoder_dropout_2': 0.4, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_d

Best trial: 3. Best value: -0.787794:   6%|▌         | 6/100 [06:24<1:43:57, 66.35s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 2.7 K  | train
1  | seq_0M_encoder        | Sequential             | 11.7 K | train
2  | seq_2M_encoder        | Sequential             | 15.9 K | train
3  | seq_3M_encoder        | Sequential             | 3.8 K  | train
4  | seq_4M_encoder        | Sequential             | 26.8 K | train
5  | go

[I 2025-11-09 14:55:24,385] Trial 5 finished with value: -0.6836932897567749 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.05, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 96, 'seq_0M_encoder_dropout_2': 0.15000000000000002, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 128, 'seq_3M_encoder_dropout_0': 0.1, 'seq_3M_encoder_batch_no

Best trial: 3. Best value: -0.787794:   7%|▋         | 7/100 [06:43<1:18:34, 50.69s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 1.8 K  | train
1  | seq_0M_encoder        | Sequential             | 2.4 K  | train
2  | seq_2M_encoder        | Sequential             | 320    | train
3  | seq_3M_encoder        | Sequential             | 32.4 K | train
4  | seq_4M_encoder        | Sequential             | 83.6 K | train
5  | go

[I 2025-11-09 14:55:42,827] Trial 6 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:   8%|▊         | 8/100 [06:58<1:00:18, 39.33s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 3.1 K  | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 54.2 K | train
4  | seq_4M_encoder        | Sequential             | 65.3 K | train
5  | go

[I 2025-11-09 14:55:57,848] Trial 7 finished with value: -0.7583110928535461 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.30000000000000004, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.15000000000000002, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.25, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.30000000000000004, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 128, 'seq_3M_encoder_dropout_1': 0.1, 'seq_3M_encoder_batch_norm_1': True, 'seq_4M_encoder_n_layers': 3, 'seq_4M_encoder_units_0': 256, 'seq_4M_encoder_dropout_0': 0.0, 'seq_4M_encoder_batch_norm_0': False, 'seq_4M_encoder_units_1': 224, 'seq_4M_encoder_dropout_1':

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:   9%|▉         | 9/100 [08:27<1:23:14, 54.89s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 3.6 K  | train
1  | seq_0M_encoder        | Sequential             | 43.8 K | train
2  | seq_2M_encoder        | Sequential             | 18.0 K | train
3  | seq_3M_encoder        | Sequential             | 23.6 K | train
4  | seq_4M_encoder        |

[I 2025-11-09 14:57:26,942] Trial 8 finished with value: -0.6049503087997437 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.0, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 64, 'seq_0M_encoder_dropout_2': 0.25, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.15000000000000002, 'seq_2M_encoder_batch_norm_0': False, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.25, 'seq_3M_encoder_batch_nor

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  10%|█         | 10/100 [09:54<1:37:12, 64.80s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 6.5 K  | train
1  | seq_0M_encoder        | Sequential             | 20.4 K | train
2  | seq_2M_encoder        | Sequential             | 2.8 K  | train
3  | seq_3M_encoder        | Sequential             | 32.7 K | train
4  | seq_4M_encoder        

[I 2025-11-09 14:58:53,938] Trial 9 finished with value: -0.7060179710388184 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.45000000000000007, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.30000000000000004, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 96, 'seq_0M_encoder_dropout_1': 0.15000000000000002, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.15000000000000002, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.2, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 96, 'seq_3M_encoder_dropout_0': 0.25, 'seq_3M_

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  11%|█         | 11/100 [12:20<2:13:06, 89.73s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 1.8 K  | train
1  | seq_0M_encoder        | Sequential             | 3.8 K  | train
2  | seq_2M_encoder        | Sequential             | 320    | train
3  | seq_3M_encoder        | Sequential             | 38.9 K | train
4  | seq_4M_encoder        

[I 2025-11-09 15:01:20,195] Trial 10 finished with value: -0.507522463798523 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.1, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.30000000000000004, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.4, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 64, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 32, 'seq_2M_encoder_dropout_1': 0.0, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  12%|█▏        | 12/100 [14:46<2:36:51, 106.95s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 896    | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 320    | train
3  | seq_3M_encoder        | Sequential             | 20.4 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:03:46,531] Trial 11 finished with value: -0.641349196434021 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.30000000000000004, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.1, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 160, 'seq_3M_encoder_dropout_0': 0.35000000000000003, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 160, 'seq_3M_encoder_dropout_1': 0.1, 'seq_3M_encoder_batch_norm_1': True, 'seq_3M_encoder_units_2': 64, 'seq_3M_encoder_dropout_2': 0.0, 'seq_3M_encoder_batch_norm_2': True, 'seq_4M_encoder_n_layers': 3, 'seq_4M_encoder_units_0': 256, 'seq_4M_encoder_dropout_0': 0

Best trial: 3. Best value: -0.787794:  13%|█▎        | 13/100 [15:08<1:57:43, 81.19s/it] Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 3.9 K  | train
1  | seq_0M_encoder        | Sequential             | 25.2 K | train
2  | seq_2M_encoder        | Sequential             | 320    | train
3  | seq_3M_encoder        | Sequential             | 11.7 K | train
4  | seq_4M_encoder        | Sequential             | 2.9 K  | train
5  | 

[I 2025-11-09 15:04:08,439] Trial 12 pruned. Trial was pruned at epoch 22.


Best trial: 3. Best value: -0.787794:  14%|█▍        | 14/100 [15:18<1:25:23, 59.57s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 15.4 K | train
1  | seq_0M_encoder        | Sequential             | 1.9 K  | train
2  | seq_2M_encoder        | Sequential             | 7.1 K  | train
3  | seq_3M_encoder        | Sequential             | 34.1 K | train
4  | seq_4M_encoder        | Sequential             | 32.4 K | train
5  | g

[I 2025-11-09 15:04:18,063] Trial 13 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  15%|█▌        | 15/100 [15:50<1:12:42, 51.32s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 16.9 K | train
1  | seq_0M_encoder        | Sequential             | 32.4 K | train
2  | seq_2M_encoder        | Sequential             | 1.4 K  | train
3  | seq_3M_encoder        | Sequential             | 1.9 K  | train
4  | seq_4M_encoder        | Sequential             | 83.6 K | train
5  | g

[I 2025-11-09 15:04:50,250] Trial 14 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  16%|█▌        | 16/100 [18:05<1:47:01, 76.44s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 1.8 K  | train
1  | seq_0M_encoder        | Sequential             | 1.9 K  | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 61.4 K | train
4  | seq_4M_encoder        | Sequential             | 3.4 K  | train
5  | g

[I 2025-11-09 15:07:05,040] Trial 15 finished with value: -0.7005581259727478 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.30000000000000004, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 224, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.25, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.25, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 32, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 1,

Best trial: 3. Best value: -0.787794:  17%|█▋        | 17/100 [18:15<1:17:59, 56.38s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 2.0 K  | train
1  | seq_0M_encoder        | Sequential             | 54.2 K | train
2  | seq_2M_encoder        | Sequential             | 320    | train
3  | seq_3M_encoder        | Sequential             | 32.4 K | train
4  | seq_4M_encoder        | Sequential             | 23.3 K | train
5  | g

[I 2025-11-09 15:07:14,759] Trial 16 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  18%|█▊        | 18/100 [18:46<1:06:46, 48.86s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 1.8 K  | train
1  | seq_0M_encoder        | Sequential             | 56.8 K | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 58.3 K | train
4  | seq_4M_encoder        | Sequential             | 45.0 K | train
5  | g

[I 2025-11-09 15:07:46,132] Trial 17 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  19%|█▉        | 19/100 [19:00<51:53, 38.43s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 24.9 K | train
1  | seq_0M_encoder        | Sequential             | 61.4 K | train
2  | seq_2M_encoder        | Sequential             | 10.5 K | train
3  | seq_3M_encoder        | Sequential             | 51.6 K | train
4  | seq_4M_encoder        | Sequential             | 16.0 K | train
5  | g

[I 2025-11-09 15:08:00,265] Trial 18 finished with value: -0.7766318917274475 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.1, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 256, 'seq_0M_encoder_dropout_2': 0.5, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.1, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 128, 'seq_3M_encoder_dropout_0': 0.45, 'seq_3M_encoder_batch_norm_0': True, 'seq_3M_encoder_units_1': 192, 'seq_3M_encoder_dropout_1': 0.4, 'seq_3M_encoder_batch_norm_1': False, 'seq_3M_encoder_units_2': 160, 'se

Best trial: 3. Best value: -0.787794:  20%|██        | 20/100 [19:20<43:49, 32.87s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 3.1 K  | train
1  | seq_0M_encoder        | Sequential             | 43.5 K | train
2  | seq_2M_encoder        | Sequential             | 9.2 K  | train
3  | seq_3M_encoder        | Sequential             | 58.3 K | train
4  | seq_4M_encoder        | Sequential             | 1.4 K  | train
5  | gou

[I 2025-11-09 15:08:20,172] Trial 19 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  21%|██        | 21/100 [20:09<49:48, 37.82s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 1.8 K  | train
1  | seq_0M_encoder        | Sequential             | 83.6 K | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 33.7 K | train
4  | seq_4M_encoder        | 

[I 2025-11-09 15:09:09,543] Trial 20 finished with value: -0.6889104843139648 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.4, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.35000000000000003, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.1, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': False, 'se

Best trial: 3. Best value: -0.787794:  22%|██▏       | 22/100 [20:20<38:27, 29.58s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 1.8 K  | train
1  | seq_0M_encoder        | Sequential             | 25.7 K | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 16.9 K | train
4  | seq_4M_encoder        | Sequential             | 59.9 K | train
5  | gou

[I 2025-11-09 15:09:19,901] Trial 21 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  23%|██▎       | 23/100 [20:31<30:54, 24.09s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 1.8 K  | train
1  | seq_0M_encoder        | Sequential             | 18.0 K | train
2  | seq_2M_encoder        | Sequential             | 320    | train
3  | seq_3M_encoder        | Sequential             | 14.1 K | train
4  | seq_4M_encoder        | Sequential             | 55.3 K | train
5  | gou

[I 2025-11-09 15:09:31,174] Trial 22 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  24%|██▍       | 24/100 [20:41<25:12, 19.90s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 2.7 K  | train
1  | seq_0M_encoder        | Sequential             | 27.1 K | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 27.1 K | train
4  | seq_4M_encoder        | Sequential             | 82.5 K | train
5  | gou

[I 2025-11-09 15:09:41,309] Trial 23 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  25%|██▌       | 25/100 [21:04<26:07, 20.90s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 3.9 K  | train
1  | seq_0M_encoder        | Sequential             | 48.8 K | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 2.4 K  | train
4  | seq_4M_encoder        | Sequential             | 45.0 K | train
5  | gou

[I 2025-11-09 15:10:04,530] Trial 24 finished with value: -0.7353797554969788 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.4, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.15000000000000002, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.15000000000000002, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 128, 'seq_3M_encoder_dropout_0': 0.45, 'seq_3M_encoder_batch_norm_0': True, 'seq_3M_encoder_units_1': 192, 'seq_3M_encoder_dropout_1': 0.45, 'seq_3M_encoder_batch_norm_1': True, 'seq_4M_encoder_n_layers': 3, 'seq_4M_encoder_units_0': 160, 'seq_4M_encoder_dropout_0': 0.15000000000

Best trial: 3. Best value: -0.787794:  26%|██▌       | 26/100 [22:07<41:18, 33.49s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 896    | train
1  | seq_0M_encoder        | Sequential             | 3.4 K  | train
2  | seq_2M_encoder        | Sequential             | 320    | train
3  | seq_3M_encoder        | Sequential             | 48.2 K | train
4  | seq_4M_encoder        | Sequential             | 44.1 K | train
5  | gou

[I 2025-11-09 15:11:07,392] Trial 25 finished with value: -0.6045862436294556 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.0, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.45, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 160, 'seq_0M_encoder_dropout_2': 0.2, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 160, 'seq_3M_encoder_dropout_0': 0.30000000000000004, 'seq_3M_encoder_batch_norm_0': True, 'seq_4M_encoder_n_

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  27%|██▋       | 27/100 [22:50<44:05, 36.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 1.8 K  | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 7.1 K  | train
3  | seq_3M_encoder        | Sequential             | 21.6 K | train
4  | seq_4M_encoder        | 

[I 2025-11-09 15:11:50,060] Trial 26 finished with value: -0.7314971685409546 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.30000000000000004, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 224, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 64, 'seq_3M_encoder_dropout_0': 0.4, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 160, 'seq_3M_encoder_dropout_1': 0.30000000000000004, 'seq_3M_encoder_batch_norm_1': False, 'seq_3M_encoder_units_2': 224, 'seq_3M_encoder_dropout_2': 0.4, 'seq_3M_encoder_batch_norm_2': False, 'seq_4M_encoder_n_layers': 2, 'seq_4M_encoder_units_0': 160, 'seq_4M_e

Best trial: 3. Best value: -0.787794:  28%|██▊       | 28/100 [23:01<34:22, 28.64s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 9.0 K  | train
1  | seq_0M_encoder        | Sequential             | 43.8 K | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 7.8 K  | train
4  | seq_4M_encoder        | Sequential             | 60.9 K | train
5  | gou

[I 2025-11-09 15:12:00,978] Trial 27 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  29%|██▉       | 29/100 [24:17<50:39, 42.81s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 13.8 K | train
1  | seq_0M_encoder        | Sequential             | 63.7 K | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 3.8 K  | train
4  | seq_4M_encoder        | Sequential             | 2.9 K  | train
5  | gou

[I 2025-11-09 15:13:16,830] Trial 28 finished with value: -0.6745935678482056 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.1, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.05, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 96, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': False, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': False, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 96, 'seq_3M_encoder_dropout_0': 0.2, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_uni

Best trial: 3. Best value: -0.787794:  30%|███       | 30/100 [25:14<55:02, 47.18s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 18.0 K | train
1  | seq_0M_encoder        | Sequential             | 1.4 K  | train
2  | seq_2M_encoder        | Sequential             | 320    | train
3  | seq_3M_encoder        | Sequential             | 48.6 K | train
4  | seq_4M_encoder        | Sequential             | 3.4 K  | train
5  | gou

[I 2025-11-09 15:14:14,204] Trial 29 finished with value: -0.6447464227676392 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 256, 'seq_0M_encoder_dropout_2': 0.4, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.1, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_

Best trial: 3. Best value: -0.787794:  31%|███       | 31/100 [25:25<41:39, 36.23s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 4.3 K  | train
1  | seq_0M_encoder        | Sequential             | 49.0 K | train
2  | seq_2M_encoder        | Sequential             | 1.3 K  | train
3  | seq_3M_encoder        | Sequential             | 1.4 K  | train
4  | seq_4M_encoder        | Sequential             | 2.9 K  | train
5  | gou

[I 2025-11-09 15:14:24,882] Trial 30 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  32%|███▏      | 32/100 [26:14<45:30, 40.15s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 5.1 K  | train
1  | seq_0M_encoder        | Sequential             | 48.6 K | train
2  | seq_2M_encoder        | Sequential             | 5.2 K  | train
3  | seq_3M_encoder        | Sequential             | 1.9 K  | train
4  | seq_4M_encoder        | 

[I 2025-11-09 15:15:14,190] Trial 31 finished with value: -0.7193642854690552 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.35, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 160, 'seq_0M_encoder_dropout_2': 0.0, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 128, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_u

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  33%|███▎      | 33/100 [27:06<48:50, 43.74s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 4.3 K  | train
1  | seq_0M_encoder        | Sequential             | 26.3 K | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 960    | train
4  | seq_4M_encoder        | 

[I 2025-11-09 15:16:06,289] Trial 32 finished with value: -0.7327104806900024 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 32, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.1, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.15000000000000002, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.1, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 32, 'seq

Best trial: 3. Best value: -0.787794:  34%|███▍      | 34/100 [27:33<42:40, 38.80s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 12.5 K | train
1  | seq_0M_encoder        | Sequential             | 65.4 K | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 35.5 K | train
4  | seq_4M_encoder        | Sequential             | 61.9 K | train
5  | gou

[I 2025-11-09 15:16:33,562] Trial 33 finished with value: -0.4363018572330475 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 32, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 64, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 128, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 128, 'seq_0M_encoder_dropout_2': 0.05, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 1, 'seq_3M_encoder_units_0': 64, 'seq_3M

Best trial: 3. Best value: -0.787794:  35%|███▌      | 35/100 [27:59<37:49, 34.92s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 12.5 K | train
1  | seq_0M_encoder        | Sequential             | 53.0 K | train
2  | seq_2M_encoder        | Sequential             | 1.3 K  | train
3  | seq_3M_encoder        | Sequential             | 44.1 K | train
4  | seq_4M_encoder        | Sequential             | 61.9 K | train
5  | gou

[I 2025-11-09 15:16:59,422] Trial 34 finished with value: -0.770565390586853 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.35, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.30000000000000004, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 192, 'seq_0M_encoder_dropout_2': 0.25, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq

Best trial: 3. Best value: -0.787794:  36%|███▌      | 36/100 [28:10<29:34, 27.73s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 14.7 K | train
1  | seq_0M_encoder        | Sequential             | 70.6 K | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 35.5 K | train
4  | seq_4M_encoder        | Sequential             | 53.6 K | train
5  | gou

[I 2025-11-09 15:17:10,372] Trial 35 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  37%|███▋      | 37/100 [28:32<27:08, 25.85s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 16.2 K | train
1  | seq_0M_encoder        | Sequential             | 2.9 K  | train
2  | seq_2M_encoder        | Sequential             | 7.1 K  | train
3  | seq_3M_encoder        | Sequential             | 54.2 K | train
4  | seq_4M_encoder        | Sequential             | 76.0 K | train
5  | gou

[I 2025-11-09 15:17:31,853] Trial 36 finished with value: -0.7460567951202393 and parameters: {'batch_size': 128, 'reg_loss_weight': 0.4, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 64, 'static_encoder_dropout_0': 0.30000000000000004, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': True, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 224, 'seq_0M_encoder_dropout_2': 0.30000000000000004, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, '

Best trial: 3. Best value: -0.787794:  38%|███▊      | 38/100 [30:54<1:02:48, 60.78s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 16.2 K | train
1  | seq_0M_encoder        | Sequential             | 71.0 K | train
2  | seq_2M_encoder        | Sequential             | 10.2 K | train
3  | seq_3M_encoder        | Sequential             | 38.9 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  | g

[I 2025-11-09 15:19:54,144] Trial 37 finished with value: -0.7604950070381165 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.30000000000000004, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.2, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.30000000000000004, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.30000000000000004, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 224, 'seq_3M_encoder_dropout_1': 0.35000000000

Best trial: 3. Best value: -0.787794:  39%|███▉      | 39/100 [31:31<54:25, 53.52s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 16.2 K | train
1  | seq_0M_encoder        | Sequential             | 90.2 K | train
2  | seq_2M_encoder        | Sequential             | 4.9 K  | train
3  | seq_3M_encoder        | Sequential             | 46.6 K | train
4  | seq_4M_encoder        | Sequential             | 27.1 K | train
5  | g

[I 2025-11-09 15:20:30,730] Trial 38 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  40%|████      | 40/100 [33:53<1:20:14, 80.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 21.7 K | train
1  | seq_0M_encoder        | Sequential             | 39.7 K | train
2  | seq_2M_encoder        | Sequential             | 22.0 K | train
3  | seq_3M_encoder        | Sequential             | 61.5 K | train
4  | seq_4M_encoder        | Sequential             | 67.6 K | train
5  | g

[I 2025-11-09 15:22:53,315] Trial 39 finished with value: -0.7403542995452881 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.30000000000000004, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.30000000000000004, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.35000000000000003, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.2, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 256, 'seq_0M_encoder_dropout_2': 0.4, 'seq_0M_encoder_batch_norm_2': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.05, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.15000000000000002, 'seq_2M_encoder_batch_norm_1': True, '

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  41%|████      | 41/100 [36:42<1:44:58, 106.75s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 2.7 K  | train
1  | seq_0M_encoder        | Sequential             | 2.4 K  | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 46.9 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:25:41,925] Trial 40 finished with value: -0.7159669995307922 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.35, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.25, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 224, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.35000000000000003, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.15000000000000002, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': True,

Best trial: 3. Best value: -0.787794:  42%|████▏     | 42/100 [38:14<1:38:52, 102.28s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 12.0 K | train
1  | seq_0M_encoder        | Sequential             | 2.9 K  | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 46.9 K | train
4  | seq_4M_encoder        | Sequential             | 83.6 K | train
5  | 

[I 2025-11-09 15:27:13,778] Trial 41 finished with value: -0.7612229585647583 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.30000000000000004, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.25, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.30000000000000004, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 192, 'seq_3M_encoder_dropout_1': 0.30000000000000004, 'seq_3M_encoder_batch_norm_1': True, 'seq_4M_encoder_n_layers': 3, 'seq_4M_encoder_units_0': 256, 'seq_4M_encoder_dropout_0': 0.4, 'seq_4M_encoder_batch_norm_0': False, 'seq_4M_encoder_units_1': 224, 'seq_4M_encoder_dropout

Best trial: 3. Best value: -0.787794:  43%|████▎     | 43/100 [39:40<1:32:43, 97.60s/it] Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 2.7 K  | train
1  | seq_0M_encoder        | Sequential             | 2.4 K  | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 61.9 K | train
4  | seq_4M_encoder        | Sequential             | 78.0 K | train
5  | 

[I 2025-11-09 15:28:40,451] Trial 42 finished with value: -0.7488473653793335 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.30000000000000004, 'static_encoder_n_layers': 2, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.30000000000000004, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.25, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.30000000000000004, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 192, 'seq_3M_encoder_dropout_1': 0.30000000000000004, 'seq_3M_encoder_batch_norm_1': True, 'seq_4M_encoder_n_layers': 3, 'seq_4M_encoder_units_0': 256, 'seq_4M_

Best trial: 3. Best value: -0.787794:  44%|████▍     | 44/100 [40:43<1:21:21, 87.16s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 37.1 K | train
1  | seq_0M_encoder        | Sequential             | 2.4 K  | train
2  | seq_2M_encoder        | Sequential             | 7.1 K  | train
3  | seq_3M_encoder        | Sequential             | 46.9 K | train
4  | seq_4M_encoder        | Sequential             | 78.5 K | train
5  | g

[I 2025-11-09 15:29:43,257] Trial 43 finished with value: -0.7034700512886047 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.35, 'static_encoder_n_layers': 1, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.25, 'static_encoder_batch_norm_0': False, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 256, 'seq_3M_encoder_dropout_0': 0.2, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 224, 'seq_3M_encoder_dropout_1': 0.30000000000000004, 'seq_3M_encoder_batch_norm_1': True, 'seq_4M_encoder_n_layers': 3, 'seq_4M_encoder_units_0': 224, 'seq_4M_encoder_dropout_0': 0.45, 'seq_4M_encoder_batch_norm_0': False, 'seq_4M_encoder_units_1': 256, 'seq_4M_encoder_dropout_1': 0.05, 'seq_

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  45%|████▌     | 45/100 [43:16<1:37:56, 106.84s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 32.1 K | train
1  | seq_0M_encoder        | Sequential             | 2.4 K  | train
2  | seq_2M_encoder        | Sequential             | 4.9 K  | train
3  | seq_3M_encoder        | Sequential             | 27.1 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:32:16,012] Trial 44 finished with value: -0.7828196883201599 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 224, 'seq_3M_encoder_dropout_0': 0.35000000000000003, 'seq_3M_encoder_batch

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  46%|████▌     | 46/100 [45:55<1:50:08, 122.37s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 37.1 K | train
1  | seq_0M_encoder        | Sequential             | 1.9 K  | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 40.3 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:34:54,622] Trial 45 finished with value: -0.7012860774993896 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 128, 'seq_3M_encoder_dropout_0': 0.35000000000000003, 'seq_3M_encoder_batch_no

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  47%|████▋     | 47/100 [47:27<1:40:02, 113.25s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 9.2 K  | train
1  | seq_0M_encoder        | Sequential             | 2.4 K  | train
2  | seq_2M_encoder        | Sequential             | 9.2 K  | train
3  | seq_3M_encoder        | Sequential             | 46.9 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:36:26,600] Trial 46 finished with value: -0.7256733775138855 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 192, 'seq_3M_encoder_dropout_0': 0.4, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encoder_units_1': 192, 'seq_3M_encoder_dropout_1': 0.25, 'seq_3M_encoder_batch_norm_1': False, 'seq_4M_encod

Best trial: 3. Best value: -0.787794:  48%|████▊     | 48/100 [48:00<1:17:29, 89.41s/it] Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 35.5 K | train
2  | seq_2M_encoder        | Sequential             | 640    | train
3  | seq_3M_encoder        | Sequential             | 66.1 K | train
4  | seq_4M_encoder        | Sequential             | 103 K  | train
5  | 

[I 2025-11-09 15:37:00,376] Trial 47 pruned. Trial was pruned at epoch 10.


Best trial: 3. Best value: -0.787794:  49%|████▉     | 49/100 [49:55<1:22:23, 96.94s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 35.5 K | train
2  | seq_2M_encoder        | Sequential             | 960    | train
3  | seq_3M_encoder        | Sequential             | 53.8 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  | g

[I 2025-11-09 15:38:54,877] Trial 48 finished with value: -0.7640135884284973 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 256, 'seq_3M_encoder_dropout_0': 0.45, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_encod

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 3. Best value: -0.787794:  50%|█████     | 50/100 [52:30<1:35:17, 114.35s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 66.1 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:41:29,853] Trial 49 finished with value: -0.6471730470657349 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.15000000000000002, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 1, 'seq_2M_encoder_units_0': 96, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_3M_encoder_n_layers': 3, 'seq_3M_encoder_units_0': 256, 'seq_3M_encoder_dropout_0': 0.45, 'seq_3M_encoder_batch_norm_0': False, 'seq_3M_enc

Best trial: 50. Best value: -0.811696:  51%|█████     | 51/100 [53:22<1:18:06, 95.65s/it] Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 35.5 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 66.1 K | train
4  | seq_4M_encoder        | Sequential             | 52.8 K | train
5  |

[I 2025-11-09 15:42:21,867] Trial 50 finished with value: -0.8116961717605591 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 

Best trial: 50. Best value: -0.811696:  52%|█████▏    | 52/100 [53:43<58:33, 73.19s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 60.9 K | train
4  | seq_4M_encoder        | Sequential             | 103 K  | train
5  | 

[I 2025-11-09 15:42:42,647] Trial 51 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 50. Best value: -0.811696:  53%|█████▎    | 53/100 [55:20<1:03:05, 80.54s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 23.6 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 55.7 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:44:20,352] Trial 52 finished with value: -0.7880369424819946 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 50. Best value: -0.811696:  54%|█████▍    | 54/100 [56:52<1:04:19, 83.91s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 18.4 K | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:45:52,123] Trial 53 finished with value: -0.7507886290550232 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_d

Best trial: 54. Best value: -0.839481:  55%|█████▌    | 55/100 [57:52<57:32, 76.72s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        | Sequential             | 102 K  | train
5  | 

[I 2025-11-09 15:46:52,057] Trial 54 finished with value: -0.8394807577133179 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  56%|█████▌    | 56/100 [59:25<59:50, 81.61s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        |

[I 2025-11-09 15:48:25,077] Trial 55 finished with value: -0.7847609519958496 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.350

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  57%|█████▋    | 57/100 [1:01:01<1:01:32, 85.86s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder     

[I 2025-11-09 15:50:00,868] Trial 56 finished with value: -0.8007765412330627 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.3

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  58%|█████▊    | 58/100 [1:02:34<1:01:38, 88.06s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 23.6 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 38.2 K | train
4  | seq_4M_encoder     

[I 2025-11-09 15:51:34,064] Trial 57 finished with value: -0.7808784246444702 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.35

Best trial: 54. Best value: -0.839481:  59%|█████▉    | 59/100 [1:03:58<59:15, 86.72s/it]  Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 10.0 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  

[I 2025-11-09 15:52:57,652] Trial 58 finished with value: -0.7717787027359009 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.25, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.30

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  60%|██████    | 60/100 [1:05:31<59:11, 88.78s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:54:31,232] Trial 59 finished with value: -0.7755399346351624 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64

Best trial: 54. Best value: -0.839481:  61%|██████    | 61/100 [1:06:57<57:12, 88.02s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 35.5 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 50.1 K | train
4  | seq_4M_encoder        | Sequential             | 102 K  | train
5  | 

[I 2025-11-09 15:55:57,473] Trial 60 finished with value: -0.7619509696960449 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dr

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  62%|██████▏   | 62/100 [1:08:29<56:30, 89.23s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 33.7 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 38.2 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:57:29,527] Trial 61 finished with value: -0.7166949510574341 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encod

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  63%|██████▎   | 63/100 [1:10:01<55:28, 89.95s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 12.3 K | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 60.5 K | train
4  | seq_4M_encoder       

[I 2025-11-09 15:59:01,171] Trial 62 finished with value: -0.7744479179382324 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96,

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  64%|██████▍   | 64/100 [1:11:35<54:44, 91.24s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 20.4 K | train
2  | seq_2M_encoder        | Sequential             | 10.2 K | train
3  | seq_3M_encoder        | Sequential             | 45.4 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:00:35,404] Trial 63 finished with value: -0.6406212449073792 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  65%|██████▌   | 65/100 [1:13:11<53:59, 92.55s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 28.8 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:02:11,012] Trial 64 finished with value: -0.7779664993286133 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.30000000000000004, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 32, 'seq_2M_encoder_d

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  66%|██████▌   | 66/100 [1:14:49<53:23, 94.23s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 18.4 K | train
1  | seq_0M_encoder        | Sequential             | 44.1 K | train
2  | seq_2M_encoder        | Sequential             | 11.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.2 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:03:49,171] Trial 65 finished with value: -0.657000720500946 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, '

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  67%|██████▋   | 67/100 [1:16:12<49:58, 90.85s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 12.3 K | train
1  | seq_0M_encoder        | Sequential             | 27.1 K | train
2  | seq_2M_encoder        | Sequential             | 10.0 K | train
3  | seq_3M_encoder        | Sequential             | 60.9 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:05:12,138] Trial 66 finished with value: -0.7702014446258545 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_

Best trial: 54. Best value: -0.839481:  68%|██████▊   | 68/100 [1:16:34<37:26, 70.21s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 61.9 K | train
2  | seq_2M_encoder        | Sequential             | 17.6 K | train
3  | seq_3M_encoder        | Sequential             | 50.1 K | train
4  | seq_4M_encoder        | Sequential             | 75.2 K | train
5  | 

[I 2025-11-09 16:05:34,188] Trial 67 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  69%|██████▉   | 69/100 [1:18:13<40:42, 78.80s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 11.1 K | train
1  | seq_0M_encoder        | Sequential             | 26.8 K | train
2  | seq_2M_encoder        | Sequential             | 11.4 K | train
3  | seq_3M_encoder        | Sequential             | 41.4 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:07:13,029] Trial 68 finished with value: -0.7084445953369141 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.0, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 256, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.3

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  70%|███████   | 70/100 [1:19:44<41:12, 82.43s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 28.8 K | train
1  | seq_0M_encoder        | Sequential             | 33.7 K | train
2  | seq_2M_encoder        | Sequential             | 10.2 K | train
3  | seq_3M_encoder        | Sequential             | 66.1 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:08:43,936] Trial 69 finished with value: -0.740960955619812 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.0, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.45, '

Best trial: 54. Best value: -0.839481:  71%|███████   | 71/100 [1:21:06<39:45, 82.26s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  | 

[I 2025-11-09 16:10:05,807] Trial 70 finished with value: -0.7043193578720093 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.30000000000000004, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.30000000000000004, 'seq_2M_encoder_batch_norm_1': False,

Best trial: 54. Best value: -0.839481:  72%|███████▏  | 72/100 [1:21:26<29:46, 63.79s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  | 

[I 2025-11-09 16:10:26,501] Trial 71 pruned. Trial was pruned at epoch 10.


Best trial: 54. Best value: -0.839481:  73%|███████▎  | 73/100 [1:22:49<31:13, 69.39s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        | Sequential             | 35.5 K | train
5  | 

[I 2025-11-09 16:11:48,963] Trial 72 finished with value: -0.7819703817367554 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.2, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': False, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 64, 'seq_2M_encoder_dropout_2': 0.

Best trial: 54. Best value: -0.839481:  74%|███████▍  | 74/100 [1:23:10<23:48, 54.95s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 14.1 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 50.1 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  | 

[I 2025-11-09 16:12:10,200] Trial 73 pruned. Trial was pruned at epoch 10.


Best trial: 54. Best value: -0.839481:  75%|███████▌  | 75/100 [1:23:31<18:41, 44.84s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 26.8 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        | Sequential             | 61.9 K | train
5  | 

[I 2025-11-09 16:12:31,471] Trial 74 pruned. Trial was pruned at epoch 10.


Best trial: 54. Best value: -0.839481:  76%|███████▌  | 76/100 [1:23:53<15:06, 37.78s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 21.8 K | train
3  | seq_3M_encoder        | Sequential             | 41.4 K | train
4  | seq_4M_encoder        | Sequential             | 52.8 K | train
5  | 

[I 2025-11-09 16:12:52,773] Trial 75 pruned. Trial was pruned at epoch 10.


Best trial: 54. Best value: -0.839481:  77%|███████▋  | 77/100 [1:24:38<15:18, 39.92s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 28.7 K | train
1  | seq_0M_encoder        | Sequential             | 33.7 K | train
2  | seq_2M_encoder        | Sequential             | 4.8 K  | train
3  | seq_3M_encoder        | Sequential             | 28.6 K | train
4  | seq_4M_encoder        | Sequential             | 23.3 K | train
5  | 

[I 2025-11-09 16:13:37,686] Trial 76 finished with value: -0.7879155874252319 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96,

Best trial: 54. Best value: -0.839481:  78%|███████▊  | 78/100 [1:26:05<19:52, 54.22s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 9.2 K  | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 21.8 K | train
3  | seq_3M_encoder        | Sequential             | 41.4 K | train
4  | seq_4M_encoder        | Sequential             | 46.6 K | train
5  | 

[I 2025-11-09 16:15:05,283] Trial 77 finished with value: -0.7413249015808105 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.15000000000000002, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  79%|███████▉  | 79/100 [1:27:37<22:56, 65.52s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 28.8 K | train
1  | seq_0M_encoder        | Sequential             | 33.7 K | train
2  | seq_2M_encoder        | Sequential             | 21.8 K | train
3  | seq_3M_encoder        | Sequential             | 28.6 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:16:37,176] Trial 78 finished with value: -0.6509342193603516 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.1, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.2, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dr

Best trial: 54. Best value: -0.839481:  80%|████████  | 80/100 [1:28:45<22:01, 66.09s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 52.8 K | train
2  | seq_2M_encoder        | Sequential             | 4.9 K  | train
3  | seq_3M_encoder        | Sequential             | 47.7 K | train
4  | seq_4M_encoder        | Sequential             | 75.6 K | train
5  | 

[I 2025-11-09 16:17:44,574] Trial 79 finished with value: -0.7364717721939087 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.35000000000000003, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': False,

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  81%|████████  | 81/100 [1:30:27<24:24, 77.10s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:19:27,385] Trial 80 finished with value: -0.6295801997184753 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.1, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.25, 'seq_2M_encoder_batch_norm_1': True, 'seq_3M_encoder_n_layers': 3, 'se

Best trial: 54. Best value: -0.839481:  82%|████████▏ | 82/100 [1:31:31<21:56, 73.13s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 19.7 K | train
3  | seq_3M_encoder        | Sequential             | 53.8 K | train
4  | seq_4M_encoder        | Sequential             | 3.4 K  | train
5  | 

[I 2025-11-09 16:20:31,254] Trial 81 finished with value: -0.800776481628418 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.5, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.4, 

Best trial: 54. Best value: -0.839481:  83%|████████▎ | 83/100 [1:33:04<22:24, 79.07s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 55.7 K | train
4  | seq_4M_encoder        | Sequential             | 70.1 K | train
5  | 

[I 2025-11-09 16:22:04,192] Trial 82 finished with value: -0.7811211347579956 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.45, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 128,

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  84%|████████▍ | 84/100 [1:34:39<22:20, 83.79s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 38.2 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:23:38,991] Trial 83 finished with value: -0.7697160840034485 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.5, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.4, 's

Best trial: 54. Best value: -0.839481:  85%|████████▌ | 85/100 [1:36:01<20:51, 83.41s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 18.4 K | train
1  | seq_0M_encoder        | Sequential             | 33.7 K | train
2  | seq_2M_encoder        | Sequential             | 26.0 K | train
3  | seq_3M_encoder        | Sequential             | 60.5 K | train
4  | seq_4M_encoder        | Sequential             | 3.4 K  | train
5  | 

[I 2025-11-09 16:25:01,496] Trial 84 finished with value: -0.7285853028297424 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.35000000000000003, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.0, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_

Best trial: 54. Best value: -0.839481:  86%|████████▌ | 86/100 [1:36:22<15:05, 64.68s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 60.9 K | train
4  | seq_4M_encoder        | Sequential             | 35.5 K | train
5  | 

[I 2025-11-09 16:25:22,489] Trial 85 pruned. Trial was pruned at epoch 10.


`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  87%|████████▋ | 87/100 [1:39:17<21:10, 97.77s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 17.6 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 11.4 K | train
3  | seq_3M_encoder        | Sequential             | 60.9 K | train
4  | seq_4M_encoder       

[I 2025-11-09 16:28:17,462] Trial 86 finished with value: -0.7994418740272522 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.1, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.45, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder

Best trial: 54. Best value: -0.839481:  88%|████████▊ | 88/100 [1:40:40<18:38, 93.22s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 46.6 K | train
2  | seq_2M_encoder        | Sequential             | 6.1 K  | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        | Sequential             | 44.1 K | train
5  | 

[I 2025-11-09 16:29:40,054] Trial 87 finished with value: -0.7800291776657104 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.1, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.5, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.25, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 64, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.5, '

Best trial: 54. Best value: -0.839481:  89%|████████▉ | 89/100 [1:41:56<16:09, 88.10s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 18.4 K | train
1  | seq_0M_encoder        | Sequential             | 23.6 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 22.1 K | train
4  | seq_4M_encoder        | Sequential             | 35.5 K | train
5  | 

[I 2025-11-09 16:30:56,210] Trial 88 finished with value: -0.7628003358840942 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.1, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.45, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.30000000000000004, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 32, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_d

Best trial: 54. Best value: -0.839481:  90%|█████████ | 90/100 [1:43:03<13:38, 81.81s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 10.3 K | train
1  | seq_0M_encoder        | Sequential             | 44.1 K | train
2  | seq_2M_encoder        | Sequential             | 13.2 K | train
3  | seq_3M_encoder        | Sequential             | 44.5 K | train
4  | seq_4M_encoder        | Sequential             | 35.5 K | train
5  | 

[I 2025-11-09 16:32:03,347] Trial 89 finished with value: -0.7546712160110474 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.0, 'static_encoder_batch_norm_1': True, 'static_encoder_units_2': 96, 'static_encoder_dropout_2': 0.45, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 96, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_d

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  91%|█████████ | 91/100 [1:45:57<16:25, 109.47s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 2.4 K  | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 72.3 K | train
4  | seq_4M_encoder      

[I 2025-11-09 16:34:57,345] Trial 90 finished with value: -0.7274932861328125 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.1, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 96, 'static_encoder_dropout_0': 0.30000000000000004, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 32, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': False, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.15000000000000002, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 32, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.5, 'seq_2M_encoder_batch_norm_1': False, '

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  92%|█████████▏| 92/100 [1:48:48<17:03, 127.90s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 53.6 K | train
4  | seq_4M_encoder      

[I 2025-11-09 16:37:48,245] Trial 91 finished with value: -0.7569764852523804 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.5, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.30000000000000004, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.4, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.5, 'seq_2M_encoder_batch_norm_2': True, 'seq_3M_encoder_n_layers': 3, 'seq

Best trial: 54. Best value: -0.839481:  93%|█████████▎| 93/100 [1:51:41<16:29, 141.42s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 31.3 K | train
2  | seq_2M_encoder        | Sequential             | 19.7 K | train
3  | seq_3M_encoder        | Sequential             | 39.7 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  |

[I 2025-11-09 16:40:41,210] Trial 92 finished with value: -0.7580684423446655 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_

Best trial: 54. Best value: -0.839481:  94%|█████████▍| 94/100 [1:53:13<12:39, 126.50s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 37.1 K | train
1  | seq_0M_encoder        | Sequential             | 3.4 K  | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 70.5 K | train
4  | seq_4M_encoder        | Sequential             | 111 K  | train
5  |

[I 2025-11-09 16:42:12,917] Trial 93 finished with value: -0.7550351619720459 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.4, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.2, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.4, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 224, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 128, 'seq_2M_encoder_dropout_2': 0.25, 

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  95%|█████████▌| 95/100 [1:56:15<11:55, 143.19s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 58.7 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 78.5 K | train
4  | seq_4M_encoder      

[I 2025-11-09 16:45:15,032] Trial 94 finished with value: -0.7431448698043823 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.25, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 128, 'static_encoder_dropout_1': 0.45, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.05, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 224, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.35000000000000003, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.35000000000000003, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.35000000000000003, 'seq_2M_encoder_batch_norm_2': False, 'seq_3M_encoder

Best trial: 54. Best value: -0.839481:  96%|█████████▌| 96/100 [1:59:02<10:01, 150.44s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 28.8 K | train
1  | seq_0M_encoder        | Sequential             | 35.5 K | train
2  | seq_2M_encoder        | Sequential             | 7.1 K  | train
3  | seq_3M_encoder        | Sequential             | 27.8 K | train
4  | seq_4M_encoder        | Sequential             | 82.5 K | train
5  |

[I 2025-11-09 16:48:02,402] Trial 95 finished with value: -0.7490900158882141 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.15000000000000002, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.45, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.25, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.15000000000000002, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 3, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.2, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.30000000000000004, 'seq_0M_encoder_batch_norm_1': True, 'seq_0M_encoder_units_2': 128, 'seq_0M_encoder_dropout_2': 0.35000000000000003, 'seq_0M_encoder_batch_norm_2': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.25, 'seq_2M_encoder_batch_norm_0': Tru

Best trial: 54. Best value: -0.839481:  97%|█████████▋| 97/100 [2:00:31<06:35, 131.79s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 20.5 K | train
1  | seq_0M_encoder        | Sequential             | 34.1 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 60.9 K | train
4  | seq_4M_encoder        | Sequential             | 2.9 K  | train
5  |

[I 2025-11-09 16:49:30,675] Trial 96 finished with value: -0.6613686084747314 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.05, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 96, 'static_encoder_dropout_1': 0.4, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.1, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 256, 'seq_0M_encoder_dropout_1': 0.05, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 2, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.4, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.2, 'seq_2M_encoder_batch_norm_1': False, 'seq_3M_encoder_n_layers': 2, 'seq_3M_encoder_units_0': 192, 's

`Trainer.fit` stopped: `max_epochs=50` reached.
Best trial: 54. Best value: -0.839481:  98%|█████████▊| 98/100 [2:02:02<03:59, 119.62s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 14.1 K | train
1  | seq_0M_encoder        | Sequential             | 2.4 K  | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 60.9 K | train
4  | seq_4M_encoder      

[I 2025-11-09 16:51:01,881] Trial 97 finished with value: -0.7733559608459473 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.30000000000000004, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.5, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.5, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 128, 'static_encoder_dropout_2': 0.5, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 192, 'seq_0M_encoder_dropout_0': 0.5, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 160, 'seq_0M_encoder_dropout_1': 0.1, 'seq_0M_encoder_batch_norm_1': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.30000000000000004, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 128, 'seq_2M_encoder_dropout_1': 0.45, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 32, 

Best trial: 54. Best value: -0.839481:  99%|█████████▉| 99/100 [2:03:52<01:56, 116.81s/it]Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 16.3 K | train
1  | seq_0M_encoder        | Sequential             | 27.1 K | train
2  | seq_2M_encoder        | Sequential             | 16.6 K | train
3  | seq_3M_encoder        | Sequential             | 60.9 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  |

[I 2025-11-09 16:52:52,130] Trial 98 finished with value: -0.7951953411102295 and parameters: {'batch_size': 32, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': False, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 32, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 1, 'seq_0M_encoder_units_0': 160, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.45, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': True, 'seq_2M_encoder_units_2': 96, 'seq_2M_encoder_dropout_2': 0.30000000000000004, 'seq_2M_encoder_batch_norm_2': True, 'seq_3M_encoder_n_

Best trial: 54. Best value: -0.839481: 100%|██████████| 100/100 [2:05:08<00:00, 75.08s/it] 

[I 2025-11-09 16:54:07,881] Trial 99 finished with value: -0.7465420961380005 and parameters: {'batch_size': 64, 'reg_loss_weight': 0.2, 'static_encoder_n_layers': 3, 'static_encoder_units_0': 128, 'static_encoder_dropout_0': 0.35000000000000003, 'static_encoder_batch_norm_0': True, 'static_encoder_units_1': 64, 'static_encoder_dropout_1': 0.15000000000000002, 'static_encoder_batch_norm_1': False, 'static_encoder_units_2': 64, 'static_encoder_dropout_2': 0.0, 'static_encoder_batch_norm_2': True, 'seq_0M_encoder_n_layers': 2, 'seq_0M_encoder_units_0': 128, 'seq_0M_encoder_dropout_0': 0.45, 'seq_0M_encoder_batch_norm_0': False, 'seq_0M_encoder_units_1': 192, 'seq_0M_encoder_dropout_1': 0.0, 'seq_0M_encoder_batch_norm_1': True, 'seq_2M_encoder_n_layers': 3, 'seq_2M_encoder_units_0': 64, 'seq_2M_encoder_dropout_0': 0.5, 'seq_2M_encoder_batch_norm_0': True, 'seq_2M_encoder_units_1': 96, 'seq_2M_encoder_dropout_1': 0.4, 'seq_2M_encoder_batch_norm_1': False, 'seq_2M_encoder_units_2': 96, 'seq




### 개별 모델 및 Sequential 모델 성능 평가 함수 정의

In [25]:
@torch.no_grad()
def evaluate_individual_model(model, testloader, model_name):
    """개별 모델의 성능 평가"""
    model.eval()
    
    all_preds = []
    all_targets = []
    
    for batch in testloader:
        if model_name == "Model1":
            x_static, x_0M, x_0M_goutallier, y_2M = batch
            pred = model(x_static, x_0M, x_0M_goutallier)
            all_preds.append(pred)
            all_targets.append(y_2M)
        elif model_name == "Model2":
            x_static, x_0M, x_2M, x_0M_goutallier, y_3M = batch
            pred = model(x_static, x_0M, x_2M, x_0M_goutallier)
            all_preds.append(pred)
            all_targets.append(y_3M)
        elif model_name == "Model3":
            x_static, x_0M, x_2M, x_3M, x_0M_goutallier, y_4M = batch
            pred = model(x_static, x_0M, x_2M, x_3M, x_0M_goutallier)
            all_preds.append(pred)
            all_targets.append(y_4M)
        elif model_name == "Model4":
            x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier, y_combined = batch
            logits, regs, _ = model(x_static, x_0M, x_2M, x_3M, x_4M, x_0M_goutallier)
            y_label = y_combined[:, seq_features_6M:seq_features_6M+1]
            y_reg = torch.cat([y_combined[:, :seq_features_6M], y_combined[:, seq_features_6M+1:]], dim=1)
            
            all_preds.append({'logits': logits, 'regs': regs})
            all_targets.append({'label': y_label, 'reg': y_reg})
    
    if model_name == "Model4":
        # 분류 및 회귀 성능 계산
        all_logits = torch.cat([p['logits'] for p in all_preds], dim=0)
        all_regs = torch.cat([p['regs'] for p in all_preds], dim=0)
        all_labels = torch.cat([t['label'] for t in all_targets], dim=0)
        all_reg_targets = torch.cat([t['reg'] for t in all_targets], dim=0)
        
        mse = F.mse_loss(all_regs, all_reg_targets).item()
        probs = all_logits.sigmoid().flatten()
        labels_int = all_labels.flatten().to(torch.int)
        
        # 분류 성능 계산
        pred_labels = (probs > 0.5).int().cpu().numpy()
        labels_np = labels_int.cpu().numpy()
        
        roc_auc = roc_auc_score(labels_np, probs.cpu().numpy())
        ap = average_precision_score(labels_np, probs.cpu().numpy())
        accuracy = accuracy_score(labels_np, pred_labels)
        
        return {
            'mse': mse,
            'roc_auc': roc_auc,
            'ap': ap,
            'accuracy': accuracy
        }
    else:
        # 회귀 성능만 계산
        all_preds = torch.cat(all_preds, dim=0)
        all_targets = torch.cat(all_targets, dim=0)
        mse = F.mse_loss(all_preds, all_targets).item()
        mae = F.l1_loss(all_preds, all_targets).item()
        
        return {
            'mse': mse,
            'mae': mae
        }

@torch.no_grad()
def evaluate_sequential_model(sequential_model, testset_model1):
    """Sequential 모델의 성능 평가 (0M 입력만으로 전체 시계열 예측)"""
    sequential_model.eval()
    
    all_pred_2M = []
    all_pred_3M = []
    all_pred_4M = []
    all_pred_6M = []
    all_pred_y_logits = []
    all_pred_6M_goutallier = []
    
    all_true_2M = []
    all_true_3M = []
    all_true_4M = []
    all_true_6M = []
    all_true_y = []
    all_true_6M_goutallier = []
    
    for i in range(len(testset_model1)):
        x_static, x_0M, x_0M_goutallier, y_2M = testset_model1[i]
        x_static = x_static.unsqueeze(0)
        x_0M = x_0M.unsqueeze(0)
        x_0M_goutallier = x_0M_goutallier.unsqueeze(0)
        
        # Sequential 모델 예측
        predictions = sequential_model(x_static, x_0M, x_0M_goutallier)
        
        all_pred_2M.append(predictions['pred_2M'])
        all_pred_3M.append(predictions['pred_3M'])
        all_pred_4M.append(predictions['pred_4M'])
        all_pred_6M.append(predictions['pred_6M'])
        all_pred_y_logits.append(predictions['pred_y_logits'])
        all_pred_6M_goutallier.append(predictions['pred_6M_goutallier'])
        
        # 실제 값 추출
        _, _, _, _, y_3M = testset_model2[i]
        _, _, _, _, _, y_4M = testset_model3[i]
        _, _, _, _, _, _, y_combined = testset_model4[i]
        
        y_6M = y_combined[:seq_features_6M]
        y_label = y_combined[seq_features_6M:seq_features_6M+1]
        y_6M_goutallier = y_combined[seq_features_6M+1:]
        
        all_true_2M.append(y_2M)
        all_true_3M.append(y_3M)
        all_true_4M.append(y_4M)
        all_true_6M.append(y_6M)
        all_true_y.append(y_label)
        all_true_6M_goutallier.append(y_6M_goutallier)
    
    # 텐서로 변환
    pred_2M = torch.cat(all_pred_2M, dim=0)
    pred_3M = torch.cat(all_pred_3M, dim=0)
    pred_4M = torch.cat(all_pred_4M, dim=0)
    pred_6M = torch.cat(all_pred_6M, dim=0)
    pred_y_logits = torch.cat(all_pred_y_logits, dim=0)
    pred_6M_goutallier = torch.cat(all_pred_6M_goutallier, dim=0)
    
    true_2M = torch.stack(all_true_2M)
    true_3M = torch.stack(all_true_3M)
    true_4M = torch.stack(all_true_4M)
    true_6M = torch.stack(all_true_6M)
    true_y = torch.stack(all_true_y)
    true_6M_goutallier = torch.stack(all_true_6M_goutallier)
    
    # 성능 계산
    mse_2M = F.mse_loss(pred_2M, true_2M).item()
    mse_3M = F.mse_loss(pred_3M, true_3M).item()
    mse_4M = F.mse_loss(pred_4M, true_4M).item()
    mse_6M = F.mse_loss(pred_6M, true_6M).item()
    mse_6M_goutallier = F.mse_loss(pred_6M_goutallier, true_6M_goutallier).item()
    
    mae_2M = F.l1_loss(pred_2M, true_2M).item()
    mae_3M = F.l1_loss(pred_3M, true_3M).item()
    mae_4M = F.l1_loss(pred_4M, true_4M).item()
    mae_6M = F.l1_loss(pred_6M, true_6M).item()
    mae_6M_goutallier = F.l1_loss(pred_6M_goutallier, true_6M_goutallier).item()
    
    # 분류 성능
    pred_y_probs = pred_y_logits.sigmoid().flatten()
    true_y_int = true_y.flatten().to(torch.int)
    
    # 분류 예측 및 성능 계산
    pred_labels = (pred_y_probs > 0.5).int().cpu().numpy()
    labels_np = true_y_int.cpu().numpy()
    
    roc_auc = roc_auc_score(labels_np, pred_y_probs.cpu().numpy())
    ap = average_precision_score(labels_np, pred_y_probs.cpu().numpy())
    accuracy = accuracy_score(labels_np, pred_labels)
    
    return {
        'mse_2M': mse_2M,
        'mse_3M': mse_3M,
        'mse_4M': mse_4M,
        'mse_6M': mse_6M,
        'mse_6M_goutallier': mse_6M_goutallier,
        'mae_2M': mae_2M,
        'mae_3M': mae_3M,
        'mae_4M': mae_4M,
        'mae_6M': mae_6M,
        'mae_6M_goutallier': mae_6M_goutallier,
        'roc_auc': roc_auc,
        'ap': ap,
        'accuracy': accuracy
    }

print("성능 평가 함수 정의 완료")


성능 평가 함수 정의 완료


### 최적화된 파라미터로 최종 모델 생성 및 학습

In [None]:
def create_final_model_from_study(study, model_class, model_kwargs):
    """최적화된 파라미터로 최종 모델 생성"""
    best_trial = study.best_trial
    model = model_class(trial=best_trial, **model_kwargs)
    return model, best_trial.params

# 최적화된 모델 생성
print("\n" + "=" * 80)
print("최적화된 파라미터로 최종 모델 생성")
print("=" * 80)

# Model 1 최종 모델
print("\n[Model 1] 최종 모델 생성 중...")
final_model1, best_params1 = create_final_model_from_study(
    study1,
    OptimizedSequentialMLP1,
    {
        'static_features': static_features,
        'seq_0M_features': seq_features_0M,
        'goutallier_0M_features': goutallier_features_0M,
        'out_features_2M': seq_features_2M
    }
)

# Model 2 최종 모델
print("[Model 2] 최종 모델 생성 중...")
final_model2, best_params2 = create_final_model_from_study(
    study2,
    OptimizedSequentialMLP2,
    {
        'static_features': static_features,
        'seq_0M_features': seq_features_0M,
        'seq_2M_features': seq_features_2M,
        'goutallier_0M_features': goutallier_features_0M,
        'out_features_3M': seq_features_3M
    }
)

# Model 3 최종 모델
print("[Model 3] 최종 모델 생성 중...")
final_model3, best_params3 = create_final_model_from_study(
    study3,
    OptimizedSequentialMLP3,
    {
        'static_features': static_features,
        'seq_0M_features': seq_features_0M,
        'seq_2M_features': seq_features_2M,
        'seq_3M_features': seq_features_3M,
        'goutallier_0M_features': goutallier_features_0M,
        'out_features_4M': seq_features_4M
    }
)

# Model 4 최종 모델
print("[Model 4] 최종 모델 생성 중...")
final_model4, best_params4 = create_final_model_from_study(
    study4,
    OptimizedSequentialMLP4,
    {
        'static_features': static_features,
        'seq_0M_features': seq_features_0M,
        'seq_2M_features': seq_features_2M,
        'seq_3M_features': seq_features_3M,
        'seq_4M_features': seq_features_4M,
        'goutallier_0M_features': goutallier_features_0M,
        'out_features_total': seq_features_6M + 1 + goutallier_features_6M
    }
)

print("\n최종 모델 생성 완료!")

# 최종 모델 학습
print("\n" + "=" * 80)
print("최종 모델 학습 시작")
print("=" * 80)

# 최적 배치 크기 추출
batch_size1 = best_params1.get('batch_size', 64)
batch_size2 = best_params2.get('batch_size', 64)
batch_size3 = best_params3.get('batch_size', 64)
batch_size4 = best_params4.get('batch_size', 64)

# Model 1 학습
print("\n[Model 1] 학습 시작...")
trainloader1_final = DataLoader(trainset_model1, batch_size=batch_size1, shuffle=True, pin_memory=True)
valloader1_final = DataLoader(valset_model1, batch_size=batch_size1)
testloader1_final = DataLoader(testset_model1, batch_size=batch_size1)

trainer1_final = L.Trainer(
    max_epochs=100,
    gradient_clip_val=1.0,
    callbacks=[
        ModelCheckpoint(monitor='val/loss', mode='min', save_top_k=1, save_last=False, filename='final-model1-best', dirpath=CHECKPOINT_DIR),
        CleanupCheckpointCallback(checkpoint_dir=CHECKPOINT_DIR, filename_prefix='final-model1-best'),
        EarlyStopping(monitor='val/loss', mode='min', patience=15)
    ]
)

trainer1_final.fit(final_model1, trainloader1_final, valloader1_final)
test_result1_final = trainer1_final.test(final_model1, testloader1_final)
print(f"[Model 1] 학습 완료 - Test MSE: {test_result1_final[0]['test/mse']:.6f}")

# Model 2 학습
print("\n[Model 2] 학습 시작...")
trainloader2_final = DataLoader(trainset_model2, batch_size=batch_size2, shuffle=True, pin_memory=True)
valloader2_final = DataLoader(valset_model2, batch_size=batch_size2)
testloader2_final = DataLoader(testset_model2, batch_size=batch_size2)

trainer2_final = L.Trainer(
    max_epochs=100,
    gradient_clip_val=1.0,
    callbacks=[
        ModelCheckpoint(monitor='val/loss', mode='min', save_top_k=1, save_last=False, filename='final-model2-best', dirpath=CHECKPOINT_DIR),
        CleanupCheckpointCallback(checkpoint_dir=CHECKPOINT_DIR, filename_prefix='final-model2-best'),
        EarlyStopping(monitor='val/loss', mode='min', patience=15)
    ]
)

trainer2_final.fit(final_model2, trainloader2_final, valloader2_final)
test_result2_final = trainer2_final.test(final_model2, testloader2_final)
print(f"[Model 2] 학습 완료 - Test MSE: {test_result2_final[0]['test/mse']:.6f}")

# Model 3 학습
print("\n[Model 3] 학습 시작...")
trainloader3_final = DataLoader(trainset_model3, batch_size=batch_size3, shuffle=True, pin_memory=True)
valloader3_final = DataLoader(valset_model3, batch_size=batch_size3)
testloader3_final = DataLoader(testset_model3, batch_size=batch_size3)

trainer3_final = L.Trainer(
    max_epochs=100,
    gradient_clip_val=1.0,
    callbacks=[
        ModelCheckpoint(monitor='val/loss', mode='min', save_top_k=1, save_last=False, filename='final-model3-best', dirpath=CHECKPOINT_DIR),
        CleanupCheckpointCallback(checkpoint_dir=CHECKPOINT_DIR, filename_prefix='final-model3-best'),
        EarlyStopping(monitor='val/loss', mode='min', patience=15)
    ]
)

trainer3_final.fit(final_model3, trainloader3_final, valloader3_final)
test_result3_final = trainer3_final.test(final_model3, testloader3_final)
print(f"[Model 3] 학습 완료 - Test MSE: {test_result3_final[0]['test/mse']:.6f}")

# Model 4 학습
print("\n[Model 4] 학습 시작...")
trainloader4_final = DataLoader(trainset_model4, batch_size=batch_size4, shuffle=True, pin_memory=True)
valloader4_final = DataLoader(valset_model4, batch_size=batch_size4)
testloader4_final = DataLoader(testset_model4, batch_size=batch_size4)

trainer4_final = L.Trainer(
    max_epochs=100,
    gradient_clip_val=1.0,
    callbacks=[
        ModelCheckpoint(monitor='val/roc', mode='max', save_top_k=1, save_last=False, filename='final-model4-best', dirpath=CHECKPOINT_DIR),
        CleanupCheckpointCallback(checkpoint_dir=CHECKPOINT_DIR, filename_prefix='final-model4-best'),
        EarlyStopping(monitor='val/roc', mode='max', patience=15)
    ]
)

trainer4_final.fit(final_model4, trainloader4_final, valloader4_final)
test_result4_final = trainer4_final.test(final_model4, testloader4_final)
print(f"[Model 4] 학습 완료 - Test ROC: {test_result4_final[0]['test/roc']:.6f}, Test AP: {test_result4_final[0]['test/ap']:.6f}")

print("\n" + "=" * 80)
print("모든 최종 모델 학습 완료!")
print("=" * 80)


Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
/data/miniconda3/envs/arcr/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type           


최적화된 파라미터로 최종 모델 생성

[Model 1] 최종 모델 생성 중...
[Model 2] 최종 모델 생성 중...
[Model 3] 최종 모델 생성 중...
[Model 4] 최종 모델 생성 중...

최종 모델 생성 완료!

최종 모델 학습 시작

[Model 1] 학습 시작...
Epoch 0:  25%|██▍       | 14/57 [00:00<00:00, 97.18it/s, v_num=0, train/loss_step=1.030]

Epoch 27: 100%|██████████| 57/57 [00:00<00:00, 87.70it/s, v_num=0, train/loss_step=0.667, val/loss=1.030, train/loss_epoch=0.695]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
/data/miniconda3/envs/arcr/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 110.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test/loss           0.9623681902885437
        test/mse            0.9623681902885437
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True


[Model 1] 학습 완료 - Test MSE: 0.962368

[Model 2] 학습 시작...


TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 3.6 K  | train
1 | seq_0M_encoder        | Sequential       | 960    | train
2 | seq_2M_encoder        | Sequential       | 30.4 K | train
3 | goutallier_0M_encoder | Sequential       | 14.0 K | train
4 | output_head           | Sequential       | 109 K  | train
5 | train_mse             | MeanSquaredError | 0      | train
6 | val_mse               | MeanSquaredError | 0      | train
7 | test_mse              | MeanSquaredError | 0      | train
-------------------------------------------------------------------
158 K     Trainable params
0         Non-trainable params
158 K     Total params
0.632     Total estimated model params size (MB)
44        Modules in train mode
0         Modules in eval mode


Epoch 55: 100%|██████████| 225/225 [00:02<00:00, 94.86it/s, v_num=1, train/loss_step=0.792, val/loss=0.823, train/loss_epoch=0.496] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]


Testing DataLoader 0: 100%|██████████| 4/4 [00:00<00:00, 182.29it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test/loss           0.9028714895248413
        test/mse            0.9028714895248413
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.


[Model 2] 학습 완료 - Test MSE: 0.902871

[Model 3] 학습 시작...


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name                  | Type             | Params | Mode 
-------------------------------------------------------------------
0 | static_encoder        | Sequential       | 9.0 K  | train
1 | seq_0M_encoder        | Sequential       | 32.5 K | train
2 | seq_2M_encoder        | Sequential       | 4.8 K  | train
3 | seq_3M_encoder        | Sequential       | 2.4 K  | train
4 | goutallier_0M_encoder | Sequential       | 7.4 K  | train
5 | output_head           | Sequential       | 172 K  | train
6 | train_mse             | MeanSquaredError | 0      | train
7 | val_mse               | MeanSquaredError | 0      | train
8 | test_mse              | MeanSquaredError | 0      | train
-------------------------------------------------------------------
228 K     Trainable params
0         Non-trainable params
228 K     Total params
0.913     Total estimated 

Epoch 99: 100%|██████████| 113/113 [00:01<00:00, 65.41it/s, v_num=2, train/loss_step=0.596, val/loss=0.896, train/loss_epoch=0.459]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 113/113 [00:01<00:00, 65.26it/s, v_num=2, train/loss_step=0.596, val/loss=0.896, train/loss_epoch=0.459]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]



Testing DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 124.96it/s]

Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

   | Name                  | Type                   | Params | Mode 
--------------------------------------------------------------------------
0  | static_encoder        | Sequential             | 18.4 K | train
1  | seq_0M_encoder        | Sequential             | 38.9 K | train
2  | seq_2M_encoder        | Sequential             | 13.4 K | train
3  | seq_3M_encoder        | Sequential             | 49.6 K | train
4  | seq_4M_encoder        | Sequential             | 61.5 K | train
5  | goutallier_0M_encoder | Sequential             | 4.8 K  | train
6  | clshead             


────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test/loss           1.3153107166290283
        test/mse            1.3153107166290283
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
[Model 3] 학습 완료 - Test MSE: 1.315311

[Model 4] 학습 시작...
Epoch 45: 100%|██████████| 113/113 [00:02<00:00, 55.39it/s, v_num=3, train/loss_step=0.687, val/loss=0.665, train/loss_epoch=0.669]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]


Testing DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 81.51it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         test/ap            0.7708374261856079
      test/clf_loss         0.5909584760665894
        test/loss            0.668912410736084
        test/mse            1.2943609952926636
      test/reg_loss         0.38976967334747314
        test/roc            0.7491999864578247
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
[Model 4] 학습 완료 - Test ROC: 0.749200, Test AP: 0.770837

모든 최종 모델 학습 완료!


### Sequential 모델 연결 및 성능 평가

In [27]:
print("\n" + "=" * 80)
print("최적화된 Sequential 모델 생성 및 성능 평가")
print("=" * 80)

# Sequential 모델 생성
sequential_model = SequentialModel(final_model1, final_model2, final_model3, final_model4)
print("Sequential 모델 생성 완료")

# Test DataLoader 생성
batch_size = 64
testloader1 = DataLoader(testset_model1, batch_size=batch_size)
testloader2 = DataLoader(testset_model2, batch_size=batch_size)
testloader3 = DataLoader(testset_model3, batch_size=batch_size)
testloader4 = DataLoader(testset_model4, batch_size=batch_size)

# 개별 모델 성능 평가
print("\n" + "-" * 80)
print("개별 모델 성능 평가")
print("-" * 80)

print("\n[Model 1] 평가 중...")
model1_results = evaluate_individual_model(final_model1, testloader1, "Model1")
print(f"  MSE: {model1_results['mse']:.6f}")
print(f"  MAE: {model1_results['mae']:.6f}")

print("\n[Model 2] 평가 중...")
model2_results = evaluate_individual_model(final_model2, testloader2, "Model2")
print(f"  MSE: {model2_results['mse']:.6f}")
print(f"  MAE: {model2_results['mae']:.6f}")

print("\n[Model 3] 평가 중...")
model3_results = evaluate_individual_model(final_model3, testloader3, "Model3")
print(f"  MSE: {model3_results['mse']:.6f}")
print(f"  MAE: {model3_results['mae']:.6f}")

print("\n[Model 4] 평가 중...")
model4_results = evaluate_individual_model(final_model4, testloader4, "Model4")
print(f"  MSE: {model4_results['mse']:.6f}")
print(f"  ROC AUC: {model4_results['roc_auc']:.6f}")
print(f"  AP: {model4_results['ap']:.6f}")
print(f"  Accuracy: {model4_results['accuracy']:.6f}")

# Sequential 모델 성능 평가
print("\n" + "-" * 80)
print("Sequential 모델 성능 평가 (0M 입력만으로 전체 시계열 예측)")
print("-" * 80)

sequential_results = evaluate_sequential_model(sequential_model, testset_model1)

print("\n[회귀 성능 - MSE]")
print(f"  2M 예측 MSE: {sequential_results['mse_2M']:.6f}")
print(f"  3M 예측 MSE: {sequential_results['mse_3M']:.6f}")
print(f"  4M 예측 MSE: {sequential_results['mse_4M']:.6f}")
print(f"  6M 예측 MSE: {sequential_results['mse_6M']:.6f}")
print(f"  6M Goutallier 예측 MSE: {sequential_results['mse_6M_goutallier']:.6f}")

print("\n[회귀 성능 - MAE]")
print(f"  2M 예측 MAE: {sequential_results['mae_2M']:.6f}")
print(f"  3M 예측 MAE: {sequential_results['mae_3M']:.6f}")
print(f"  4M 예측 MAE: {sequential_results['mae_4M']:.6f}")
print(f"  6M 예측 MAE: {sequential_results['mae_6M']:.6f}")
print(f"  6M Goutallier 예측 MAE: {sequential_results['mae_6M_goutallier']:.6f}")

print("\n[분류 성능]")
print(f"  ROC AUC: {sequential_results['roc_auc']:.6f}")
print(f"  AP (Average Precision): {sequential_results['ap']:.6f}")
print(f"  Accuracy: {sequential_results['accuracy']:.6f}")

# 성능 요약
print("\n" + "=" * 80)
print("성능 평가 요약")
print("=" * 80)
print("\n개별 모델 성능:")
print(f"  Model 1 (2M 예측): MSE={model1_results['mse']:.6f}, MAE={model1_results['mae']:.6f}")
print(f"  Model 2 (3M 예측): MSE={model2_results['mse']:.6f}, MAE={model2_results['mae']:.6f}")
print(f"  Model 3 (4M 예측): MSE={model3_results['mse']:.6f}, MAE={model3_results['mae']:.6f}")
print(f"  Model 4 (6M+분류): MSE={model4_results['mse']:.6f}, ROC={model4_results['roc_auc']:.6f}, AP={model4_results['ap']:.6f}, Accuracy={model4_results['accuracy']:.6f}")
print("\nSequential 모델 성능:")
print(f"  전체 파이프라인: ROC={sequential_results['roc_auc']:.6f}, AP={sequential_results['ap']:.6f}, Accuracy={sequential_results['accuracy']:.6f}")
print(f"  중간 단계 예측: 2M MSE={sequential_results['mse_2M']:.6f}, 3M MSE={sequential_results['mse_3M']:.6f}, 4M MSE={sequential_results['mse_4M']:.6f}, 6M MSE={sequential_results['mse_6M']:.6f}")

print("\n" + "=" * 80)
print("모든 평가 완료!")
print("=" * 80)



최적화된 Sequential 모델 생성 및 성능 평가
Sequential 모델 생성 완료

--------------------------------------------------------------------------------
개별 모델 성능 평가
--------------------------------------------------------------------------------

[Model 1] 평가 중...
  MSE: 0.962368
  MAE: 0.778129

[Model 2] 평가 중...
  MSE: 0.902871
  MAE: 0.674036

[Model 3] 평가 중...
  MSE: 1.315310
  MAE: 0.773468

[Model 4] 평가 중...
  MSE: 1.294361
  ROC AUC: 0.749200
  AP: 0.770837
  Accuracy: 0.680000

--------------------------------------------------------------------------------
Sequential 모델 성능 평가 (0M 입력만으로 전체 시계열 예측)
--------------------------------------------------------------------------------

[회귀 성능 - MSE]
  2M 예측 MSE: 0.962368
  3M 예측 MSE: 1.034777
  4M 예측 MSE: 1.688876
  6M 예측 MSE: 1.741515
  6M Goutallier 예측 MSE: 1.023012

[회귀 성능 - MAE]
  2M 예측 MAE: 0.778129
  3M 예측 MAE: 0.736314
  4M 예측 MAE: 0.890636
  6M 예측 MAE: 0.934148
  6M Goutallier 예측 MAE: 0.470817

[분류 성능]
  ROC AUC: 0.574000
  AP (Average Precision):