In [1]:
import pandas as pd
import numpy as np

In [2]:
import matplotlib.pyplot as plt

In [3]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import Ridge
from catboost import CatBoostRegressor
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from sklearn.linear_model import ElasticNet, HuberRegressor, Lasso, BayesianRidge
from sklearn.neural_network import MLPRegressor
from sklearn.inspection import permutation_importance
from sklearn.ensemble import GradientBoostingRegressor, BaggingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.base import BaseEstimator, RegressorMixin

In [5]:
#model training, base models
train_processed = pd.read_csv('train_processed.csv')
test_processed = pd.read_csv('test_processed.csv')

# define features and target variable
X = train_processed.drop(['SalePrice'], axis=1)
y = train_processed['SalePrice']  # log transformed
X_test = test_processed.copy()
test = pd.read_csv('test.csv')
test_ID = test['Id']  # save test IDs for submission

# split train and validation sets,这里需要分层划分,以避免famd出现问题
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# 默认参数基模型训练（使用降维后数据）
# Random Forest
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_val)
rf_rmse = np.sqrt(mean_squared_error(y_val, rf_pred))
print(f"RandomForest RMSE (default): {rf_rmse:.5f}")

# XGBoost
xgb = XGBRegressor(random_state=42)
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_val)
xgb_rmse = np.sqrt(mean_squared_error(y_val, xgb_pred))
print(f"XGBoost RMSE (default): {xgb_rmse:.5f}")

# LightGBM
lgb = LGBMRegressor(random_state=42,verbose=-1)
lgb.fit(X_train, y_train)
lgb_pred = lgb.predict(X_val)
lgb_rmse = np.sqrt(mean_squared_error(y_val, lgb_pred))
print(f"LightGBM RMSE (default): {lgb_rmse:.5f}")

# CatBoost（降维后无需 categorical_features）
# define categorical features for CatBoost
categorical_features = ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 
                       'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 
                       'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 
                       'Exterior2nd', 'MasVnrType', 'Foundation', 'BsmtFinType1', 
                       'BsmtFinType2', 'Heating', 'CentralAir', 'Electrical', 'GarageType', 
                       'GarageFinish', 'PavedDrive', 'Fence', 'MiscFeature', 'SaleType', 
                       'SaleCondition', 'Season']
catboost = CatBoostRegressor(random_state=42, verbose=0, cat_features=categorical_features)
catboost.fit(X_train, y_train, eval_set=(X_val, y_val), early_stopping_rounds=50)
catboost_pred = catboost.predict(X_val)
catboost_rmse = np.sqrt(mean_squared_error(y_val, catboost_pred))
print(f"CatBoost RMSE (default): {catboost_rmse:.5f}")

# enet
enet = ElasticNet(random_state=42)
enet.fit(X_train, y_train)
enet_pred = enet.predict(X_val)
enet_rmse = np.sqrt(mean_squared_error(y_val, enet_pred))
print(f"ElasticNet RMSE (default): {enet_rmse:.5f}")

# HuberRegressor
huber = HuberRegressor()
huber.fit(X_train, y_train)
huber_pred = huber.predict(X_val)
huber_rmse = np.sqrt(mean_squared_error(y_val, huber_pred))
print(f"HuberRegressor RMSE (default): {huber_rmse:.5f}")

# MLPRegressor
mlp = MLPRegressor(random_state=42, max_iter=1000)
mlp.fit(X_train, y_train)
mlp_pred = mlp.predict(X_val)
mlp_rmse = np.sqrt(mean_squared_error(y_val, mlp_pred))
print(f"MLPRegressor RMSE (default): {mlp_rmse:.5f}")

# KernelRidge
kr = KernelRidge()
kr.fit(X_train, y_train)
kr_pred = kr.predict(X_val)
kr_rmse = np.sqrt(mean_squared_error(y_val, kr_pred))
print(f"KernelRidge RMSE (default): {kr_rmse:.5f}")

# SVR
svr = SVR()
svr.fit(X_train, y_train)
svr_pred = svr.predict(X_val)
svr_rmse = np.sqrt(mean_squared_error(y_val, svr_pred))
print(f"SVR RMSE (default): {svr_rmse:.5f}")

# Ridge
ridge = Ridge()
ridge.fit(X_train, y_train)
ridge_pred = ridge.predict(X_val)
ridge_rmse = np.sqrt(mean_squared_error(y_val, ridge_pred))
print(f"Ridge RMSE (default): {ridge_rmse:.5f}")

# Lasso
lasso = Lasso()
lasso.fit(X_train, y_train)
lasso_pred = lasso.predict(X_val)
lasso_rmse = np.sqrt(mean_squared_error(y_val, lasso_pred))
print(f"Lasso RMSE (default): {lasso_rmse:.5f}")

# GBR
gbr = GradientBoostingRegressor()
gbr.fit(X_train, y_train)
gbr_pred = gbr.predict(X_val)
gbr_rmse = np.sqrt(mean_squared_error(y_val, gbr_pred))
print(f"GBR RMSE (default): {gbr_rmse:.5f}")

# bagging
bagging = BaggingRegressor()
bagging.fit(X_train, y_train)
bagging_pred = bagging.predict(X_val)
bagging_rmse = np.sqrt(mean_squared_error(y_val, bagging_pred))
print(f"Bagging RMSE (default): {bagging_rmse:.5f}")

# TabNet
tabnet = TabNetRegressor()
X_train_np = X_train.values
y_train_np = y_train.values.reshape(-1,1)
X_val_np = X_val.values
tabnet.fit(X_train_np, y_train_np)
tabnet_pred = tabnet.predict(X_val_np)
tabnet_rmse = np.sqrt(mean_squared_error(y_val, tabnet_pred))
print(f"TabNet RMSE (default): {tabnet_rmse:.5f}")


# BayesianRidge
bayesian_ridge = BayesianRidge()
bayesian_ridge.fit(X_train, y_train)
bayesian_ridge_pred = bayesian_ridge.predict(X_val)
bayesian_ridge_rmse = np.sqrt(mean_squared_error(y_val, bayesian_ridge_pred))
print(f"BayesianRidge RMSE (default): {bayesian_ridge_rmse:.5f}")


RandomForest RMSE (default): 0.14926
XGBoost RMSE (default): 0.14401
LightGBM RMSE (default): 0.14075
CatBoost RMSE (default): 0.12099
ElasticNet RMSE (default): 0.39872
HuberRegressor RMSE (default): 0.47456


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLPRegressor RMSE (default): 0.39593
KernelRidge RMSE (default): 0.19165
SVR RMSE (default): 0.12493
Ridge RMSE (default): 0.12647
Lasso RMSE (default): 0.39872
GBR RMSE (default): 0.12996
Bagging RMSE (default): 0.14829




epoch 0  | loss: 135.58453|  0:00:00s
epoch 1  | loss: 126.61742|  0:00:00s
epoch 2  | loss: 118.36626|  0:00:00s
epoch 3  | loss: 110.15644|  0:00:00s
epoch 4  | loss: 102.71006|  0:00:00s
epoch 5  | loss: 95.35297|  0:00:00s
epoch 6  | loss: 88.73606|  0:00:00s
epoch 7  | loss: 81.94154|  0:00:00s
epoch 8  | loss: 72.13223|  0:00:00s
epoch 9  | loss: 64.87029|  0:00:00s
epoch 10 | loss: 59.12515|  0:00:00s
epoch 11 | loss: 50.73232|  0:00:00s
epoch 12 | loss: 43.19045|  0:00:00s
epoch 13 | loss: 37.31705|  0:00:00s
epoch 14 | loss: 30.85492|  0:00:00s
epoch 15 | loss: 24.46004|  0:00:00s
epoch 16 | loss: 20.09502|  0:00:00s
epoch 17 | loss: 15.65094|  0:00:01s
epoch 18 | loss: 11.24824|  0:00:01s
epoch 19 | loss: 8.46251 |  0:00:01s
epoch 20 | loss: 5.59406 |  0:00:01s
epoch 21 | loss: 4.66188 |  0:00:01s
epoch 22 | loss: 4.18116 |  0:00:01s
epoch 23 | loss: 4.65246 |  0:00:01s
epoch 24 | loss: 5.34922 |  0:00:01s
epoch 25 | loss: 5.47355 |  0:00:01s
epoch 26 | loss: 5.46906 |  0:00:

In [None]:
n_trials = 500
n_trials_fast = 2000
n_trials_slow = 300
n_trials_verySLOW = 100

In [8]:
# RandomForest optuna 调优
def rf_objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 4),
        'random_state': 42
    }
    model = RandomForestRegressor(**params)
    model.fit(X_train, y_train)
    pred = model.predict(X_val)
    return np.sqrt(mean_squared_error(y_val, pred))

rf_study = optuna.create_study(direction='minimize')
rf_study.optimize(rf_objective, n_trials=50)  # RF 调优较慢，减少 trials
rf_best_params = rf_study.best_params
print("Best RandomForest params (Optuna):", rf_best_params)

# 重训练 Random Forest
rf_best = RandomForestRegressor(**rf_best_params, random_state=42)
rf_best.fit(X_train, y_train)
rf_best_pred = rf_best.predict(X_val)
rf_best_rmse = np.sqrt(mean_squared_error(y_val, rf_best_pred))
print(f"Optuna tuned RandomForest RMSE (reduced): {rf_best_rmse:.5f}")

[I 2025-08-17 13:55:55,686] A new study created in memory with name: no-name-3bb17441-ce4a-433f-bb12-301962a9d4ea
[I 2025-08-17 13:56:09,209] Trial 0 finished with value: 0.14893528645314605 and parameters: {'n_estimators': 846, 'max_depth': 13, 'min_samples_split': 9, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.14893528645314605.
[I 2025-08-17 13:56:14,684] Trial 1 finished with value: 0.1491181422699895 and parameters: {'n_estimators': 353, 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.14893528645314605.
[I 2025-08-17 13:56:26,831] Trial 2 finished with value: 0.1497437152381849 and parameters: {'n_estimators': 783, 'max_depth': 14, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.14893528645314605.
[I 2025-08-17 13:56:35,290] Trial 3 finished with value: 0.1561551611268414 and parameters: {'n_estimators': 773, 'max_depth': 6, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with v

Best RandomForest params (Optuna): {'n_estimators': 634, 'max_depth': 17, 'min_samples_split': 3, 'min_samples_leaf': 1}
Optuna tuned RandomForest RMSE (reduced): 0.14611


In [9]:
# 使用 Optuna 调优（每个模型单独定义 objective）
# XGBoost Optuna 调优
def xgb_objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'max_depth': trial.suggest_int('max_depth', 3, 7),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 5),
        'random_state': 42
    }
    model = XGBRegressor(**params)
    model.fit(X_train, y_train)
    pred = model.predict(X_val)
    return np.sqrt(mean_squared_error(y_val, pred))

xgb_study = optuna.create_study(direction='minimize')
xgb_study.optimize(xgb_objective, n_trials=n_trials)
xgb_best_params = xgb_study.best_params
print("Best XGBoost params (Optuna):", xgb_best_params)

# 重训练 XGBoost
xgb_best = XGBRegressor(**xgb_best_params, random_state=42)
xgb_best.fit(X_train, y_train)
xgb_best_pred = xgb_best.predict(X_val)
xgb_best_rmse = np.sqrt(mean_squared_error(y_val, xgb_best_pred))
print(f"Optuna tuned XGBoost RMSE (reduced): {xgb_best_rmse:.5f}")

[I 2025-08-17 14:04:42,413] A new study created in memory with name: no-name-f6bc5570-1056-4787-a759-dfa89927e34c
[I 2025-08-17 14:04:43,558] Trial 0 finished with value: 0.1349737097698185 and parameters: {'n_estimators': 621, 'learning_rate': 0.046288569393954954, 'max_depth': 7, 'subsample': 0.9272320249404369, 'colsample_bytree': 0.9822661503783987, 'min_child_weight': 3}. Best is trial 0 with value: 0.1349737097698185.
[I 2025-08-17 14:04:44,395] Trial 1 finished with value: 0.13312569193503435 and parameters: {'n_estimators': 585, 'learning_rate': 0.039074972328478286, 'max_depth': 6, 'subsample': 0.956428210449832, 'colsample_bytree': 0.9459690469951276, 'min_child_weight': 2}. Best is trial 1 with value: 0.13312569193503435.
[I 2025-08-17 14:04:45,090] Trial 2 finished with value: 0.12300729217740373 and parameters: {'n_estimators': 944, 'learning_rate': 0.015641567707807058, 'max_depth': 3, 'subsample': 0.8943833750045844, 'colsample_bytree': 0.8284706668835706, 'min_child_wei

Best XGBoost params (Optuna): {'n_estimators': 988, 'learning_rate': 0.019794274040249325, 'max_depth': 3, 'subsample': 0.7189765754097851, 'colsample_bytree': 0.710355096448193, 'min_child_weight': 2}
Optuna tuned XGBoost RMSE (reduced): 0.11913


In [10]:
# LightGBM Optuna 调优
def lgb_objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'max_depth': trial.suggest_int('max_depth', 3, 7),
        'num_leaves': trial.suggest_int('num_leaves', 15, 63),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'random_state': 42
    }
    model = LGBMRegressor(**params)
    model.fit(X_train, y_train)
    pred = model.predict(X_val)
    return np.sqrt(mean_squared_error(y_val, pred))

lgb_study = optuna.create_study(direction='minimize')
lgb_study.optimize(lgb_objective, n_trials=n_trials)
lgb_best_params = lgb_study.best_params
print("Best LightGBM params (Optuna):", lgb_best_params)

# 重训练 LightGBM
lgb_best = LGBMRegressor(**lgb_best_params, random_state=42,verbose=-1)
lgb_best.fit(X_train, y_train)
lgb_best_pred = lgb_best.predict(X_val)
lgb_best_rmse = np.sqrt(mean_squared_error(y_val, lgb_best_pred))
print(f"Optuna tuned LightGBM RMSE (reduced): {lgb_best_rmse:.5f}")

[I 2025-08-17 14:09:43,867] A new study created in memory with name: no-name-5f6fa6ed-b82f-4f73-9160-360d026c7a52
[I 2025-08-17 14:09:44,017] Trial 0 finished with value: 0.1358173270949269 and parameters: {'n_estimators': 307, 'learning_rate': 0.06585790912933305, 'max_depth': 6, 'num_leaves': 35, 'subsample': 0.8315599488207487, 'colsample_bytree': 0.8891784470493316}. Best is trial 0 with value: 0.1358173270949269.
[I 2025-08-17 14:09:44,342] Trial 1 finished with value: 0.1371941518160453 and parameters: {'n_estimators': 650, 'learning_rate': 0.03145844471764888, 'max_depth': 7, 'num_leaves': 43, 'subsample': 0.80612289760603, 'colsample_bytree': 0.8970838618013401}. Best is trial 0 with value: 0.1358173270949269.
[I 2025-08-17 14:09:44,564] Trial 2 finished with value: 0.13501272791092808 and parameters: {'n_estimators': 331, 'learning_rate': 0.0201826511398743, 'max_depth': 7, 'num_leaves': 45, 'subsample': 0.813389752318502, 'colsample_bytree': 0.892876626279776}. Best is trial 

Best LightGBM params (Optuna): {'n_estimators': 354, 'learning_rate': 0.04776311085216704, 'max_depth': 3, 'num_leaves': 42, 'subsample': 0.9193871451479886, 'colsample_bytree': 0.7650869103903398}
Optuna tuned LightGBM RMSE (reduced): 0.12436


In [11]:
# CatBoost Optuna 调优
def catboost_objective(trial):
    params = {
        'iterations': 500,  # 初始小值，之后可增大
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.05),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_int('l2_leaf_reg', 1, 7),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0, 1),
        'border_count': trial.suggest_int('border_count', 32, 128),
        'random_state': 42,
        'verbose': 0
    }
    model = CatBoostRegressor(**params)
    model.fit(X_train, y_train, eval_set=(X_val, y_val), early_stopping_rounds=50)
    pred = model.predict(X_val)
    return np.sqrt(mean_squared_error(y_val, pred))

catboost_study = optuna.create_study(direction='minimize')
catboost_study.optimize(catboost_objective, n_trials=n_trials)
catboost_best_params = catboost_study.best_params
catboost_best_params['iterations'] = 5000  # 增大 iterations 以利用最佳参数
print("Best CatBoost params (Optuna):", catboost_best_params)

# 重训练 CatBoost
catboost_best = CatBoostRegressor(**catboost_best_params, random_state=42, verbose=100)
catboost_best.fit(X_train, y_train, eval_set=(X_val, y_val), early_stopping_rounds=200)
catboost_best_pred = catboost_best.predict(X_val)
catboost_best_rmse = np.sqrt(mean_squared_error(y_val, catboost_best_pred))
print(f"Optuna tuned CatBoost RMSE (reduced): {catboost_best_rmse:.5f}")

[I 2025-08-17 14:11:25,421] A new study created in memory with name: no-name-d978143c-4a0a-40ae-a8e9-3a7c551f201e
[I 2025-08-17 14:11:25,944] Trial 0 finished with value: 0.1230552931963508 and parameters: {'learning_rate': 0.018302708555470924, 'depth': 4, 'l2_leaf_reg': 4, 'bagging_temperature': 0.1583618424531874, 'border_count': 63}. Best is trial 0 with value: 0.1230552931963508.
[I 2025-08-17 14:11:28,550] Trial 1 finished with value: 0.12758830995417908 and parameters: {'learning_rate': 0.03360080017187816, 'depth': 9, 'l2_leaf_reg': 2, 'bagging_temperature': 0.9023532415811185, 'border_count': 87}. Best is trial 0 with value: 0.1230552931963508.
[I 2025-08-17 14:11:33,235] Trial 2 finished with value: 0.13889120381284617 and parameters: {'learning_rate': 0.011140678037650611, 'depth': 10, 'l2_leaf_reg': 4, 'bagging_temperature': 0.2991059698930061, 'border_count': 67}. Best is trial 0 with value: 0.1230552931963508.
[I 2025-08-17 14:11:34,157] Trial 3 finished with value: 0.119

Best CatBoost params (Optuna): {'learning_rate': 0.047348295109695564, 'depth': 4, 'l2_leaf_reg': 7, 'bagging_temperature': 0.9179362159349008, 'border_count': 69, 'iterations': 5000}
0:	learn: 0.3844658	test: 0.3877756	best: 0.3877756 (0)	total: 1.29ms	remaining: 6.43s
100:	learn: 0.1197579	test: 0.1341631	best: 0.1341631 (100)	total: 106ms	remaining: 5.13s
200:	learn: 0.0989267	test: 0.1201247	best: 0.1201247 (200)	total: 192ms	remaining: 4.59s
300:	learn: 0.0872445	test: 0.1158456	best: 0.1158456 (300)	total: 289ms	remaining: 4.51s
400:	learn: 0.0803419	test: 0.1145097	best: 0.1144763 (393)	total: 369ms	remaining: 4.23s
500:	learn: 0.0749267	test: 0.1135765	best: 0.1134984 (488)	total: 452ms	remaining: 4.06s
600:	learn: 0.0702428	test: 0.1131120	best: 0.1131069 (599)	total: 539ms	remaining: 3.95s
700:	learn: 0.0654238	test: 0.1128547	best: 0.1128278 (698)	total: 623ms	remaining: 3.82s
800:	learn: 0.0617660	test: 0.1132055	best: 0.1128278 (698)	total: 705ms	remaining: 3.69s
Stopped b

In [12]:
#  ElasticNet 调优
def enet_objective(trial):
    params = {
        'alpha': trial.suggest_float('alpha', 0.0001, 1.0, log=True),
        'l1_ratio': trial.suggest_float('l1_ratio', 0.0, 1.0),
        'random_state': 42
    }
    model = ElasticNet(**params)
    model.fit(X_train, y_train)
    pred = model.predict(X_val)
    return np.sqrt(mean_squared_error(y_val, pred))

enet_study = optuna.create_study(direction='minimize')
enet_study.optimize(enet_objective, n_trials=n_trials)
enet_best_params = enet_study.best_params
print("Best ElasticNet params (Optuna):", enet_best_params)

# 重训练 ElasticNet
enet_best = ElasticNet(**enet_best_params, random_state=42)
enet_best.fit(X_train, y_train)
enet_best_pred = enet_best.predict(X_val)
enet_best_rmse = np.sqrt(mean_squared_error(y_val, enet_best_pred))
print(f"Optuna tuned ElasticNet RMSE: {enet_best_rmse:.5f}")

[I 2025-08-17 14:17:09,775] A new study created in memory with name: no-name-e89d6177-42dc-4a60-9332-f12be425ae78
[I 2025-08-17 14:17:09,801] Trial 0 finished with value: 0.12890545259898056 and parameters: {'alpha': 0.0073029674005703595, 'l1_ratio': 0.3589288827349081}. Best is trial 0 with value: 0.12890545259898056.
[I 2025-08-17 14:17:09,811] Trial 1 finished with value: 0.17990599314843778 and parameters: {'alpha': 0.07354117713007033, 'l1_ratio': 0.8404284712526936}. Best is trial 0 with value: 0.12890545259898056.
[I 2025-08-17 14:17:09,850] Trial 2 finished with value: 0.22811076086779686 and parameters: {'alpha': 0.6651165863598331, 'l1_ratio': 0.17868108038894936}. Best is trial 0 with value: 0.12890545259898056.
[I 2025-08-17 14:17:09,865] Trial 3 finished with value: 0.13822410118093528 and parameters: {'alpha': 0.03530125968548853, 'l1_ratio': 0.4523750049099472}. Best is trial 0 with value: 0.12890545259898056.
[I 2025-08-17 14:17:09,884] Trial 4 finished with value: 0.1

Best ElasticNet params (Optuna): {'alpha': 0.0001003776370975647, 'l1_ratio': 0.17342638714417052}
Optuna tuned ElasticNet RMSE: 0.12617


  model = cd_fast.enet_coordinate_descent(


In [13]:
# Huber 调优
def objective_huber(trial):
    params = {
        'alpha': trial.suggest_float('alpha', 1e-4, 0.1, log=True),
        'epsilon': trial.suggest_float('epsilon', 1.0, 2.0),
        'max_iter': trial.suggest_int('max_iter', 2000, 5000)
    }
    model = HuberRegressor(**params)
    model.fit(X_train, y_train)
    pred = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    return rmse

study_huber = optuna.create_study(direction='minimize')
study_huber.optimize(objective_huber, n_trials=n_trials)
huber_best_params = study_huber.best_params
print("best Huber params:", study_huber.best_params)
print("best Huber RMSE:", study_huber.best_value)

# retrain
huber_best = HuberRegressor(**study_huber.best_params)
huber_best.fit(X_train, y_train)
huber_best_pred = huber_best.predict(X_val)
huber_best_rmse = np.sqrt(mean_squared_error(y_val, huber_best_pred))
print(f"Tuned Huber RMSE: {huber_best_rmse:.5f}")


[I 2025-08-17 14:17:39,357] A new study created in memory with name: no-name-a0590026-6e24-48d9-89b2-c46671118e08
[I 2025-08-17 14:17:41,679] Trial 0 finished with value: 0.12708972661000012 and parameters: {'alpha': 0.008592085001656733, 'epsilon': 1.1638321595371197, 'max_iter': 4226}. Best is trial 0 with value: 0.12708972661000012.
[I 2025-08-17 14:17:43,689] Trial 1 finished with value: 0.12670504824957415 and parameters: {'alpha': 0.0005344941972691237, 'epsilon': 1.2673998024820374, 'max_iter': 3630}. Best is trial 1 with value: 0.12670504824957415.
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
[I 2025-08-17 14:17:45,685] Trial 2 finished with value: 0.12625491673617759 and parameters: {'alpha': 0.00023279405857590672, 'epsilon': 1.3476644649723122, 'max_iter': 3211}. Be

best Huber params: {'alpha': 0.07858893948127713, 'epsilon': 1.965637104417994, 'max_iter': 2054}
best Huber RMSE: 0.12390182950494899
Tuned Huber RMSE: 0.12390


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [None]:
# MLP 调优
def objective_mlp(trial):
    params = {
        'hidden_layer_sizes': trial.suggest_categorical('hidden_layer_sizes', [(50,100,50), (50,100,200,100,50), 
                                                                               (50,100,200,300,200,100,50), (100, 100),
                                                                               (100,200,100),(100,200,300,200,100),
                                                                               (100,200,300,400,300,200,100)]),
        'alpha': trial.suggest_float('alpha', 1e-5, 1e-2, log=True),
        'learning_rate_init': trial.suggest_float('learning_rate_init', 1e-4, 1e-2, log=True),
        'max_iter': trial.suggest_int('max_iter', 200, 1000)
    }
    model = MLPRegressor(**params, random_state=42)
    model.fit(X_train, y_train)
    pred = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    return rmse

study_mlp = optuna.create_study(direction='minimize')
study_mlp.optimize(objective_mlp, n_trials=n_trials)
mlp_best_params = study_mlp.best_params
mlp_best_params['max_iter'] = 5000  # increase iterations 
print("Best MLP params:", study_mlp.best_params)
print("Best MLP RMSE:", study_mlp.best_value)

# retrain
mlp_best = MLPRegressor(**mlp_best_params, random_state=42)
mlp_best.fit(X_train, y_train)
mlp_best_pred = mlp_best.predict(X_val)
mlp_best_rmse = np.sqrt(mean_squared_error(y_val, mlp_best_pred))
print(f"Tuned MLP RMSE: {mlp_best_rmse:.5f}")


[I 2025-08-17 14:32:59,391] A new study created in memory with name: no-name-4d74ee4f-58e7-48b3-a613-df16b1b7b68b
[I 2025-08-17 14:33:06,465] Trial 0 finished with value: 0.44874960643556633 and parameters: {'hidden_layer_sizes': (50, 100, 200, 100, 50), 'alpha': 0.0019214002916967269, 'learning_rate_init': 0.0005705423870784286, 'max_iter': 414}. Best is trial 0 with value: 0.44874960643556633.
[I 2025-08-17 14:33:23,417] Trial 1 finished with value: 0.48103714474794235 and parameters: {'hidden_layer_sizes': (50, 100, 200, 300, 200, 100, 50), 'alpha': 0.00045807755960543213, 'learning_rate_init': 0.00016571449793040835, 'max_iter': 639}. Best is trial 0 with value: 0.44874960643556633.
[I 2025-08-17 14:33:27,539] Trial 2 finished with value: 0.4389020295163371 and parameters: {'hidden_layer_sizes': (100, 100), 'alpha': 0.003906676658332898, 'learning_rate_init': 0.00018857597004687425, 'max_iter': 367}. Best is trial 2 with value: 0.4389020295163371.
[I 2025-08-17 14:33:32,620] Trial 

Best MLP params: {'hidden_layer_sizes': (100, 200, 300, 400, 300, 200, 100), 'alpha': 0.008242160412638026, 'learning_rate_init': 0.009087072551608025, 'max_iter': 677}
Best MLP RMSE: 0.15270290898143354
Tuned MLP RMSE: 0.15270


In [21]:
# retrain
mlp_best_params['max_iter'] = 5000  # increase iterations
mlp_best = MLPRegressor(**mlp_best_params, random_state=42)
mlp_best.fit(X_train, y_train)
mlp_best_pred = mlp_best.predict(X_val)
mlp_best_rmse = np.sqrt(mean_squared_error(y_val, mlp_best_pred))
print(f"Tuned MLP RMSE: {mlp_best_rmse:.5f}")

Tuned MLP RMSE: 0.15270


In [15]:
#  KernelRidge 调优
def kr_objective(trial):
    kernel = trial.suggest_categorical('kernel', ['rbf', 'linear'])
    params = {
        'alpha': trial.suggest_float('alpha', 0.1, 10.0, log=True),
        'kernel': kernel
    }
    if kernel == 'rbf':
        params['gamma'] = trial.suggest_float('gamma', 0.001, 0.1, log=True)
    model = KernelRidge(**params)
    model.fit(X_train, y_train)
    pred = model.predict(X_val)
    return np.sqrt(mean_squared_error(y_val, pred))

kr_study = optuna.create_study(direction='minimize')
kr_study.optimize(kr_objective, n_trials=n_trials)
kr_best_params = kr_study.best_params
print("Best KernelRidge params (Optuna):", kr_best_params)

# 重训练 KernelRidge
kr_best = KernelRidge(**kr_best_params)
kr_best.fit(X_train, y_train)
kr_best_pred = kr_best.predict(X_val)
kr_best_rmse = np.sqrt(mean_squared_error(y_val, kr_best_pred))
print(f"Optuna tuned KernelRidge RMSE: {kr_best_rmse:.5f}")

[I 2025-08-17 16:10:13,852] A new study created in memory with name: no-name-4358b341-e81b-49d6-a7f6-5cc21efbf535
[I 2025-08-17 16:10:13,910] Trial 0 finished with value: 0.12342998847935333 and parameters: {'kernel': 'linear', 'alpha': 0.1366345937651236}. Best is trial 0 with value: 0.12342998847935333.
[I 2025-08-17 16:10:13,952] Trial 1 finished with value: 0.12629009827489518 and parameters: {'kernel': 'linear', 'alpha': 0.20946464623038505}. Best is trial 0 with value: 0.12342998847935333.
[I 2025-08-17 16:10:14,008] Trial 2 finished with value: 1.6689621588746675 and parameters: {'kernel': 'rbf', 'alpha': 0.9189191311622981, 'gamma': 0.009421111865142143}. Best is trial 0 with value: 0.12342998847935333.
[I 2025-08-17 16:10:14,047] Trial 3 finished with value: 0.12343339078572813 and parameters: {'kernel': 'linear', 'alpha': 0.13678022185523697}. Best is trial 0 with value: 0.12342998847935333.
[I 2025-08-17 16:10:14,100] Trial 4 finished with value: 0.24823377820960807 and para

Best KernelRidge params (Optuna): {'kernel': 'linear', 'alpha': 0.10000382071679537}
Optuna tuned KernelRidge RMSE: 0.12293


In [16]:
# SVR 调优
def svr_objective(trial):
    params = {
        'C': trial.suggest_float('C', 0.1, 100.0, log=True),
        'epsilon': trial.suggest_float('epsilon', 0.01, 0.5),
        'kernel': trial.suggest_categorical('kernel', ['rbf', 'linear'])
    }
    model = SVR(**params)
    model.fit(X_train, y_train)
    pred = model.predict(X_val)
    return np.sqrt(mean_squared_error(y_val, pred))

svr_study = optuna.create_study(direction='minimize')
svr_study.optimize(svr_objective, n_trials=n_trials)
svr_best_params = svr_study.best_params
print("Best SVR params (Optuna):", svr_best_params)

# 重训练 SVR
svr_best = SVR(**svr_best_params)
svr_best.fit(X_train, y_train)
svr_best_pred = svr_best.predict(X_val)
svr_best_rmse = np.sqrt(mean_squared_error(y_val, svr_best_pred))
print(f"Optuna tuned SVR RMSE: {svr_best_rmse:.5f}")

[I 2025-08-17 16:10:35,951] A new study created in memory with name: no-name-38ed6bb5-e740-4ebf-8bf6-4a93d9423a44
[I 2025-08-17 16:10:35,963] Trial 0 finished with value: 0.19409168751910907 and parameters: {'C': 7.54630475676355, 'epsilon': 0.4875252811227908, 'kernel': 'linear'}. Best is trial 0 with value: 0.19409168751910907.
[I 2025-08-17 16:10:35,973] Trial 1 finished with value: 0.19375683401648186 and parameters: {'C': 3.9042858791275092, 'epsilon': 0.4148685796386942, 'kernel': 'rbf'}. Best is trial 1 with value: 0.19375683401648186.
[I 2025-08-17 16:10:36,036] Trial 2 finished with value: 0.12231649758205751 and parameters: {'C': 2.1177716828788324, 'epsilon': 0.08908094729720573, 'kernel': 'rbf'}. Best is trial 2 with value: 0.12231649758205751.
[I 2025-08-17 16:10:39,488] Trial 3 finished with value: 0.14766655343070093 and parameters: {'C': 7.8355781102640405, 'epsilon': 0.25319542656361627, 'kernel': 'linear'}. Best is trial 2 with value: 0.12231649758205751.
[I 2025-08-1

Best SVR params (Optuna): {'C': 1.91076115049394, 'epsilon': 0.021649017891148287, 'kernel': 'rbf'}
Optuna tuned SVR RMSE: 0.11893


In [17]:
# Optuna optimization for Ridge
def objective_ridge(trial):
    alpha = trial.suggest_float('alpha', 0.1, 10.0, log=True)
    ridge = Ridge(alpha=alpha, random_state=42)
    ridge.fit(X_train, y_train)
    pred = ridge.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    return rmse

# Optimize hyperparameters with Optuna
study_ridge = optuna.create_study(direction='minimize')
study_ridge.optimize(objective_ridge, n_trials=n_trials)
print("Best Ridge params:", study_ridge.best_params)
print("Best Ridge RMSE:", study_ridge.best_value)

# retrain
ridge_best = Ridge(**study_ridge.best_params, random_state=42)
ridge_best.fit(X_train, y_train)
ridge_best_pred = ridge_best.predict(X_val)
ridge_best_rmse = np.sqrt(mean_squared_error(y_val, ridge_best_pred))
print(f"Tuned Ridge RMSE: {ridge_best_rmse:.5f}")

[I 2025-08-17 16:16:08,771] A new study created in memory with name: no-name-6b7fc230-a405-47d9-9e3d-9b4a87f7aa9a
[I 2025-08-17 16:16:08,782] Trial 0 finished with value: 0.1263303305271608 and parameters: {'alpha': 0.536955528334113}. Best is trial 0 with value: 0.1263303305271608.
[I 2025-08-17 16:16:08,789] Trial 1 finished with value: 0.12642170503455927 and parameters: {'alpha': 0.8260566898012637}. Best is trial 0 with value: 0.1263303305271608.
[I 2025-08-17 16:16:08,797] Trial 2 finished with value: 0.12617480579148105 and parameters: {'alpha': 0.12240011646972151}. Best is trial 2 with value: 0.12617480579148105.
[I 2025-08-17 16:16:08,805] Trial 3 finished with value: 0.1271165775620117 and parameters: {'alpha': 4.314018751000626}. Best is trial 2 with value: 0.12617480579148105.
[I 2025-08-17 16:16:08,812] Trial 4 finished with value: 0.126995964687263 and parameters: {'alpha': 3.4769697169088576}. Best is trial 2 with value: 0.12617480579148105.
[I 2025-08-17 16:16:08,819] 

Best Ridge params: {'alpha': 0.10000473330039265}
Best Ridge RMSE: 0.1261635855204583
Tuned Ridge RMSE: 0.12616


In [18]:
# Optuna optimization for Lasso
def objective_lasso(trial):
    alpha = trial.suggest_float('alpha', 0.0001, 0.1, log=True)
    lasso = Lasso(alpha=alpha, random_state=42)
    lasso.fit(X_train, y_train)
    pred = lasso.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    return rmse

# Optimize hyperparameters with Optuna
study_lasso = optuna.create_study(direction='minimize')
study_lasso.optimize(objective_lasso, n_trials=n_trials)
print("Best Lasso params:", study_lasso.best_params)
print("Best Lasso RMSE:", study_lasso.best_value)

# retrain
lasso_best = Lasso(**study_lasso.best_params, random_state=42)
lasso_best.fit(X_train, y_train)
lasso_best_pred = lasso_best.predict(X_val)
lasso_best_rmse = np.sqrt(mean_squared_error(y_val, lasso_best_pred))
print(f"Tuned Lasso RMSE: {lasso_best_rmse:.5f}")

[I 2025-08-17 16:16:14,227] A new study created in memory with name: no-name-b53575d2-e61a-4322-bc1e-42fcca6c9722
[I 2025-08-17 16:16:14,266] Trial 0 finished with value: 0.12762694801391816 and parameters: {'alpha': 0.0006527198871845362}. Best is trial 0 with value: 0.12762694801391816.
[I 2025-08-17 16:16:14,297] Trial 1 finished with value: 0.12766954578042974 and parameters: {'alpha': 0.00069664145780777}. Best is trial 0 with value: 0.12762694801391816.
[I 2025-08-17 16:16:14,317] Trial 2 finished with value: 0.13905317452194169 and parameters: {'alpha': 0.015816031099705995}. Best is trial 0 with value: 0.12762694801391816.
[I 2025-08-17 16:16:14,338] Trial 3 finished with value: 0.13830813386221802 and parameters: {'alpha': 0.015229912208978715}. Best is trial 0 with value: 0.12762694801391816.
[I 2025-08-17 16:16:14,345] Trial 4 finished with value: 0.19495224353261154 and parameters: {'alpha': 0.08479339745672594}. Best is trial 0 with value: 0.12762694801391816.
[I 2025-08-1

Best Lasso params: {'alpha': 0.0001000058995669444}
Best Lasso RMSE: 0.12638472958333277
Tuned Lasso RMSE: 0.12638


  model = cd_fast.enet_coordinate_descent(


In [22]:
# Optuna optimization for GradientBoosting
def objective_gbr(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 7),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 4),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0)
    }
    gbr = GradientBoostingRegressor(**params, random_state=42)
    gbr.fit(X_train, y_train)
    pred = gbr.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    return rmse

# Optimize hyperparameters with Optuna
study_gbr = optuna.create_study(direction='minimize')
study_gbr.optimize(objective_gbr, n_trials=n_trials)
print("Best GradientBoosting params:", study_gbr.best_params)
print("Best GradientBoosting RMSE:", study_gbr.best_value)

# retrain
gbr_best = GradientBoostingRegressor(**study_gbr.best_params, random_state=42)
gbr_best.fit(X_train, y_train)
gbr_best_pred = gbr_best.predict(X_val)
gbr_best_rmse = np.sqrt(mean_squared_error(y_val, gbr_best_pred))
print(f"Tuned GradientBoosting RMSE: {gbr_best_rmse:.5f}")

[I 2025-08-17 16:47:02,459] A new study created in memory with name: no-name-497d994e-ddf1-4488-afe8-a4ebcbd8cf69
[I 2025-08-17 16:47:15,250] Trial 0 finished with value: 0.13296730040686 and parameters: {'n_estimators': 847, 'learning_rate': 0.019584772954981174, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 4, 'subsample': 0.8712392872673951}. Best is trial 0 with value: 0.13296730040686.
[I 2025-08-17 16:47:19,872] Trial 1 finished with value: 0.13064949723067268 and parameters: {'n_estimators': 515, 'learning_rate': 0.09222301784486779, 'max_depth': 3, 'min_samples_split': 10, 'min_samples_leaf': 1, 'subsample': 0.9822480140359107}. Best is trial 1 with value: 0.13064949723067268.
[I 2025-08-17 16:47:24,748] Trial 2 finished with value: 0.13091164181894915 and parameters: {'n_estimators': 356, 'learning_rate': 0.04733986864157737, 'max_depth': 6, 'min_samples_split': 4, 'min_samples_leaf': 1, 'subsample': 0.7464906261029561}. Best is trial 1 with value: 0.130649497230

Best GradientBoosting params: {'n_estimators': 807, 'learning_rate': 0.03264158995949075, 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 3, 'subsample': 0.7407326911978687}
Best GradientBoosting RMSE: 0.11861192174732638
Tuned GradientBoosting RMSE: 0.11861


In [26]:
# Optuna optimization for Bagging
def objective_br(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_samples': trial.suggest_float('max_samples', 0.5, 1.0),
        'max_features': trial.suggest_float('max_features', 0.5, 1.0)
    }
    bagging = BaggingRegressor(**params, random_state=42)
    bagging.fit(X_train, y_train)
    pred = bagging.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    return rmse

# Optimize hyperparameters with Optuna
study_bagging = optuna.create_study(direction='minimize')
study_bagging.optimize(objective_br, n_trials=n_trials)
print("Best Bagging params:", study_bagging.best_params)
print("Best Bagging RMSE:", study_bagging.best_value)

# retrain
bagging_best = BaggingRegressor(**study_bagging.best_params, random_state=42)
bagging_best.fit(X_train, y_train)
bagging_best_pred = bagging_best.predict(X_val)
bagging_best_rmse = np.sqrt(mean_squared_error(y_val, bagging_best_pred))
print(f"Tuned Bagging RMSE: {bagging_best_rmse:.5f}")

[I 2025-08-17 19:08:54,486] A new study created in memory with name: no-name-1a49b2c8-29f8-44b9-bbbf-ea9577a583a1
[I 2025-08-17 19:08:55,773] Trial 0 finished with value: 0.14573467939342635 and parameters: {'n_estimators': 60, 'max_samples': 0.9997661407545606, 'max_features': 0.7331700958027279}. Best is trial 0 with value: 0.14573467939342635.
[I 2025-08-17 19:09:00,769] Trial 1 finished with value: 0.1451526192350068 and parameters: {'n_estimators': 271, 'max_samples': 0.8535891381701377, 'max_features': 0.7380685141185868}. Best is trial 1 with value: 0.1451526192350068.
[I 2025-08-17 19:09:08,324] Trial 2 finished with value: 0.14589141465304606 and parameters: {'n_estimators': 400, 'max_samples': 0.7035398866867457, 'max_features': 0.9128540570528765}. Best is trial 1 with value: 0.1451526192350068.
[I 2025-08-17 19:09:09,608] Trial 3 finished with value: 0.14464403709513698 and parameters: {'n_estimators': 73, 'max_samples': 0.901436677395903, 'max_features': 0.7007259399996244

Best Bagging params: {'n_estimators': 220, 'max_samples': 0.7138590902061488, 'max_features': 0.5026361093709761}
Best Bagging RMSE: 0.1390399794081062
Tuned Bagging RMSE: 0.13904


In [27]:
# Optuna optimization for TabNet
def objective_tabnet(trial):
    params = {
        'n_d': trial.suggest_int('n_d', 8, 64),
        'n_a': trial.suggest_int('n_a', 8, 64),
        'n_steps': trial.suggest_int('n_steps', 3, 10),
        'gamma': trial.suggest_float('gamma', 1.0, 2.0),
        'lambda_sparse': trial.suggest_float('lambda_sparse', 0.0001, 0.1, log=True)
    }
    tabnet = TabNetRegressor(**params, seed=42)
    tabnet.fit(
        X_train.values, y_train.values.reshape(-1, 1),
        eval_set=[(X_val.values, y_val.values.reshape(-1, 1))],
        eval_metric=['rmse'],
        max_epochs=100,
        patience=10,
        batch_size=1024,
        virtual_batch_size=128
    )
    pred = tabnet.predict(X_val.values).flatten()
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    return rmse

# Optimize hyperparameters with Optuna
study_tabnet = optuna.create_study(direction='minimize')
study_tabnet.optimize(objective_tabnet, n_trials=200)
print("Best TabNet params:", study_tabnet.best_params)
print("Best TabNet RMSE:", study_tabnet.best_value)

# retrain tabnet
tabnet_best = TabNetRegressor(**study_tabnet.best_params, seed=42)
tabnet_best.fit(
    X_train.values, y_train.values.reshape(-1, 1),
    eval_set=[(X_val.values, y_val.values.reshape(-1, 1))],
    eval_metric=['rmse'],
    max_epochs=100,
    patience=10,
    batch_size=1024,
    virtual_batch_size=128
)
tabnet_best_pred = tabnet_best.predict(X_val.values).flatten()
tabnet_best_rmse = np.sqrt(mean_squared_error(y_val, tabnet_best_pred))
print(f"Tuned TabNet RMSE: {tabnet_best_rmse:.5f}")

[I 2025-08-17 19:33:13,801] A new study created in memory with name: no-name-d1c1d06a-7bb7-4b46-970f-cde3b381912a


epoch 0  | loss: 191.57555| val_0_rmse: 18.06934|  0:00:00s
epoch 1  | loss: 137.81482| val_0_rmse: 11.0719 |  0:00:00s
epoch 2  | loss: 97.03248| val_0_rmse: 8.49918 |  0:00:00s
epoch 3  | loss: 66.91883| val_0_rmse: 7.56094 |  0:00:00s
epoch 4  | loss: 42.99661| val_0_rmse: 7.67537 |  0:00:00s
epoch 5  | loss: 32.21754| val_0_rmse: 7.20312 |  0:00:01s
epoch 6  | loss: 22.17604| val_0_rmse: 8.26113 |  0:00:01s
epoch 7  | loss: 15.96772| val_0_rmse: 9.30673 |  0:00:01s
epoch 8  | loss: 16.83532| val_0_rmse: 10.36075|  0:00:01s
epoch 9  | loss: 16.8701 | val_0_rmse: 8.8023  |  0:00:01s
epoch 10 | loss: 15.23916| val_0_rmse: 7.84967 |  0:00:01s
epoch 11 | loss: 11.68878| val_0_rmse: 6.93089 |  0:00:01s
epoch 12 | loss: 8.97382 | val_0_rmse: 5.97524 |  0:00:02s
epoch 13 | loss: 6.07834 | val_0_rmse: 5.18581 |  0:00:02s
epoch 14 | loss: 4.69125 | val_0_rmse: 4.6964  |  0:00:02s
epoch 15 | loss: 5.0841  | val_0_rmse: 4.85472 |  0:00:02s
epoch 16 | loss: 4.14344 | val_0_rmse: 5.66569 |  0:00

[I 2025-08-17 19:33:17,818] Trial 0 finished with value: 4.696403919871648 and parameters: {'n_d': 42, 'n_a': 53, 'n_steps': 5, 'gamma': 1.0051910353909734, 'lambda_sparse': 0.0029858944846183}. Best is trial 0 with value: 4.696403919871648.


epoch 24 | loss: 1.02547 | val_0_rmse: 5.26086 |  0:00:03s

Early stopping occurred at epoch 24 with best_epoch = 14 and best_val_0_rmse = 4.6964




epoch 0  | loss: 144.32681| val_0_rmse: 28.95302|  0:00:00s
epoch 1  | loss: 74.6981 | val_0_rmse: 12.2331 |  0:00:00s
epoch 2  | loss: 35.0218 | val_0_rmse: 14.40665|  0:00:00s
epoch 3  | loss: 26.53603| val_0_rmse: 17.8151 |  0:00:00s
epoch 4  | loss: 31.4852 | val_0_rmse: 15.9519 |  0:00:01s
epoch 5  | loss: 28.48719| val_0_rmse: 13.888  |  0:00:01s
epoch 6  | loss: 19.7334 | val_0_rmse: 12.01519|  0:00:01s
epoch 7  | loss: 14.55594| val_0_rmse: 10.85425|  0:00:01s
epoch 8  | loss: 10.06753| val_0_rmse: 10.20822|  0:00:02s
epoch 9  | loss: 11.94565| val_0_rmse: 9.40604 |  0:00:02s
epoch 10 | loss: 12.32871| val_0_rmse: 8.80764 |  0:00:02s
epoch 11 | loss: 11.57625| val_0_rmse: 9.68214 |  0:00:02s
epoch 12 | loss: 9.37135 | val_0_rmse: 10.89905|  0:00:02s
epoch 13 | loss: 5.91381 | val_0_rmse: 11.43562|  0:00:03s
epoch 14 | loss: 6.7242  | val_0_rmse: 8.17355 |  0:00:03s
epoch 15 | loss: 4.75587 | val_0_rmse: 7.21416 |  0:00:03s
epoch 16 | loss: 3.50807 | val_0_rmse: 6.27336 |  0:00:

[I 2025-08-17 19:33:33,630] Trial 1 finished with value: 1.1992572877340963 and parameters: {'n_d': 51, 'n_a': 11, 'n_steps': 9, 'gamma': 1.1951340309052676, 'lambda_sparse': 0.0004149126140386606}. Best is trial 1 with value: 1.1992572877340963.


epoch 69 | loss: 8.16204 | val_0_rmse: 3.5237  |  0:00:15s

Early stopping occurred at epoch 69 with best_epoch = 59 and best_val_0_rmse = 1.19926




epoch 0  | loss: 50.77531| val_0_rmse: 36.07009|  0:00:00s
epoch 1  | loss: 35.49865| val_0_rmse: 28.94576|  0:00:00s
epoch 2  | loss: 27.72963| val_0_rmse: 26.17005|  0:00:00s
epoch 3  | loss: 25.31642| val_0_rmse: 27.40297|  0:00:00s
epoch 4  | loss: 22.9206 | val_0_rmse: 21.52596|  0:00:01s
epoch 5  | loss: 17.77403| val_0_rmse: 20.64802|  0:00:01s
epoch 6  | loss: 14.52914| val_0_rmse: 17.10733|  0:00:01s
epoch 7  | loss: 13.33718| val_0_rmse: 15.38606|  0:00:01s
epoch 8  | loss: 11.58931| val_0_rmse: 13.67037|  0:00:02s
epoch 9  | loss: 10.13244| val_0_rmse: 14.2443 |  0:00:02s
epoch 10 | loss: 7.51136 | val_0_rmse: 14.00797|  0:00:02s
epoch 11 | loss: 6.02487 | val_0_rmse: 10.34924|  0:00:02s
epoch 12 | loss: 4.8332  | val_0_rmse: 10.77751|  0:00:02s
epoch 13 | loss: 4.17893 | val_0_rmse: 9.84025 |  0:00:03s
epoch 14 | loss: 3.48127 | val_0_rmse: 9.08472 |  0:00:03s
epoch 15 | loss: 2.82145 | val_0_rmse: 10.6402 |  0:00:03s
epoch 16 | loss: 2.46296 | val_0_rmse: 7.16622 |  0:00:0

[I 2025-08-17 19:33:42,749] Trial 2 finished with value: 2.1761342340298455 and parameters: {'n_d': 40, 'n_a': 47, 'n_steps': 8, 'gamma': 1.8712716679057149, 'lambda_sparse': 0.001876139261674982}. Best is trial 1 with value: 1.1992572877340963.



Early stopping occurred at epoch 40 with best_epoch = 30 and best_val_0_rmse = 2.17613




epoch 0  | loss: 159.69409| val_0_rmse: 10.23315|  0:00:00s
epoch 1  | loss: 128.69011| val_0_rmse: 11.52073|  0:00:00s
epoch 2  | loss: 106.17399| val_0_rmse: 11.03073|  0:00:00s
epoch 3  | loss: 85.13914| val_0_rmse: 10.06152|  0:00:00s
epoch 4  | loss: 68.34243| val_0_rmse: 8.44094 |  0:00:00s
epoch 5  | loss: 51.47614| val_0_rmse: 6.74527 |  0:00:00s
epoch 6  | loss: 38.06831| val_0_rmse: 7.58739 |  0:00:00s
epoch 7  | loss: 28.75717| val_0_rmse: 9.23387 |  0:00:00s
epoch 8  | loss: 21.16917| val_0_rmse: 8.43274 |  0:00:00s
epoch 9  | loss: 17.55791| val_0_rmse: 8.03244 |  0:00:00s
epoch 10 | loss: 14.13284| val_0_rmse: 8.83927 |  0:00:00s
epoch 11 | loss: 12.06347| val_0_rmse: 9.01861 |  0:00:00s
epoch 12 | loss: 11.6754 | val_0_rmse: 8.87925 |  0:00:01s
epoch 13 | loss: 10.00543| val_0_rmse: 8.92305 |  0:00:01s
epoch 14 | loss: 7.79768 | val_0_rmse: 8.0023  |  0:00:01s


[I 2025-08-17 19:33:44,116] Trial 3 finished with value: 6.745272010075586 and parameters: {'n_d': 36, 'n_a': 11, 'n_steps': 3, 'gamma': 1.4756001408812338, 'lambda_sparse': 0.001883834105319773}. Best is trial 1 with value: 1.1992572877340963.


epoch 15 | loss: 5.62432 | val_0_rmse: 7.27895 |  0:00:01s

Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_rmse = 6.74527
epoch 0  | loss: 137.76257| val_0_rmse: 10.66921|  0:00:00s




epoch 1  | loss: 105.90982| val_0_rmse: 9.05918 |  0:00:00s
epoch 2  | loss: 79.85746| val_0_rmse: 9.10759 |  0:00:00s
epoch 3  | loss: 54.02629| val_0_rmse: 7.74407 |  0:00:00s
epoch 4  | loss: 34.95353| val_0_rmse: 7.65887 |  0:00:00s
epoch 5  | loss: 21.48743| val_0_rmse: 6.11445 |  0:00:00s
epoch 6  | loss: 13.16277| val_0_rmse: 5.09496 |  0:00:00s
epoch 7  | loss: 9.95355 | val_0_rmse: 5.42635 |  0:00:00s
epoch 8  | loss: 9.8044  | val_0_rmse: 5.79419 |  0:00:00s
epoch 9  | loss: 8.6495  | val_0_rmse: 4.62431 |  0:00:01s
epoch 10 | loss: 5.59361 | val_0_rmse: 3.61282 |  0:00:01s
epoch 11 | loss: 2.95439 | val_0_rmse: 2.49212 |  0:00:01s
epoch 12 | loss: 2.57496 | val_0_rmse: 2.39238 |  0:00:01s
epoch 13 | loss: 2.22721 | val_0_rmse: 2.57236 |  0:00:01s
epoch 14 | loss: 1.75574 | val_0_rmse: 2.47894 |  0:00:01s
epoch 15 | loss: 1.19851 | val_0_rmse: 2.6252  |  0:00:01s
epoch 16 | loss: 1.104   | val_0_rmse: 3.04142 |  0:00:01s
epoch 17 | loss: 1.06362 | val_0_rmse: 3.13235 |  0:00:

[I 2025-08-17 19:33:46,551] Trial 4 finished with value: 2.392377550917571 and parameters: {'n_d': 47, 'n_a': 57, 'n_steps': 3, 'gamma': 1.0621151007505532, 'lambda_sparse': 0.017570320134602847}. Best is trial 1 with value: 1.1992572877340963.


epoch 21 | loss: 0.56426 | val_0_rmse: 3.24388 |  0:00:02s
epoch 22 | loss: 0.5294  | val_0_rmse: 3.05242 |  0:00:02s

Early stopping occurred at epoch 22 with best_epoch = 12 and best_val_0_rmse = 2.39238




epoch 0  | loss: 158.95378| val_0_rmse: 11.75964|  0:00:00s
epoch 1  | loss: 123.63503| val_0_rmse: 8.68098 |  0:00:00s
epoch 2  | loss: 98.37527| val_0_rmse: 7.46475 |  0:00:00s
epoch 3  | loss: 76.991  | val_0_rmse: 7.10859 |  0:00:00s
epoch 4  | loss: 59.86636| val_0_rmse: 6.28265 |  0:00:01s
epoch 5  | loss: 43.99693| val_0_rmse: 7.14643 |  0:00:01s
epoch 6  | loss: 34.28062| val_0_rmse: 8.40945 |  0:00:01s
epoch 7  | loss: 25.57701| val_0_rmse: 9.4992  |  0:00:01s
epoch 8  | loss: 20.07405| val_0_rmse: 11.73576|  0:00:01s
epoch 9  | loss: 18.43129| val_0_rmse: 16.50879|  0:00:02s
epoch 10 | loss: 19.3324 | val_0_rmse: 18.61863|  0:00:02s
epoch 11 | loss: 20.77324| val_0_rmse: 19.14505|  0:00:02s
epoch 12 | loss: 21.11931| val_0_rmse: 19.55457|  0:00:02s
epoch 13 | loss: 20.40749| val_0_rmse: 17.61572|  0:00:02s


[I 2025-08-17 19:33:49,835] Trial 5 finished with value: 6.282652362778381 and parameters: {'n_d': 12, 'n_a': 31, 'n_steps': 9, 'gamma': 1.0609719830694833, 'lambda_sparse': 0.004854175866851097}. Best is trial 1 with value: 1.1992572877340963.


epoch 14 | loss: 18.99906| val_0_rmse: 15.16611|  0:00:03s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_rmse = 6.28265




epoch 0  | loss: 190.44731| val_0_rmse: 20.73073|  0:00:00s
epoch 1  | loss: 115.32668| val_0_rmse: 9.88557 |  0:00:00s
epoch 2  | loss: 60.70376| val_0_rmse: 13.81697|  0:00:00s
epoch 3  | loss: 27.01906| val_0_rmse: 14.398  |  0:00:00s
epoch 4  | loss: 20.97422| val_0_rmse: 17.03316|  0:00:00s
epoch 5  | loss: 26.35552| val_0_rmse: 17.44463|  0:00:01s
epoch 6  | loss: 38.79903| val_0_rmse: 20.17171|  0:00:01s
epoch 7  | loss: 28.4755 | val_0_rmse: 21.26452|  0:00:01s
epoch 8  | loss: 17.98101| val_0_rmse: 15.73776|  0:00:01s
epoch 9  | loss: 12.76421| val_0_rmse: 16.26132|  0:00:01s
epoch 10 | loss: 10.30937| val_0_rmse: 13.85652|  0:00:01s
epoch 11 | loss: 11.27292| val_0_rmse: 12.74224|  0:00:02s

Early stopping occurred at epoch 11 with best_epoch = 1 and best_val_0_rmse = 9.88557


[I 2025-08-17 19:33:52,166] Trial 6 finished with value: 9.88557385023422 and parameters: {'n_d': 63, 'n_a': 9, 'n_steps': 7, 'gamma': 1.830004954184012, 'lambda_sparse': 0.00873211967680313}. Best is trial 1 with value: 1.1992572877340963.


epoch 0  | loss: 118.08408| val_0_rmse: 10.28384|  0:00:00s
epoch 1  | loss: 102.80792| val_0_rmse: 8.9404  |  0:00:00s
epoch 2  | loss: 88.50112| val_0_rmse: 8.66237 |  0:00:00s
epoch 3  | loss: 73.91519| val_0_rmse: 8.81181 |  0:00:00s
epoch 4  | loss: 65.05633| val_0_rmse: 8.44416 |  0:00:00s
epoch 5  | loss: 52.30728| val_0_rmse: 8.30715 |  0:00:00s
epoch 6  | loss: 44.54031| val_0_rmse: 7.83285 |  0:00:01s
epoch 7  | loss: 37.25948| val_0_rmse: 7.99043 |  0:00:01s
epoch 8  | loss: 32.12211| val_0_rmse: 7.84808 |  0:00:01s
epoch 9  | loss: 24.50705| val_0_rmse: 7.95729 |  0:00:01s
epoch 10 | loss: 21.81194| val_0_rmse: 9.03666 |  0:00:01s
epoch 11 | loss: 17.23704| val_0_rmse: 10.27972|  0:00:01s
epoch 12 | loss: 16.20406| val_0_rmse: 11.39024|  0:00:01s
epoch 13 | loss: 15.98707| val_0_rmse: 12.53572|  0:00:02s
epoch 14 | loss: 14.94073| val_0_rmse: 11.62512|  0:00:02s
epoch 15 | loss: 13.46798| val_0_rmse: 10.83187|  0:00:02s


[I 2025-08-17 19:33:54,743] Trial 7 finished with value: 7.832847273918028 and parameters: {'n_d': 9, 'n_a': 33, 'n_steps': 6, 'gamma': 1.2330756841820412, 'lambda_sparse': 0.016495160225693892}. Best is trial 1 with value: 1.1992572877340963.


epoch 16 | loss: 11.73773| val_0_rmse: 8.31934 |  0:00:02s

Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_0_rmse = 7.83285




epoch 0  | loss: 191.49422| val_0_rmse: 13.32943|  0:00:00s
epoch 1  | loss: 143.07526| val_0_rmse: 10.94291|  0:00:00s
epoch 2  | loss: 106.45497| val_0_rmse: 9.17186 |  0:00:00s
epoch 3  | loss: 77.4409 | val_0_rmse: 7.99226 |  0:00:00s
epoch 4  | loss: 55.16489| val_0_rmse: 7.1757  |  0:00:00s
epoch 5  | loss: 38.02774| val_0_rmse: 7.2504  |  0:00:00s
epoch 6  | loss: 26.27203| val_0_rmse: 11.48401|  0:00:00s
epoch 7  | loss: 19.19323| val_0_rmse: 14.8898 |  0:00:00s
epoch 8  | loss: 15.91128| val_0_rmse: 19.88272|  0:00:01s
epoch 9  | loss: 12.68508| val_0_rmse: 19.67845|  0:00:01s
epoch 10 | loss: 12.39348| val_0_rmse: 23.4681 |  0:00:01s
epoch 11 | loss: 12.6656 | val_0_rmse: 18.93186|  0:00:01s
epoch 12 | loss: 9.93645 | val_0_rmse: 16.05198|  0:00:01s


[I 2025-08-17 19:33:56,553] Trial 8 finished with value: 7.175701866726173 and parameters: {'n_d': 32, 'n_a': 41, 'n_steps': 4, 'gamma': 1.996611418883019, 'lambda_sparse': 0.00014272189266121676}. Best is trial 1 with value: 1.1992572877340963.


epoch 13 | loss: 8.03424 | val_0_rmse: 13.96418|  0:00:01s
epoch 14 | loss: 5.80357 | val_0_rmse: 12.07296|  0:00:01s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_rmse = 7.1757




epoch 0  | loss: 116.57704| val_0_rmse: 9.85085 |  0:00:00s
epoch 1  | loss: 86.37771| val_0_rmse: 9.08491 |  0:00:00s
epoch 2  | loss: 60.74739| val_0_rmse: 14.33015|  0:00:00s
epoch 3  | loss: 44.28254| val_0_rmse: 12.12318|  0:00:00s
epoch 4  | loss: 28.17515| val_0_rmse: 17.67695|  0:00:00s
epoch 5  | loss: 22.4465 | val_0_rmse: 18.92376|  0:00:01s
epoch 6  | loss: 17.4212 | val_0_rmse: 19.43575|  0:00:01s
epoch 7  | loss: 18.3806 | val_0_rmse: 16.52304|  0:00:01s
epoch 8  | loss: 19.10159| val_0_rmse: 19.61512|  0:00:01s
epoch 9  | loss: 17.94428| val_0_rmse: 20.3652 |  0:00:01s
epoch 10 | loss: 15.46594| val_0_rmse: 19.01313|  0:00:01s
epoch 11 | loss: 15.53424| val_0_rmse: 18.57091|  0:00:01s

Early stopping occurred at epoch 11 with best_epoch = 1 and best_val_0_rmse = 9.08491


[I 2025-08-17 19:33:58,694] Trial 9 finished with value: 9.084907795104149 and parameters: {'n_d': 26, 'n_a': 34, 'n_steps': 7, 'gamma': 1.591070630435904, 'lambda_sparse': 0.00012994690898027783}. Best is trial 1 with value: 1.1992572877340963.


epoch 0  | loss: 148.11705| val_0_rmse: 17.51546|  0:00:00s
epoch 1  | loss: 91.41772| val_0_rmse: 10.815  |  0:00:00s
epoch 2  | loss: 51.97108| val_0_rmse: 9.95428 |  0:00:00s
epoch 3  | loss: 30.95651| val_0_rmse: 10.19651|  0:00:01s
epoch 4  | loss: 23.53705| val_0_rmse: 11.93786|  0:00:01s
epoch 5  | loss: 26.49959| val_0_rmse: 12.78833|  0:00:01s
epoch 6  | loss: 28.70795| val_0_rmse: 11.04716|  0:00:01s
epoch 7  | loss: 27.62693| val_0_rmse: 8.60536 |  0:00:02s
epoch 8  | loss: 17.5631 | val_0_rmse: 6.48531 |  0:00:02s
epoch 9  | loss: 12.0368 | val_0_rmse: 4.85961 |  0:00:02s
epoch 10 | loss: 12.96177| val_0_rmse: 5.52153 |  0:00:02s
epoch 11 | loss: 11.58591| val_0_rmse: 5.12653 |  0:00:03s
epoch 12 | loss: 7.25067 | val_0_rmse: 7.54005 |  0:00:03s
epoch 13 | loss: 5.99757 | val_0_rmse: 7.98096 |  0:00:03s
epoch 14 | loss: 6.45195 | val_0_rmse: 8.89494 |  0:00:03s
epoch 15 | loss: 6.86438 | val_0_rmse: 6.17995 |  0:00:04s
epoch 16 | loss: 4.47749 | val_0_rmse: 4.79322 |  0:00:

[I 2025-08-17 19:34:21,699] Trial 10 finished with value: 1.0146269300768913 and parameters: {'n_d': 59, 'n_a': 21, 'n_steps': 10, 'gamma': 1.33954011998354, 'lambda_sparse': 0.000362168930242627}. Best is trial 10 with value: 1.0146269300768913.


epoch 0  | loss: 196.30321| val_0_rmse: 14.97988|  0:00:00s
epoch 1  | loss: 108.22404| val_0_rmse: 9.65793 |  0:00:00s
epoch 2  | loss: 51.75033| val_0_rmse: 9.30478 |  0:00:00s
epoch 3  | loss: 24.51808| val_0_rmse: 10.73315|  0:00:01s
epoch 4  | loss: 19.62968| val_0_rmse: 11.5473 |  0:00:01s
epoch 5  | loss: 23.4987 | val_0_rmse: 13.68943|  0:00:01s
epoch 6  | loss: 32.01181| val_0_rmse: 13.93768|  0:00:01s
epoch 7  | loss: 31.74949| val_0_rmse: 10.07374|  0:00:02s
epoch 8  | loss: 18.83738| val_0_rmse: 8.42107 |  0:00:02s
epoch 9  | loss: 12.80357| val_0_rmse: 6.65625 |  0:00:02s
epoch 10 | loss: 14.05057| val_0_rmse: 6.42567 |  0:00:03s
epoch 11 | loss: 19.84217| val_0_rmse: 5.88555 |  0:00:03s
epoch 12 | loss: 14.07368| val_0_rmse: 5.48927 |  0:00:03s
epoch 13 | loss: 11.60271| val_0_rmse: 6.07958 |  0:00:03s
epoch 14 | loss: 8.05404 | val_0_rmse: 6.60212 |  0:00:04s
epoch 15 | loss: 8.9678  | val_0_rmse: 6.87756 |  0:00:04s
epoch 16 | loss: 7.71357 | val_0_rmse: 7.21252 |  0:00

[I 2025-08-17 19:34:33,398] Trial 11 finished with value: 2.583895652370611 and parameters: {'n_d': 58, 'n_a': 17, 'n_steps': 10, 'gamma': 1.345801581516723, 'lambda_sparse': 0.00043655709405961287}. Best is trial 10 with value: 1.0146269300768913.


epoch 0  | loss: 66.33736| val_0_rmse: 13.30054|  0:00:00s
epoch 1  | loss: 37.82458| val_0_rmse: 15.41376|  0:00:00s
epoch 2  | loss: 22.68038| val_0_rmse: 12.71548|  0:00:00s
epoch 3  | loss: 20.41851| val_0_rmse: 12.4848 |  0:00:01s
epoch 4  | loss: 28.15571| val_0_rmse: 9.22194 |  0:00:01s
epoch 5  | loss: 23.69369| val_0_rmse: 8.87083 |  0:00:01s
epoch 6  | loss: 17.43086| val_0_rmse: 6.50159 |  0:00:01s
epoch 7  | loss: 12.81287| val_0_rmse: 6.02142 |  0:00:02s
epoch 8  | loss: 12.04033| val_0_rmse: 5.91608 |  0:00:02s
epoch 9  | loss: 10.51529| val_0_rmse: 5.6687  |  0:00:02s
epoch 10 | loss: 8.58327 | val_0_rmse: 7.69346 |  0:00:03s
epoch 11 | loss: 6.68902 | val_0_rmse: 7.96669 |  0:00:03s
epoch 12 | loss: 6.12296 | val_0_rmse: 7.3416  |  0:00:03s
epoch 13 | loss: 4.50867 | val_0_rmse: 6.1903  |  0:00:03s
epoch 14 | loss: 4.22936 | val_0_rmse: 7.0491  |  0:00:04s
epoch 15 | loss: 2.89401 | val_0_rmse: 6.12811 |  0:00:04s
epoch 16 | loss: 2.80651 | val_0_rmse: 4.99663 |  0:00:0

[I 2025-08-17 19:34:53,987] Trial 12 finished with value: 0.9007735895109354 and parameters: {'n_d': 53, 'n_a': 21, 'n_steps': 10, 'gamma': 1.2844120529477192, 'lambda_sparse': 0.09221209684052338}. Best is trial 12 with value: 0.9007735895109354.


epoch 0  | loss: 323.54263| val_0_rmse: 26.48445|  0:00:00s
epoch 1  | loss: 198.28615| val_0_rmse: 12.9627 |  0:00:00s
epoch 2  | loss: 122.59583| val_0_rmse: 11.26158|  0:00:00s
epoch 3  | loss: 61.08588| val_0_rmse: 9.44919 |  0:00:01s
epoch 4  | loss: 28.76486| val_0_rmse: 11.32756|  0:00:01s
epoch 5  | loss: 21.25362| val_0_rmse: 13.11955|  0:00:01s
epoch 6  | loss: 27.1171 | val_0_rmse: 15.66945|  0:00:01s
epoch 7  | loss: 41.02335| val_0_rmse: 14.83296|  0:00:02s
epoch 8  | loss: 47.62207| val_0_rmse: 17.01311|  0:00:02s
epoch 9  | loss: 41.34343| val_0_rmse: 15.09342|  0:00:02s
epoch 10 | loss: 38.15272| val_0_rmse: 13.42373|  0:00:02s
epoch 11 | loss: 27.20655| val_0_rmse: 10.45383|  0:00:03s
epoch 12 | loss: 18.84767| val_0_rmse: 9.36125 |  0:00:03s
epoch 13 | loss: 13.09336| val_0_rmse: 7.10003 |  0:00:03s
epoch 14 | loss: 11.46255| val_0_rmse: 5.84866 |  0:00:04s
epoch 15 | loss: 11.72309| val_0_rmse: 6.06537 |  0:00:04s
epoch 16 | loss: 14.09832| val_0_rmse: 5.82435 |  0:0

[I 2025-08-17 19:35:12,744] Trial 13 finished with value: 1.5622765227275865 and parameters: {'n_d': 54, 'n_a': 23, 'n_steps': 10, 'gamma': 1.4808144783199784, 'lambda_sparse': 0.08966019357305384}. Best is trial 12 with value: 0.9007735895109354.


epoch 68 | loss: 5.18835 | val_0_rmse: 2.52374 |  0:00:18s

Early stopping occurred at epoch 68 with best_epoch = 58 and best_val_0_rmse = 1.56228




epoch 0  | loss: 287.46051| val_0_rmse: 18.53653|  0:00:00s
epoch 1  | loss: 170.64044| val_0_rmse: 10.99584|  0:00:00s
epoch 2  | loss: 90.55085| val_0_rmse: 8.8675  |  0:00:00s
epoch 3  | loss: 39.45582| val_0_rmse: 9.62552 |  0:00:01s
epoch 4  | loss: 18.01213| val_0_rmse: 10.41416|  0:00:01s
epoch 5  | loss: 24.11679| val_0_rmse: 12.48482|  0:00:01s
epoch 6  | loss: 45.30685| val_0_rmse: 12.89046|  0:00:01s
epoch 7  | loss: 43.63049| val_0_rmse: 11.3515 |  0:00:02s
epoch 8  | loss: 35.70246| val_0_rmse: 10.97104|  0:00:02s
epoch 9  | loss: 24.67857| val_0_rmse: 9.18674 |  0:00:02s
epoch 10 | loss: 16.96111| val_0_rmse: 7.624   |  0:00:02s
epoch 11 | loss: 9.43187 | val_0_rmse: 5.27153 |  0:00:03s
epoch 12 | loss: 9.43129 | val_0_rmse: 5.11142 |  0:00:03s
epoch 13 | loss: 13.68888| val_0_rmse: 4.91346 |  0:00:03s
epoch 14 | loss: 16.07089| val_0_rmse: 4.59493 |  0:00:03s
epoch 15 | loss: 10.51647| val_0_rmse: 6.68082 |  0:00:04s
epoch 16 | loss: 5.26139 | val_0_rmse: 8.38053 |  0:00

[I 2025-08-17 19:35:19,251] Trial 14 finished with value: 4.594929639078096 and parameters: {'n_d': 62, 'n_a': 25, 'n_steps': 9, 'gamma': 1.3194991523186028, 'lambda_sparse': 0.09215445958817768}. Best is trial 12 with value: 0.9007735895109354.


epoch 24 | loss: 2.80591 | val_0_rmse: 6.12185 |  0:00:06s

Early stopping occurred at epoch 24 with best_epoch = 14 and best_val_0_rmse = 4.59493




epoch 0  | loss: 32.81639| val_0_rmse: 23.55909|  0:00:00s
epoch 1  | loss: 28.55363| val_0_rmse: 19.75513|  0:00:00s
epoch 2  | loss: 28.05279| val_0_rmse: 13.38619|  0:00:00s
epoch 3  | loss: 25.48366| val_0_rmse: 14.13499|  0:00:01s
epoch 4  | loss: 20.70164| val_0_rmse: 11.33057|  0:00:01s
epoch 5  | loss: 20.69143| val_0_rmse: 9.18764 |  0:00:02s
epoch 6  | loss: 19.60315| val_0_rmse: 7.56833 |  0:00:02s
epoch 7  | loss: 17.92466| val_0_rmse: 8.43691 |  0:00:02s
epoch 8  | loss: 16.70186| val_0_rmse: 7.84642 |  0:00:03s
epoch 9  | loss: 13.70489| val_0_rmse: 7.50776 |  0:00:03s
epoch 10 | loss: 13.01603| val_0_rmse: 7.21377 |  0:00:03s
epoch 11 | loss: 10.11477| val_0_rmse: 6.76366 |  0:00:03s
epoch 12 | loss: 9.23028 | val_0_rmse: 7.55381 |  0:00:04s
epoch 13 | loss: 6.88535 | val_0_rmse: 6.79218 |  0:00:04s
epoch 14 | loss: 6.80027 | val_0_rmse: 6.84571 |  0:00:04s
epoch 15 | loss: 5.42496 | val_0_rmse: 5.34885 |  0:00:04s
epoch 16 | loss: 4.20052 | val_0_rmse: 5.63775 |  0:00:0

[I 2025-08-17 19:35:36,889] Trial 15 finished with value: 1.8471308495866716 and parameters: {'n_d': 47, 'n_a': 22, 'n_steps': 10, 'gamma': 1.5657033673585847, 'lambda_sparse': 0.0006700915164235196}. Best is trial 12 with value: 0.9007735895109354.


epoch 64 | loss: 21.396  | val_0_rmse: 3.90055 |  0:00:17s

Early stopping occurred at epoch 64 with best_epoch = 54 and best_val_0_rmse = 1.84713




epoch 0  | loss: 88.21726| val_0_rmse: 12.54851|  0:00:00s
epoch 1  | loss: 47.67251| val_0_rmse: 9.63282 |  0:00:00s
epoch 2  | loss: 29.30078| val_0_rmse: 11.57672|  0:00:00s
epoch 3  | loss: 21.87548| val_0_rmse: 12.4527 |  0:00:00s
epoch 4  | loss: 26.17997| val_0_rmse: 10.75783|  0:00:01s
epoch 5  | loss: 23.99831| val_0_rmse: 11.54288|  0:00:01s
epoch 6  | loss: 18.709  | val_0_rmse: 10.75822|  0:00:01s
epoch 7  | loss: 13.81209| val_0_rmse: 9.30361 |  0:00:01s
epoch 8  | loss: 10.83841| val_0_rmse: 8.17007 |  0:00:01s
epoch 9  | loss: 8.70585 | val_0_rmse: 7.18242 |  0:00:02s
epoch 10 | loss: 8.80199 | val_0_rmse: 6.83753 |  0:00:02s
epoch 11 | loss: 6.48174 | val_0_rmse: 8.28275 |  0:00:02s
epoch 12 | loss: 5.01296 | val_0_rmse: 7.66965 |  0:00:02s
epoch 13 | loss: 3.81833 | val_0_rmse: 5.77265 |  0:00:03s
epoch 14 | loss: 2.99578 | val_0_rmse: 5.26937 |  0:00:03s
epoch 15 | loss: 3.37549 | val_0_rmse: 4.4633  |  0:00:03s
epoch 16 | loss: 2.86516 | val_0_rmse: 4.28731 |  0:00:0

[I 2025-08-17 19:35:45,577] Trial 16 finished with value: 2.7712807302027205 and parameters: {'n_d': 56, 'n_a': 18, 'n_steps': 8, 'gamma': 1.3843531097134956, 'lambda_sparse': 0.037387623789011906}. Best is trial 12 with value: 0.9007735895109354.


epoch 0  | loss: 41.56769| val_0_rmse: 29.70797|  0:00:00s
epoch 1  | loss: 31.76717| val_0_rmse: 22.48553|  0:00:00s
epoch 2  | loss: 27.12766| val_0_rmse: 24.25467|  0:00:00s
epoch 3  | loss: 21.79581| val_0_rmse: 20.07101|  0:00:00s
epoch 4  | loss: 20.01238| val_0_rmse: 17.77908|  0:00:01s
epoch 5  | loss: 20.67041| val_0_rmse: 18.67666|  0:00:01s
epoch 6  | loss: 17.88147| val_0_rmse: 16.62329|  0:00:01s
epoch 7  | loss: 14.94773| val_0_rmse: 17.74236|  0:00:01s
epoch 8  | loss: 12.1915 | val_0_rmse: 14.88035|  0:00:01s
epoch 9  | loss: 11.28153| val_0_rmse: 10.88828|  0:00:02s
epoch 10 | loss: 9.25633 | val_0_rmse: 11.37036|  0:00:02s
epoch 11 | loss: 8.19835 | val_0_rmse: 8.26611 |  0:00:02s
epoch 12 | loss: 6.31565 | val_0_rmse: 9.07998 |  0:00:02s
epoch 13 | loss: 4.44799 | val_0_rmse: 11.06895|  0:00:03s
epoch 14 | loss: 4.52023 | val_0_rmse: 10.01139|  0:00:03s
epoch 15 | loss: 3.71232 | val_0_rmse: 8.40662 |  0:00:03s
epoch 16 | loss: 2.78529 | val_0_rmse: 7.54638 |  0:00:0

[I 2025-08-17 19:35:56,381] Trial 17 finished with value: 1.9962588676683592 and parameters: {'n_d': 24, 'n_a': 64, 'n_steps': 8, 'gamma': 1.6767810875076734, 'lambda_sparse': 0.0006618581597728335}. Best is trial 12 with value: 0.9007735895109354.


epoch 0  | loss: 126.0   | val_0_rmse: 11.6128 |  0:00:00s
epoch 1  | loss: 65.75977| val_0_rmse: 11.73177|  0:00:00s
epoch 2  | loss: 33.72823| val_0_rmse: 17.55947|  0:00:00s
epoch 3  | loss: 26.03047| val_0_rmse: 11.11664|  0:00:00s
epoch 4  | loss: 27.2443 | val_0_rmse: 8.78857 |  0:00:00s
epoch 5  | loss: 24.56746| val_0_rmse: 8.32019 |  0:00:01s
epoch 6  | loss: 17.83888| val_0_rmse: 7.57775 |  0:00:01s
epoch 7  | loss: 11.32376| val_0_rmse: 6.40696 |  0:00:01s
epoch 8  | loss: 9.2517  | val_0_rmse: 5.58164 |  0:00:01s
epoch 9  | loss: 9.97748 | val_0_rmse: 5.62423 |  0:00:01s
epoch 10 | loss: 8.44279 | val_0_rmse: 5.84136 |  0:00:01s
epoch 11 | loss: 5.66998 | val_0_rmse: 6.85173 |  0:00:02s
epoch 12 | loss: 4.77496 | val_0_rmse: 7.61631 |  0:00:02s
epoch 13 | loss: 4.26319 | val_0_rmse: 6.42413 |  0:00:02s
epoch 14 | loss: 3.07351 | val_0_rmse: 5.72932 |  0:00:02s
epoch 15 | loss: 3.03243 | val_0_rmse: 5.85029 |  0:00:02s
epoch 16 | loss: 3.14274 | val_0_rmse: 5.16217 |  0:00:0

[I 2025-08-17 19:36:12,213] Trial 18 finished with value: 0.5029276590193493 and parameters: {'n_d': 64, 'n_a': 27, 'n_steps': 6, 'gamma': 1.2147537830140245, 'lambda_sparse': 0.00023542264176747897}. Best is trial 18 with value: 0.5029276590193493.


epoch 90 | loss: 0.24756 | val_0_rmse: 0.55512 |  0:00:15s

Early stopping occurred at epoch 90 with best_epoch = 80 and best_val_0_rmse = 0.50293




epoch 0  | loss: 207.22772| val_0_rmse: 15.51161|  0:00:00s
epoch 1  | loss: 128.99545| val_0_rmse: 9.58995 |  0:00:00s
epoch 2  | loss: 85.10622| val_0_rmse: 7.36175 |  0:00:00s
epoch 3  | loss: 47.41855| val_0_rmse: 7.23742 |  0:00:00s
epoch 4  | loss: 29.7753 | val_0_rmse: 8.46379 |  0:00:00s
epoch 5  | loss: 22.46356| val_0_rmse: 10.29366|  0:00:00s
epoch 6  | loss: 22.89449| val_0_rmse: 10.25882|  0:00:01s
epoch 7  | loss: 19.84531| val_0_rmse: 11.24596|  0:00:01s
epoch 8  | loss: 20.03627| val_0_rmse: 11.07363|  0:00:01s
epoch 9  | loss: 15.46434| val_0_rmse: 9.70774 |  0:00:01s
epoch 10 | loss: 10.85251| val_0_rmse: 8.23361 |  0:00:01s
epoch 11 | loss: 6.47879 | val_0_rmse: 7.12417 |  0:00:01s
epoch 12 | loss: 5.25276 | val_0_rmse: 7.30609 |  0:00:01s
epoch 13 | loss: 4.95372 | val_0_rmse: 6.52417 |  0:00:02s
epoch 14 | loss: 3.65474 | val_0_rmse: 6.00765 |  0:00:02s
epoch 15 | loss: 2.76547 | val_0_rmse: 5.43572 |  0:00:02s
epoch 16 | loss: 2.6172  | val_0_rmse: 4.84276 |  0:00

[I 2025-08-17 19:36:18,042] Trial 19 finished with value: 2.5265332890663625 and parameters: {'n_d': 64, 'n_a': 28, 'n_steps': 5, 'gamma': 1.1835899559556702, 'lambda_sparse': 0.03732413604073572}. Best is trial 18 with value: 0.5029276590193493.


epoch 38 | loss: 0.37378 | val_0_rmse: 3.17527 |  0:00:05s

Early stopping occurred at epoch 38 with best_epoch = 28 and best_val_0_rmse = 2.52653




epoch 0  | loss: 161.56042| val_0_rmse: 11.84601|  0:00:00s
epoch 1  | loss: 94.82052| val_0_rmse: 8.28009 |  0:00:00s
epoch 2  | loss: 51.30489| val_0_rmse: 9.9868  |  0:00:00s
epoch 3  | loss: 31.859  | val_0_rmse: 9.02085 |  0:00:00s
epoch 4  | loss: 25.08079| val_0_rmse: 9.00428 |  0:00:00s
epoch 5  | loss: 21.62299| val_0_rmse: 9.98871 |  0:00:01s
epoch 6  | loss: 27.50182| val_0_rmse: 8.41333 |  0:00:01s
epoch 7  | loss: 18.72823| val_0_rmse: 7.4478  |  0:00:01s
epoch 8  | loss: 12.06919| val_0_rmse: 6.38398 |  0:00:01s
epoch 9  | loss: 8.22503 | val_0_rmse: 5.37112 |  0:00:01s
epoch 10 | loss: 7.71441 | val_0_rmse: 5.52926 |  0:00:01s
epoch 11 | loss: 6.60912 | val_0_rmse: 4.92413 |  0:00:02s
epoch 12 | loss: 6.11024 | val_0_rmse: 4.14996 |  0:00:02s
epoch 13 | loss: 5.12198 | val_0_rmse: 4.78589 |  0:00:02s
epoch 14 | loss: 3.64116 | val_0_rmse: 4.95589 |  0:00:02s
epoch 15 | loss: 2.94783 | val_0_rmse: 5.23657 |  0:00:02s
epoch 16 | loss: 3.12502 | val_0_rmse: 4.72905 |  0:00:

[I 2025-08-17 19:36:31,137] Trial 20 finished with value: 0.7965238779362632 and parameters: {'n_d': 50, 'n_a': 41, 'n_steps': 6, 'gamma': 1.124252932949943, 'lambda_sparse': 0.0010627211421633418}. Best is trial 18 with value: 0.5029276590193493.


epoch 0  | loss: 130.78362| val_0_rmse: 23.27059|  0:00:00s
epoch 1  | loss: 77.15248| val_0_rmse: 13.89847|  0:00:00s
epoch 2  | loss: 41.82212| val_0_rmse: 13.28668|  0:00:00s
epoch 3  | loss: 26.34944| val_0_rmse: 12.13925|  0:00:00s
epoch 4  | loss: 26.75512| val_0_rmse: 13.64659|  0:00:00s
epoch 5  | loss: 25.26047| val_0_rmse: 19.9117 |  0:00:01s
epoch 6  | loss: 24.01398| val_0_rmse: 12.4866 |  0:00:01s
epoch 7  | loss: 22.66792| val_0_rmse: 9.22318 |  0:00:01s
epoch 8  | loss: 11.97733| val_0_rmse: 7.06613 |  0:00:01s
epoch 9  | loss: 8.53508 | val_0_rmse: 6.49351 |  0:00:01s
epoch 10 | loss: 6.28571 | val_0_rmse: 7.68543 |  0:00:01s
epoch 11 | loss: 5.85183 | val_0_rmse: 6.89779 |  0:00:02s
epoch 12 | loss: 4.99866 | val_0_rmse: 7.78678 |  0:00:02s
epoch 13 | loss: 3.57244 | val_0_rmse: 8.40323 |  0:00:02s
epoch 14 | loss: 3.19301 | val_0_rmse: 7.68054 |  0:00:02s
epoch 15 | loss: 2.88097 | val_0_rmse: 7.04314 |  0:00:02s
epoch 16 | loss: 2.31589 | val_0_rmse: 5.82274 |  0:00:

[I 2025-08-17 19:36:41,221] Trial 21 finished with value: 0.8168657355032422 and parameters: {'n_d': 51, 'n_a': 41, 'n_steps': 6, 'gamma': 1.1507670417615332, 'lambda_sparse': 0.0014983834878547829}. Best is trial 18 with value: 0.5029276590193493.


epoch 56 | loss: 0.61052 | val_0_rmse: 1.87819 |  0:00:09s

Early stopping occurred at epoch 56 with best_epoch = 46 and best_val_0_rmse = 0.81687




epoch 0  | loss: 371.91025| val_0_rmse: 26.25469|  0:00:00s
epoch 1  | loss: 271.68491| val_0_rmse: 17.76461|  0:00:00s
epoch 2  | loss: 203.34599| val_0_rmse: 11.65486|  0:00:00s
epoch 3  | loss: 149.67433| val_0_rmse: 9.08664 |  0:00:00s
epoch 4  | loss: 104.78888| val_0_rmse: 7.1629  |  0:00:00s
epoch 5  | loss: 73.19245| val_0_rmse: 6.04451 |  0:00:01s
epoch 6  | loss: 47.45477| val_0_rmse: 5.56666 |  0:00:01s
epoch 7  | loss: 30.28494| val_0_rmse: 6.04021 |  0:00:01s
epoch 8  | loss: 19.32345| val_0_rmse: 6.04697 |  0:00:01s
epoch 9  | loss: 15.73299| val_0_rmse: 6.68076 |  0:00:01s
epoch 10 | loss: 15.48692| val_0_rmse: 8.13165 |  0:00:01s
epoch 11 | loss: 16.86296| val_0_rmse: 7.9802  |  0:00:01s
epoch 12 | loss: 15.60342| val_0_rmse: 7.88585 |  0:00:02s
epoch 13 | loss: 11.61983| val_0_rmse: 6.24683 |  0:00:02s
epoch 14 | loss: 8.18809 | val_0_rmse: 5.50083 |  0:00:02s
epoch 15 | loss: 6.01029 | val_0_rmse: 5.47749 |  0:00:02s
epoch 16 | loss: 4.94983 | val_0_rmse: 4.96602 |  0

[I 2025-08-17 19:36:57,836] Trial 22 finished with value: 0.4196867714259349 and parameters: {'n_d': 48, 'n_a': 40, 'n_steps': 6, 'gamma': 1.130755972188142, 'lambda_sparse': 0.0012102672606712469}. Best is trial 22 with value: 0.4196867714259349.


epoch 0  | loss: 111.3753| val_0_rmse: 25.2387 |  0:00:00s
epoch 1  | loss: 69.15195| val_0_rmse: 17.23525|  0:00:00s
epoch 2  | loss: 44.46788| val_0_rmse: 14.91417|  0:00:00s
epoch 3  | loss: 28.64447| val_0_rmse: 13.79098|  0:00:00s
epoch 4  | loss: 21.45718| val_0_rmse: 12.05334|  0:00:00s
epoch 5  | loss: 19.74208| val_0_rmse: 12.98172|  0:00:00s
epoch 6  | loss: 20.23634| val_0_rmse: 12.7094 |  0:00:01s
epoch 7  | loss: 19.32241| val_0_rmse: 9.79405 |  0:00:01s
epoch 8  | loss: 13.78983| val_0_rmse: 8.92076 |  0:00:01s
epoch 9  | loss: 11.91487| val_0_rmse: 7.13616 |  0:00:01s
epoch 10 | loss: 6.96168 | val_0_rmse: 5.70244 |  0:00:01s
epoch 11 | loss: 5.75329 | val_0_rmse: 4.74119 |  0:00:01s
epoch 12 | loss: 5.00677 | val_0_rmse: 4.62205 |  0:00:01s
epoch 13 | loss: 3.90025 | val_0_rmse: 4.22871 |  0:00:02s
epoch 14 | loss: 2.90925 | val_0_rmse: 4.02962 |  0:00:02s
epoch 15 | loss: 2.66909 | val_0_rmse: 3.54703 |  0:00:02s
epoch 16 | loss: 2.09346 | val_0_rmse: 3.51587 |  0:00:0

[I 2025-08-17 19:37:09,240] Trial 23 finished with value: 0.3685019071588906 and parameters: {'n_d': 45, 'n_a': 39, 'n_steps': 5, 'gamma': 1.1360236456129411, 'lambda_sparse': 0.0002007297459350154}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 228.67744| val_0_rmse: 17.66335|  0:00:00s
epoch 1  | loss: 167.44463| val_0_rmse: 11.56434|  0:00:00s
epoch 2  | loss: 121.3431| val_0_rmse: 9.23107 |  0:00:00s
epoch 3  | loss: 85.35699| val_0_rmse: 7.28648 |  0:00:00s
epoch 4  | loss: 57.50399| val_0_rmse: 6.09815 |  0:00:00s
epoch 5  | loss: 39.52813| val_0_rmse: 5.65852 |  0:00:00s
epoch 6  | loss: 27.61618| val_0_rmse: 5.62658 |  0:00:01s
epoch 7  | loss: 21.79506| val_0_rmse: 5.08442 |  0:00:01s
epoch 8  | loss: 19.6198 | val_0_rmse: 6.08951 |  0:00:01s
epoch 9  | loss: 19.94263| val_0_rmse: 6.28614 |  0:00:01s
epoch 10 | loss: 16.99518| val_0_rmse: 6.23742 |  0:00:01s
epoch 11 | loss: 15.4235 | val_0_rmse: 6.2953  |  0:00:01s
epoch 12 | loss: 13.01504| val_0_rmse: 5.7517  |  0:00:01s
epoch 13 | loss: 8.65258 | val_0_rmse: 4.56938 |  0:00:02s
epoch 14 | loss: 5.70812 | val_0_rmse: 4.32481 |  0:00:02s
epoch 15 | loss: 4.22207 | val_0_rmse: 3.88014 |  0:00:02s
epoch 16 | loss: 3.88409 | val_0_rmse: 4.36211 |  0:00

[I 2025-08-17 19:37:13,242] Trial 24 finished with value: 3.8801433696116137 and parameters: {'n_d': 45, 'n_a': 38, 'n_steps': 5, 'gamma': 1.003687716507547, 'lambda_sparse': 0.00020475182120122795}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 120.79424| val_0_rmse: 9.35694 |  0:00:00s
epoch 1  | loss: 91.39552| val_0_rmse: 11.04201|  0:00:00s
epoch 2  | loss: 67.84496| val_0_rmse: 11.8319 |  0:00:00s
epoch 3  | loss: 49.9287 | val_0_rmse: 13.66225|  0:00:00s
epoch 4  | loss: 34.33249| val_0_rmse: 15.50107|  0:00:00s
epoch 5  | loss: 25.12586| val_0_rmse: 16.43703|  0:00:00s
epoch 6  | loss: 19.2705 | val_0_rmse: 17.62612|  0:00:00s
epoch 7  | loss: 14.59353| val_0_rmse: 16.95075|  0:00:00s
epoch 8  | loss: 14.63389| val_0_rmse: 16.01379|  0:00:01s
epoch 9  | loss: 12.14301| val_0_rmse: 14.8729 |  0:00:01s


[I 2025-08-17 19:37:14,615] Trial 25 finished with value: 9.356936618607586 and parameters: {'n_d': 36, 'n_a': 49, 'n_steps': 4, 'gamma': 1.243277166177376, 'lambda_sparse': 0.0002415965407390162}. Best is trial 23 with value: 0.3685019071588906.


epoch 10 | loss: 10.29745| val_0_rmse: 12.79828|  0:00:01s

Early stopping occurred at epoch 10 with best_epoch = 0 and best_val_0_rmse = 9.35694




epoch 0  | loss: 206.25301| val_0_rmse: 16.09492|  0:00:00s
epoch 1  | loss: 170.01541| val_0_rmse: 12.46818|  0:00:00s
epoch 2  | loss: 143.03793| val_0_rmse: 10.48238|  0:00:00s
epoch 3  | loss: 118.19421| val_0_rmse: 8.86096 |  0:00:00s
epoch 4  | loss: 99.05254| val_0_rmse: 7.84078 |  0:00:00s
epoch 5  | loss: 80.89357| val_0_rmse: 7.40668 |  0:00:00s
epoch 6  | loss: 66.36424| val_0_rmse: 6.80135 |  0:00:00s
epoch 7  | loss: 52.07519| val_0_rmse: 6.57373 |  0:00:00s
epoch 8  | loss: 39.57186| val_0_rmse: 6.72831 |  0:00:01s
epoch 9  | loss: 30.38114| val_0_rmse: 7.81124 |  0:00:01s
epoch 10 | loss: 19.23916| val_0_rmse: 8.16665 |  0:00:01s
epoch 11 | loss: 14.40153| val_0_rmse: 9.33003 |  0:00:01s
epoch 12 | loss: 10.00406| val_0_rmse: 10.6535 |  0:00:01s
epoch 13 | loss: 8.53274 | val_0_rmse: 12.28386|  0:00:01s
epoch 14 | loss: 9.15286 | val_0_rmse: 12.08001|  0:00:01s
epoch 15 | loss: 7.87298 | val_0_rmse: 11.39223|  0:00:01s
epoch 16 | loss: 6.09685 | val_0_rmse: 9.50942 |  0:

[I 2025-08-17 19:37:16,871] Trial 26 finished with value: 6.573728243471242 and parameters: {'n_d': 28, 'n_a': 46, 'n_steps': 4, 'gamma': 1.1108686667818006, 'lambda_sparse': 0.00020820700370813782}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 109.72502| val_0_rmse: 9.12723 |  0:00:00s
epoch 1  | loss: 65.31134| val_0_rmse: 7.13919 |  0:00:00s
epoch 2  | loss: 39.34401| val_0_rmse: 6.19488 |  0:00:00s
epoch 3  | loss: 24.02293| val_0_rmse: 6.09123 |  0:00:00s
epoch 4  | loss: 19.91253| val_0_rmse: 6.24458 |  0:00:00s
epoch 5  | loss: 26.05991| val_0_rmse: 6.28497 |  0:00:01s
epoch 6  | loss: 24.70434| val_0_rmse: 6.04626 |  0:00:01s
epoch 7  | loss: 21.41554| val_0_rmse: 6.36092 |  0:00:01s
epoch 8  | loss: 16.92964| val_0_rmse: 5.59815 |  0:00:01s
epoch 9  | loss: 11.63274| val_0_rmse: 5.48931 |  0:00:01s
epoch 10 | loss: 9.32742 | val_0_rmse: 4.96076 |  0:00:02s
epoch 11 | loss: 7.47399 | val_0_rmse: 4.26343 |  0:00:02s
epoch 12 | loss: 7.07566 | val_0_rmse: 3.86638 |  0:00:02s
epoch 13 | loss: 5.72215 | val_0_rmse: 4.1415  |  0:00:02s
epoch 14 | loss: 3.96721 | val_0_rmse: 4.85171 |  0:00:02s
epoch 15 | loss: 3.7715  | val_0_rmse: 4.58091 |  0:00:03s
epoch 16 | loss: 3.40885 | val_0_rmse: 4.79965 |  0:00:

[I 2025-08-17 19:37:21,333] Trial 27 finished with value: 3.8663838467681413 and parameters: {'n_d': 41, 'n_a': 29, 'n_steps': 7, 'gamma': 1.0874954564533712, 'lambda_sparse': 0.0008425140999242845}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 141.50716| val_0_rmse: 17.00388|  0:00:00s
epoch 1  | loss: 120.45929| val_0_rmse: 10.1192 |  0:00:00s
epoch 2  | loss: 99.70092| val_0_rmse: 9.21703 |  0:00:00s
epoch 3  | loss: 85.06916| val_0_rmse: 8.08205 |  0:00:00s
epoch 4  | loss: 67.34659| val_0_rmse: 7.12746 |  0:00:00s
epoch 5  | loss: 54.17551| val_0_rmse: 6.60465 |  0:00:00s
epoch 6  | loss: 42.97457| val_0_rmse: 6.0051  |  0:00:01s
epoch 7  | loss: 31.93841| val_0_rmse: 6.26073 |  0:00:01s
epoch 8  | loss: 23.58742| val_0_rmse: 5.77623 |  0:00:01s
epoch 9  | loss: 17.73654| val_0_rmse: 6.7554  |  0:00:01s
epoch 10 | loss: 14.18542| val_0_rmse: 7.15823 |  0:00:01s
epoch 11 | loss: 10.59334| val_0_rmse: 7.16218 |  0:00:01s
epoch 12 | loss: 10.12449| val_0_rmse: 7.27376 |  0:00:01s
epoch 13 | loss: 10.27763| val_0_rmse: 8.03104 |  0:00:01s
epoch 14 | loss: 10.1872 | val_0_rmse: 7.0902  |  0:00:02s
epoch 15 | loss: 8.10387 | val_0_rmse: 6.06322 |  0:00:02s
epoch 16 | loss: 6.30395 | val_0_rmse: 5.6121  |  0:00

[I 2025-08-17 19:37:33,246] Trial 28 finished with value: 0.3910487248887938 and parameters: {'n_d': 18, 'n_a': 37, 'n_steps': 5, 'gamma': 1.4082725951091808, 'lambda_sparse': 0.00011031348380304451}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 140.8549| val_0_rmse: 11.16262|  0:00:00s
epoch 1  | loss: 112.63194| val_0_rmse: 9.53891 |  0:00:00s
epoch 2  | loss: 88.81105| val_0_rmse: 8.63434 |  0:00:00s
epoch 3  | loss: 66.70233| val_0_rmse: 8.46025 |  0:00:00s
epoch 4  | loss: 51.65121| val_0_rmse: 7.86724 |  0:00:00s
epoch 5  | loss: 37.85587| val_0_rmse: 7.03242 |  0:00:00s
epoch 6  | loss: 27.67259| val_0_rmse: 6.78632 |  0:00:01s
epoch 7  | loss: 21.21971| val_0_rmse: 7.67406 |  0:00:01s
epoch 8  | loss: 17.04761| val_0_rmse: 6.1375  |  0:00:01s
epoch 9  | loss: 14.81435| val_0_rmse: 7.28787 |  0:00:01s
epoch 10 | loss: 12.4695 | val_0_rmse: 6.28581 |  0:00:01s
epoch 11 | loss: 11.7206 | val_0_rmse: 6.00065 |  0:00:01s
epoch 12 | loss: 9.07138 | val_0_rmse: 5.31543 |  0:00:01s
epoch 13 | loss: 8.11117 | val_0_rmse: 5.67073 |  0:00:02s
epoch 14 | loss: 6.43551 | val_0_rmse: 5.23929 |  0:00:02s
epoch 15 | loss: 4.57027 | val_0_rmse: 4.59487 |  0:00:02s
epoch 16 | loss: 4.58353 | val_0_rmse: 4.39298 |  0:00:

[I 2025-08-17 19:37:46,796] Trial 29 finished with value: 0.4778690027376191 and parameters: {'n_d': 19, 'n_a': 52, 'n_steps': 5, 'gamma': 1.426989051064621, 'lambda_sparse': 0.003987548878003807}. Best is trial 23 with value: 0.3685019071588906.


epoch 94 | loss: 0.44711 | val_0_rmse: 0.66714 |  0:00:13s

Early stopping occurred at epoch 94 with best_epoch = 84 and best_val_0_rmse = 0.47787




epoch 0  | loss: 186.29234| val_0_rmse: 14.10807|  0:00:00s
epoch 1  | loss: 163.22923| val_0_rmse: 12.16452|  0:00:00s
epoch 2  | loss: 143.6834| val_0_rmse: 11.64039|  0:00:00s
epoch 3  | loss: 129.02519| val_0_rmse: 10.80004|  0:00:00s
epoch 4  | loss: 112.09975| val_0_rmse: 10.32551|  0:00:00s
epoch 5  | loss: 101.0628| val_0_rmse: 9.52899 |  0:00:00s
epoch 6  | loss: 87.32006| val_0_rmse: 9.06341 |  0:00:00s
epoch 7  | loss: 76.50093| val_0_rmse: 8.5493  |  0:00:01s
epoch 8  | loss: 65.49304| val_0_rmse: 8.19792 |  0:00:01s
epoch 9  | loss: 55.4156 | val_0_rmse: 8.07582 |  0:00:01s
epoch 10 | loss: 46.23163| val_0_rmse: 7.73898 |  0:00:01s
epoch 11 | loss: 38.23417| val_0_rmse: 7.82234 |  0:00:01s
epoch 12 | loss: 31.41894| val_0_rmse: 7.54476 |  0:00:01s
epoch 13 | loss: 22.79627| val_0_rmse: 8.29331 |  0:00:01s
epoch 14 | loss: 18.45795| val_0_rmse: 8.96076 |  0:00:01s
epoch 15 | loss: 14.4081 | val_0_rmse: 7.59698 |  0:00:02s
epoch 16 | loss: 10.8428 | val_0_rmse: 6.86155 |  0:

[I 2025-08-17 19:37:59,547] Trial 30 finished with value: 0.5075814824848097 and parameters: {'n_d': 15, 'n_a': 37, 'n_steps': 5, 'gamma': 1.7091816154461341, 'lambda_sparse': 0.00010131539788332607}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 229.18083| val_0_rmse: 18.14286|  0:00:00s
epoch 1  | loss: 190.18808| val_0_rmse: 15.3084 |  0:00:00s
epoch 2  | loss: 156.88312| val_0_rmse: 13.07612|  0:00:00s
epoch 3  | loss: 130.76367| val_0_rmse: 12.19652|  0:00:00s
epoch 4  | loss: 107.5015| val_0_rmse: 11.6741 |  0:00:00s
epoch 5  | loss: 89.20731| val_0_rmse: 11.92452|  0:00:00s
epoch 6  | loss: 73.18825| val_0_rmse: 11.62453|  0:00:01s
epoch 7  | loss: 59.78054| val_0_rmse: 11.48775|  0:00:01s
epoch 8  | loss: 48.56524| val_0_rmse: 11.96652|  0:00:01s
epoch 9  | loss: 38.38714| val_0_rmse: 10.87067|  0:00:01s
epoch 10 | loss: 28.1808 | val_0_rmse: 11.61055|  0:00:01s
epoch 11 | loss: 20.00663| val_0_rmse: 12.02232|  0:00:01s
epoch 12 | loss: 15.16467| val_0_rmse: 12.67691|  0:00:01s
epoch 13 | loss: 12.33882| val_0_rmse: 13.1559 |  0:00:01s
epoch 14 | loss: 11.4521 | val_0_rmse: 14.25872|  0:00:02s
epoch 15 | loss: 10.00296| val_0_rmse: 13.00029|  0:00:02s
epoch 16 | loss: 9.62661 | val_0_rmse: 12.07662|  0:

[I 2025-08-17 19:38:11,294] Trial 31 finished with value: 0.6809101979952362 and parameters: {'n_d': 19, 'n_a': 54, 'n_steps': 5, 'gamma': 1.4115290413584074, 'lambda_sparse': 0.0069341580029028075}. Best is trial 23 with value: 0.3685019071588906.


epoch 80 | loss: 0.2277  | val_0_rmse: 0.71771 |  0:00:11s

Early stopping occurred at epoch 80 with best_epoch = 70 and best_val_0_rmse = 0.68091




epoch 0  | loss: 151.94122| val_0_rmse: 11.2008 |  0:00:00s
epoch 1  | loss: 124.69339| val_0_rmse: 8.93769 |  0:00:00s
epoch 2  | loss: 100.92835| val_0_rmse: 8.19465 |  0:00:00s
epoch 3  | loss: 81.08182| val_0_rmse: 7.54432 |  0:00:00s
epoch 4  | loss: 65.83509| val_0_rmse: 7.79335 |  0:00:00s
epoch 5  | loss: 50.8989 | val_0_rmse: 7.19007 |  0:00:00s
epoch 6  | loss: 38.29085| val_0_rmse: 6.83891 |  0:00:00s
epoch 7  | loss: 26.97923| val_0_rmse: 6.46101 |  0:00:00s
epoch 8  | loss: 19.51822| val_0_rmse: 6.54193 |  0:00:01s
epoch 9  | loss: 14.10955| val_0_rmse: 6.5914  |  0:00:01s
epoch 10 | loss: 10.63414| val_0_rmse: 7.25489 |  0:00:01s
epoch 11 | loss: 11.81322| val_0_rmse: 6.97273 |  0:00:01s
epoch 12 | loss: 10.6188 | val_0_rmse: 7.01899 |  0:00:01s
epoch 13 | loss: 10.68753| val_0_rmse: 6.88875 |  0:00:01s
epoch 14 | loss: 9.22244 | val_0_rmse: 5.93885 |  0:00:01s
epoch 15 | loss: 7.46957 | val_0_rmse: 5.43006 |  0:00:01s
epoch 16 | loss: 4.39216 | val_0_rmse: 4.71786 |  0:0

[I 2025-08-17 19:38:19,754] Trial 32 finished with value: 0.527837972039679 and parameters: {'n_d': 21, 'n_a': 44, 'n_steps': 4, 'gamma': 1.4043003812745538, 'lambda_sparse': 0.0034902191812816634}. Best is trial 23 with value: 0.3685019071588906.


epoch 74 | loss: 0.22608 | val_0_rmse: 0.61756 |  0:00:08s

Early stopping occurred at epoch 74 with best_epoch = 64 and best_val_0_rmse = 0.52784




epoch 0  | loss: 126.25314| val_0_rmse: 10.96627|  0:00:00s
epoch 1  | loss: 95.80392| val_0_rmse: 8.41992 |  0:00:00s
epoch 2  | loss: 72.40668| val_0_rmse: 7.51353 |  0:00:00s
epoch 3  | loss: 53.31591| val_0_rmse: 8.09823 |  0:00:00s
epoch 4  | loss: 42.95406| val_0_rmse: 8.92939 |  0:00:00s
epoch 5  | loss: 31.72871| val_0_rmse: 12.74231|  0:00:00s
epoch 6  | loss: 22.68375| val_0_rmse: 12.44428|  0:00:00s
epoch 7  | loss: 16.98362| val_0_rmse: 12.23293|  0:00:01s
epoch 8  | loss: 13.38693| val_0_rmse: 12.17042|  0:00:01s
epoch 9  | loss: 12.31417| val_0_rmse: 12.02173|  0:00:01s
epoch 10 | loss: 11.17867| val_0_rmse: 12.40758|  0:00:01s
epoch 11 | loss: 10.96285| val_0_rmse: 10.98961|  0:00:01s


[I 2025-08-17 19:38:21,664] Trial 33 finished with value: 7.513529363756573 and parameters: {'n_d': 18, 'n_a': 51, 'n_steps': 5, 'gamma': 1.5330731495577896, 'lambda_sparse': 0.0029467258003691445}. Best is trial 23 with value: 0.3685019071588906.


epoch 12 | loss: 11.09389| val_0_rmse: 10.64732|  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 7.51353




epoch 0  | loss: 132.83841| val_0_rmse: 14.19341|  0:00:00s
epoch 1  | loss: 92.86705| val_0_rmse: 8.00892 |  0:00:00s
epoch 2  | loss: 64.24682| val_0_rmse: 8.40488 |  0:00:00s
epoch 3  | loss: 43.8085 | val_0_rmse: 11.62234|  0:00:00s
epoch 4  | loss: 32.31911| val_0_rmse: 10.45578|  0:00:00s
epoch 5  | loss: 22.91319| val_0_rmse: 8.63396 |  0:00:01s
epoch 6  | loss: 19.35241| val_0_rmse: 6.9424  |  0:00:01s
epoch 7  | loss: 19.06371| val_0_rmse: 7.81753 |  0:00:01s
epoch 8  | loss: 15.45676| val_0_rmse: 6.342   |  0:00:01s
epoch 9  | loss: 11.53713| val_0_rmse: 6.66353 |  0:00:01s
epoch 10 | loss: 9.41385 | val_0_rmse: 5.99086 |  0:00:01s
epoch 11 | loss: 8.78159 | val_0_rmse: 4.89273 |  0:00:02s
epoch 12 | loss: 5.78064 | val_0_rmse: 4.70452 |  0:00:02s
epoch 13 | loss: 4.61044 | val_0_rmse: 5.7579  |  0:00:02s
epoch 14 | loss: 3.96945 | val_0_rmse: 4.97482 |  0:00:02s
epoch 15 | loss: 2.96341 | val_0_rmse: 4.5878  |  0:00:02s
epoch 16 | loss: 2.39553 | val_0_rmse: 4.57207 |  0:00:

[I 2025-08-17 19:38:30,402] Trial 34 finished with value: 1.0754303278043333 and parameters: {'n_d': 38, 'n_a': 56, 'n_steps': 6, 'gamma': 1.4498201323030255, 'lambda_sparse': 0.00042893639477488925}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 209.09047| val_0_rmse: 19.67817|  0:00:00s
epoch 1  | loss: 142.6254| val_0_rmse: 12.46234|  0:00:00s
epoch 2  | loss: 93.35516| val_0_rmse: 10.88936|  0:00:00s
epoch 3  | loss: 57.16436| val_0_rmse: 12.05938|  0:00:00s
epoch 4  | loss: 35.30552| val_0_rmse: 16.39048|  0:00:00s
epoch 5  | loss: 20.07232| val_0_rmse: 18.18307|  0:00:00s
epoch 6  | loss: 18.59453| val_0_rmse: 19.62814|  0:00:01s
epoch 7  | loss: 17.34037| val_0_rmse: 19.27037|  0:00:01s
epoch 8  | loss: 16.43733| val_0_rmse: 19.81044|  0:00:01s
epoch 9  | loss: 16.40735| val_0_rmse: 17.33239|  0:00:01s
epoch 10 | loss: 12.71966| val_0_rmse: 14.26532|  0:00:01s
epoch 11 | loss: 8.78262 | val_0_rmse: 13.0613 |  0:00:01s
epoch 12 | loss: 6.22809 | val_0_rmse: 10.55973|  0:00:02s
epoch 13 | loss: 4.41368 | val_0_rmse: 8.31755 |  0:00:02s
epoch 14 | loss: 4.02079 | val_0_rmse: 9.05121 |  0:00:02s
epoch 15 | loss: 4.03052 | val_0_rmse: 7.64006 |  0:00:02s
epoch 16 | loss: 3.01939 | val_0_rmse: 9.25407 |  0:00:

[I 2025-08-17 19:38:45,668] Trial 35 finished with value: 0.49386025480073015 and parameters: {'n_d': 44, 'n_a': 63, 'n_steps': 5, 'gamma': 1.2819907719765453, 'lambda_sparse': 0.0014007396466251547}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 162.74522| val_0_rmse: 12.49516|  0:00:00s
epoch 1  | loss: 130.12724| val_0_rmse: 10.39622|  0:00:00s
epoch 2  | loss: 104.68931| val_0_rmse: 8.72547 |  0:00:00s
epoch 3  | loss: 82.61601| val_0_rmse: 8.60697 |  0:00:00s
epoch 4  | loss: 65.97145| val_0_rmse: 7.54702 |  0:00:00s
epoch 5  | loss: 49.37449| val_0_rmse: 7.36329 |  0:00:00s
epoch 6  | loss: 34.59172| val_0_rmse: 8.68832 |  0:00:00s
epoch 7  | loss: 23.00875| val_0_rmse: 8.34445 |  0:00:00s
epoch 8  | loss: 15.1642 | val_0_rmse: 9.28009 |  0:00:00s
epoch 9  | loss: 9.44298 | val_0_rmse: 10.348  |  0:00:01s
epoch 10 | loss: 7.32039 | val_0_rmse: 10.4731 |  0:00:01s
epoch 11 | loss: 7.40911 | val_0_rmse: 10.56379|  0:00:01s
epoch 12 | loss: 7.70621 | val_0_rmse: 9.40003 |  0:00:01s
epoch 13 | loss: 6.92257 | val_0_rmse: 7.51688 |  0:00:01s
epoch 14 | loss: 4.69367 | val_0_rmse: 6.90946 |  0:00:01s
epoch 15 | loss: 3.31782 | val_0_rmse: 5.5711  |  0:00:01s
epoch 16 | loss: 1.78372 | val_0_rmse: 5.2665  |  0:0

[I 2025-08-17 19:38:52,900] Trial 36 finished with value: 0.4161067357132739 and parameters: {'n_d': 31, 'n_a': 60, 'n_steps': 3, 'gamma': 1.621191384074509, 'lambda_sparse': 0.0023605006917007745}. Best is trial 23 with value: 0.3685019071588906.


epoch 0  | loss: 162.68268| val_0_rmse: 15.05235|  0:00:00s
epoch 1  | loss: 131.62512| val_0_rmse: 10.60519|  0:00:00s
epoch 2  | loss: 106.08689| val_0_rmse: 9.05164 |  0:00:00s
epoch 3  | loss: 87.25091| val_0_rmse: 7.81584 |  0:00:00s
epoch 4  | loss: 68.02971| val_0_rmse: 6.95068 |  0:00:00s
epoch 5  | loss: 54.28933| val_0_rmse: 6.07329 |  0:00:00s
epoch 6  | loss: 40.39161| val_0_rmse: 5.95481 |  0:00:00s
epoch 7  | loss: 27.40804| val_0_rmse: 6.67417 |  0:00:00s
epoch 8  | loss: 18.56529| val_0_rmse: 6.53631 |  0:00:00s
epoch 9  | loss: 11.374  | val_0_rmse: 6.43651 |  0:00:01s
epoch 10 | loss: 7.64535 | val_0_rmse: 6.86605 |  0:00:01s
epoch 11 | loss: 7.44869 | val_0_rmse: 6.86215 |  0:00:01s
epoch 12 | loss: 6.67426 | val_0_rmse: 6.21539 |  0:00:01s
epoch 13 | loss: 5.31504 | val_0_rmse: 5.69473 |  0:00:01s
epoch 14 | loss: 3.44945 | val_0_rmse: 5.87118 |  0:00:01s
epoch 15 | loss: 2.16808 | val_0_rmse: 4.58717 |  0:00:01s
epoch 16 | loss: 1.70424 | val_0_rmse: 3.63785 |  0:0

[I 2025-08-17 19:39:00,490] Trial 37 finished with value: 0.29530784544094757 and parameters: {'n_d': 31, 'n_a': 60, 'n_steps': 3, 'gamma': 1.6562102724472039, 'lambda_sparse': 0.0021749542888594327}. Best is trial 37 with value: 0.29530784544094757.


epoch 73 | loss: 0.1026  | val_0_rmse: 0.37128 |  0:00:07s

Early stopping occurred at epoch 73 with best_epoch = 63 and best_val_0_rmse = 0.29531
epoch 0  | loss: 191.65622| val_0_rmse: 16.50793|  0:00:00s




epoch 1  | loss: 158.38045| val_0_rmse: 13.71939|  0:00:00s
epoch 2  | loss: 129.28583| val_0_rmse: 10.72508|  0:00:00s
epoch 3  | loss: 104.8359| val_0_rmse: 9.66141 |  0:00:00s
epoch 4  | loss: 84.41195| val_0_rmse: 8.80398 |  0:00:00s
epoch 5  | loss: 65.22377| val_0_rmse: 7.96944 |  0:00:00s
epoch 6  | loss: 50.52532| val_0_rmse: 7.15912 |  0:00:00s
epoch 7  | loss: 37.05629| val_0_rmse: 6.30179 |  0:00:00s
epoch 8  | loss: 24.96328| val_0_rmse: 5.67279 |  0:00:00s
epoch 9  | loss: 16.65826| val_0_rmse: 5.37924 |  0:00:01s
epoch 10 | loss: 9.64515 | val_0_rmse: 4.92683 |  0:00:01s
epoch 11 | loss: 6.16274 | val_0_rmse: 5.81932 |  0:00:01s
epoch 12 | loss: 6.38444 | val_0_rmse: 6.66811 |  0:00:01s
epoch 13 | loss: 6.70307 | val_0_rmse: 7.01109 |  0:00:01s
epoch 14 | loss: 6.36191 | val_0_rmse: 6.51244 |  0:00:01s
epoch 15 | loss: 4.69967 | val_0_rmse: 5.38304 |  0:00:01s
epoch 16 | loss: 2.93658 | val_0_rmse: 4.42577 |  0:00:01s
epoch 17 | loss: 1.68037 | val_0_rmse: 4.14251 |  0:00

[I 2025-08-17 19:39:10,424] Trial 38 finished with value: 0.33329267447103944 and parameters: {'n_d': 31, 'n_a': 61, 'n_steps': 3, 'gamma': 1.6525366732009186, 'lambda_sparse': 0.0022074175758674376}. Best is trial 37 with value: 0.29530784544094757.


Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.33329




epoch 0  | loss: 175.44493| val_0_rmse: 13.68664|  0:00:00s
epoch 1  | loss: 141.09149| val_0_rmse: 9.93158 |  0:00:00s
epoch 2  | loss: 115.73581| val_0_rmse: 9.54705 |  0:00:00s
epoch 3  | loss: 96.51439| val_0_rmse: 9.04407 |  0:00:00s
epoch 4  | loss: 76.50446| val_0_rmse: 8.67276 |  0:00:00s
epoch 5  | loss: 61.77116| val_0_rmse: 9.27706 |  0:00:00s
epoch 6  | loss: 47.48632| val_0_rmse: 11.33483|  0:00:00s
epoch 7  | loss: 35.47645| val_0_rmse: 12.51393|  0:00:00s
epoch 8  | loss: 26.02721| val_0_rmse: 14.8428 |  0:00:00s
epoch 9  | loss: 15.54327| val_0_rmse: 17.13696|  0:00:00s
epoch 10 | loss: 11.75228| val_0_rmse: 20.05778|  0:00:01s
epoch 11 | loss: 8.62223 | val_0_rmse: 21.53911|  0:00:01s
epoch 12 | loss: 7.98924 | val_0_rmse: 22.13405|  0:00:01s
epoch 13 | loss: 8.41698 | val_0_rmse: 19.12467|  0:00:01s
epoch 14 | loss: 8.2327  | val_0_rmse: 16.46241|  0:00:01s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_rmse = 8.67276


[I 2025-08-17 19:39:11,923] Trial 39 finished with value: 8.672760635685732 and parameters: {'n_d': 30, 'n_a': 57, 'n_steps': 3, 'gamma': 1.8111740680442312, 'lambda_sparse': 0.006055321039965078}. Best is trial 37 with value: 0.29530784544094757.


epoch 0  | loss: 158.75847| val_0_rmse: 12.74692|  0:00:00s
epoch 1  | loss: 123.65915| val_0_rmse: 10.83828|  0:00:00s
epoch 2  | loss: 96.7674 | val_0_rmse: 9.31983 |  0:00:00s
epoch 3  | loss: 75.26656| val_0_rmse: 8.008   |  0:00:00s
epoch 4  | loss: 55.10718| val_0_rmse: 7.05502 |  0:00:00s
epoch 5  | loss: 40.5929 | val_0_rmse: 6.33072 |  0:00:00s
epoch 6  | loss: 30.53265| val_0_rmse: 7.17885 |  0:00:00s
epoch 7  | loss: 22.32988| val_0_rmse: 8.44917 |  0:00:00s
epoch 8  | loss: 11.88732| val_0_rmse: 10.1708 |  0:00:00s
epoch 9  | loss: 7.67777 | val_0_rmse: 10.39668|  0:00:01s
epoch 10 | loss: 5.5013  | val_0_rmse: 9.38614 |  0:00:01s
epoch 11 | loss: 6.11153 | val_0_rmse: 7.25218 |  0:00:01s
epoch 12 | loss: 5.67506 | val_0_rmse: 6.14881 |  0:00:01s
epoch 13 | loss: 5.81911 | val_0_rmse: 5.48737 |  0:00:01s
epoch 14 | loss: 3.33869 | val_0_rmse: 4.56161 |  0:00:01s
epoch 15 | loss: 2.02601 | val_0_rmse: 4.01834 |  0:00:01s
epoch 16 | loss: 1.61593 | val_0_rmse: 3.87985 |  0:00

[I 2025-08-17 19:39:21,971] Trial 40 finished with value: 0.2639664310728177 and parameters: {'n_d': 33, 'n_a': 61, 'n_steps': 3, 'gamma': 1.7238687248958677, 'lambda_sparse': 0.002197180634921405}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 148.33119| val_0_rmse: 13.03547|  0:00:00s
epoch 1  | loss: 113.15478| val_0_rmse: 9.60111 |  0:00:00s
epoch 2  | loss: 85.60457| val_0_rmse: 8.02388 |  0:00:00s
epoch 3  | loss: 62.52465| val_0_rmse: 7.09874 |  0:00:00s
epoch 4  | loss: 45.58688| val_0_rmse: 7.28268 |  0:00:00s
epoch 5  | loss: 31.93428| val_0_rmse: 12.61291|  0:00:00s
epoch 6  | loss: 22.49076| val_0_rmse: 21.84838|  0:00:00s
epoch 7  | loss: 15.84741| val_0_rmse: 22.30904|  0:00:00s
epoch 8  | loss: 11.52876| val_0_rmse: 20.24937|  0:00:00s
epoch 9  | loss: 8.85221 | val_0_rmse: 16.5948 |  0:00:01s
epoch 10 | loss: 8.57067 | val_0_rmse: 13.35888|  0:00:01s
epoch 11 | loss: 6.32171 | val_0_rmse: 10.77705|  0:00:01s
epoch 12 | loss: 5.22772 | val_0_rmse: 8.0968  |  0:00:01s
epoch 13 | loss: 2.86132 | val_0_rmse: 6.19892 |  0:00:01s
epoch 14 | loss: 1.90142 | val_0_rmse: 4.9121  |  0:00:01s
epoch 15 | loss: 1.71997 | val_0_rmse: 4.54043 |  0:00:01s
epoch 16 | loss: 1.7691  | val_0_rmse: 4.51551 |  0:00

[I 2025-08-17 19:39:27,805] Trial 41 finished with value: 0.5194254792340067 and parameters: {'n_d': 34, 'n_a': 61, 'n_steps': 3, 'gamma': 1.742560810731922, 'lambda_sparse': 0.001630172163699005}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 159.11667| val_0_rmse: 12.79927|  0:00:00s
epoch 1  | loss: 133.59583| val_0_rmse: 10.71519|  0:00:00s
epoch 2  | loss: 112.86459| val_0_rmse: 9.89802 |  0:00:00s
epoch 3  | loss: 92.82011| val_0_rmse: 8.93855 |  0:00:00s
epoch 4  | loss: 75.45834| val_0_rmse: 7.89561 |  0:00:00s
epoch 5  | loss: 59.59747| val_0_rmse: 7.20044 |  0:00:00s
epoch 6  | loss: 43.42952| val_0_rmse: 6.64739 |  0:00:00s
epoch 7  | loss: 32.04902| val_0_rmse: 6.3348  |  0:00:00s
epoch 8  | loss: 20.99284| val_0_rmse: 6.6827  |  0:00:00s
epoch 9  | loss: 12.57738| val_0_rmse: 7.91936 |  0:00:00s
epoch 10 | loss: 7.6256  | val_0_rmse: 8.45963 |  0:00:01s
epoch 11 | loss: 5.11171 | val_0_rmse: 9.63549 |  0:00:01s
epoch 12 | loss: 5.40216 | val_0_rmse: 10.13363|  0:00:01s
epoch 13 | loss: 6.07874 | val_0_rmse: 9.89732 |  0:00:01s
epoch 14 | loss: 6.08509 | val_0_rmse: 8.825   |  0:00:01s
epoch 15 | loss: 4.64248 | val_0_rmse: 8.48844 |  0:00:01s
epoch 16 | loss: 3.04064 | val_0_rmse: 6.65258 |  0:0

[I 2025-08-17 19:39:33,507] Trial 42 finished with value: 0.6646720591082551 and parameters: {'n_d': 24, 'n_a': 60, 'n_steps': 3, 'gamma': 1.6614147169980802, 'lambda_sparse': 0.010523566932474379}. Best is trial 40 with value: 0.2639664310728177.


epoch 59 | loss: 0.09333 | val_0_rmse: 0.84696 |  0:00:05s

Early stopping occurred at epoch 59 with best_epoch = 49 and best_val_0_rmse = 0.66467




epoch 0  | loss: 152.39224| val_0_rmse: 14.52614|  0:00:00s
epoch 1  | loss: 118.86775| val_0_rmse: 9.7017  |  0:00:00s
epoch 2  | loss: 89.07348| val_0_rmse: 8.26037 |  0:00:00s
epoch 3  | loss: 64.56274| val_0_rmse: 7.98556 |  0:00:00s
epoch 4  | loss: 47.6894 | val_0_rmse: 7.20371 |  0:00:00s
epoch 5  | loss: 30.8518 | val_0_rmse: 8.76384 |  0:00:00s
epoch 6  | loss: 24.02851| val_0_rmse: 12.03891|  0:00:00s
epoch 7  | loss: 17.54021| val_0_rmse: 13.71325|  0:00:00s
epoch 8  | loss: 14.43753| val_0_rmse: 14.71216|  0:00:01s
epoch 9  | loss: 12.22495| val_0_rmse: 15.15342|  0:00:01s
epoch 10 | loss: 14.32157| val_0_rmse: 14.85404|  0:00:01s
epoch 11 | loss: 13.11815| val_0_rmse: 14.24363|  0:00:01s
epoch 12 | loss: 10.68414| val_0_rmse: 12.56444|  0:00:01s
epoch 13 | loss: 6.94773 | val_0_rmse: 11.19375|  0:00:01s


[I 2025-08-17 19:39:35,372] Trial 43 finished with value: 7.203710513739421 and parameters: {'n_d': 34, 'n_a': 54, 'n_steps': 4, 'gamma': 1.7518212739664054, 'lambda_sparse': 0.0023616819299635164}. Best is trial 40 with value: 0.2639664310728177.


epoch 14 | loss: 4.57402 | val_0_rmse: 9.35226 |  0:00:01s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_rmse = 7.20371




epoch 0  | loss: 170.30289| val_0_rmse: 12.35452|  0:00:00s
epoch 1  | loss: 136.14577| val_0_rmse: 9.14947 |  0:00:00s
epoch 2  | loss: 106.97814| val_0_rmse: 7.97651 |  0:00:00s
epoch 3  | loss: 81.70795| val_0_rmse: 6.4339  |  0:00:00s
epoch 4  | loss: 60.3955 | val_0_rmse: 6.18011 |  0:00:00s
epoch 5  | loss: 43.90375| val_0_rmse: 5.50699 |  0:00:00s
epoch 6  | loss: 29.11873| val_0_rmse: 6.0191  |  0:00:00s
epoch 7  | loss: 18.01533| val_0_rmse: 6.36657 |  0:00:00s
epoch 8  | loss: 10.70196| val_0_rmse: 7.16969 |  0:00:00s
epoch 9  | loss: 7.18137 | val_0_rmse: 9.08192 |  0:00:00s
epoch 10 | loss: 6.38476 | val_0_rmse: 10.1745 |  0:00:01s
epoch 11 | loss: 6.0735  | val_0_rmse: 9.72073 |  0:00:01s
epoch 12 | loss: 4.56942 | val_0_rmse: 8.2575  |  0:00:01s
epoch 13 | loss: 3.12379 | val_0_rmse: 6.55247 |  0:00:01s
epoch 14 | loss: 1.74593 | val_0_rmse: 5.07684 |  0:00:01s
epoch 15 | loss: 1.42264 | val_0_rmse: 4.07956 |  0:00:01s
epoch 16 | loss: 1.44675 | val_0_rmse: 3.47308 |  0:0

[I 2025-08-17 19:39:43,705] Trial 44 finished with value: 0.32540044727239986 and parameters: {'n_d': 38, 'n_a': 58, 'n_steps': 3, 'gamma': 1.9417622848931646, 'lambda_sparse': 0.00030570033317694323}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 137.67575| val_0_rmse: 9.69061 |  0:00:00s
epoch 1  | loss: 106.86685| val_0_rmse: 10.71155|  0:00:00s
epoch 2  | loss: 81.88587| val_0_rmse: 9.60986 |  0:00:00s
epoch 3  | loss: 58.31869| val_0_rmse: 8.29592 |  0:00:00s
epoch 4  | loss: 40.74234| val_0_rmse: 11.12481|  0:00:00s
epoch 5  | loss: 27.48097| val_0_rmse: 16.4842 |  0:00:00s
epoch 6  | loss: 18.76072| val_0_rmse: 22.29433|  0:00:00s
epoch 7  | loss: 13.12296| val_0_rmse: 28.98496|  0:00:00s
epoch 8  | loss: 10.48765| val_0_rmse: 32.14739|  0:00:00s
epoch 9  | loss: 9.22005 | val_0_rmse: 29.06575|  0:00:01s
epoch 10 | loss: 8.37524 | val_0_rmse: 23.79791|  0:00:01s
epoch 11 | loss: 7.41233 | val_0_rmse: 19.43674|  0:00:01s


[I 2025-08-17 19:39:45,244] Trial 45 finished with value: 8.29591652846808 and parameters: {'n_d': 39, 'n_a': 60, 'n_steps': 3, 'gamma': 1.943351254695151, 'lambda_sparse': 0.0003247125249093994}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 4.51232 | val_0_rmse: 16.00711|  0:00:01s
epoch 13 | loss: 3.02351 | val_0_rmse: 14.92842|  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 8.29592




epoch 0  | loss: 141.57635| val_0_rmse: 12.90954|  0:00:00s
epoch 1  | loss: 101.83645| val_0_rmse: 11.00965|  0:00:00s
epoch 2  | loss: 71.6868 | val_0_rmse: 8.74392 |  0:00:00s
epoch 3  | loss: 50.51281| val_0_rmse: 8.40928 |  0:00:00s
epoch 4  | loss: 31.30764| val_0_rmse: 7.03977 |  0:00:00s
epoch 5  | loss: 21.63235| val_0_rmse: 7.46513 |  0:00:00s
epoch 6  | loss: 11.43721| val_0_rmse: 7.4453  |  0:00:00s
epoch 7  | loss: 9.64296 | val_0_rmse: 6.53292 |  0:00:00s
epoch 8  | loss: 8.92829 | val_0_rmse: 6.05781 |  0:00:00s
epoch 9  | loss: 6.54462 | val_0_rmse: 5.07485 |  0:00:01s
epoch 10 | loss: 4.73382 | val_0_rmse: 4.77699 |  0:00:01s
epoch 11 | loss: 2.50107 | val_0_rmse: 3.83485 |  0:00:01s
epoch 12 | loss: 3.12095 | val_0_rmse: 3.5183  |  0:00:01s
epoch 13 | loss: 2.10409 | val_0_rmse: 2.93996 |  0:00:01s
epoch 14 | loss: 1.91794 | val_0_rmse: 2.59724 |  0:00:01s
epoch 15 | loss: 1.26814 | val_0_rmse: 2.36222 |  0:00:01s
epoch 16 | loss: 0.93723 | val_0_rmse: 2.31273 |  0:00

[I 2025-08-17 19:39:49,215] Trial 46 finished with value: 1.7585135986268463 and parameters: {'n_d': 43, 'n_a': 57, 'n_steps': 3, 'gamma': 1.9253719610691808, 'lambda_sparse': 0.0006224377276355489}. Best is trial 40 with value: 0.2639664310728177.


epoch 35 | loss: 0.26697 | val_0_rmse: 2.33553 |  0:00:03s
epoch 36 | loss: 0.30308 | val_0_rmse: 2.12213 |  0:00:03s

Early stopping occurred at epoch 36 with best_epoch = 26 and best_val_0_rmse = 1.75851




epoch 0  | loss: 152.21419| val_0_rmse: 13.72651|  0:00:00s
epoch 1  | loss: 122.45736| val_0_rmse: 12.10311|  0:00:00s
epoch 2  | loss: 101.05805| val_0_rmse: 9.60272 |  0:00:00s
epoch 3  | loss: 79.96857| val_0_rmse: 9.54896 |  0:00:00s
epoch 4  | loss: 60.38482| val_0_rmse: 9.35413 |  0:00:00s
epoch 5  | loss: 45.93576| val_0_rmse: 6.89961 |  0:00:00s
epoch 6  | loss: 32.29134| val_0_rmse: 7.93635 |  0:00:00s
epoch 7  | loss: 23.00091| val_0_rmse: 7.06566 |  0:00:01s
epoch 8  | loss: 14.60931| val_0_rmse: 8.05179 |  0:00:01s
epoch 9  | loss: 11.93679| val_0_rmse: 9.17662 |  0:00:01s
epoch 10 | loss: 9.87416 | val_0_rmse: 9.7471  |  0:00:01s
epoch 11 | loss: 11.16529| val_0_rmse: 10.36265|  0:00:01s
epoch 12 | loss: 9.71997 | val_0_rmse: 12.21108|  0:00:01s
epoch 13 | loss: 6.51328 | val_0_rmse: 10.71227|  0:00:01s
epoch 14 | loss: 4.52051 | val_0_rmse: 8.38181 |  0:00:01s
epoch 15 | loss: 3.04321 | val_0_rmse: 6.69484 |  0:00:01s
epoch 16 | loss: 2.99241 | val_0_rmse: 5.36391 |  0:0

[I 2025-08-17 19:40:01,304] Trial 47 finished with value: 0.3657705663978025 and parameters: {'n_d': 28, 'n_a': 64, 'n_steps': 4, 'gamma': 1.810059361946077, 'lambda_sparse': 0.0009490851539972393}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.07883 | val_0_rmse: 0.36577 |  0:00:11s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.36577




epoch 0  | loss: 152.23701| val_0_rmse: 13.91119|  0:00:00s
epoch 1  | loss: 122.94118| val_0_rmse: 10.54967|  0:00:00s
epoch 2  | loss: 100.04948| val_0_rmse: 9.73467 |  0:00:00s
epoch 3  | loss: 80.83589| val_0_rmse: 8.60841 |  0:00:00s
epoch 4  | loss: 61.52314| val_0_rmse: 7.79247 |  0:00:00s
epoch 5  | loss: 45.65245| val_0_rmse: 7.64971 |  0:00:00s
epoch 6  | loss: 37.78901| val_0_rmse: 7.66277 |  0:00:00s
epoch 7  | loss: 27.08948| val_0_rmse: 7.94484 |  0:00:01s
epoch 8  | loss: 19.22925| val_0_rmse: 8.80298 |  0:00:01s
epoch 9  | loss: 14.36826| val_0_rmse: 10.14395|  0:00:01s
epoch 10 | loss: 10.71908| val_0_rmse: 10.54408|  0:00:01s
epoch 11 | loss: 9.84486 | val_0_rmse: 9.53567 |  0:00:01s
epoch 12 | loss: 9.96511 | val_0_rmse: 8.22103 |  0:00:01s
epoch 13 | loss: 9.48215 | val_0_rmse: 6.71293 |  0:00:01s
epoch 14 | loss: 5.68186 | val_0_rmse: 5.09657 |  0:00:01s
epoch 15 | loss: 4.33102 | val_0_rmse: 3.86138 |  0:00:01s
epoch 16 | loss: 3.21994 | val_0_rmse: 3.30074 |  0:0

[I 2025-08-17 19:40:07,515] Trial 48 finished with value: 0.8058452086802067 and parameters: {'n_d': 28, 'n_a': 64, 'n_steps': 4, 'gamma': 1.8065973339703134, 'lambda_sparse': 0.0021396861867927068}. Best is trial 40 with value: 0.2639664310728177.


epoch 48 | loss: 0.18219 | val_0_rmse: 1.2192  |  0:00:05s
epoch 49 | loss: 0.18424 | val_0_rmse: 1.18644 |  0:00:06s

Early stopping occurred at epoch 49 with best_epoch = 39 and best_val_0_rmse = 0.80585




epoch 0  | loss: 101.00606| val_0_rmse: 12.12561|  0:00:00s
epoch 1  | loss: 78.74389| val_0_rmse: 10.02225|  0:00:00s
epoch 2  | loss: 59.34572| val_0_rmse: 9.15991 |  0:00:00s
epoch 3  | loss: 43.71106| val_0_rmse: 11.02315|  0:00:00s
epoch 4  | loss: 30.80919| val_0_rmse: 12.82843|  0:00:00s
epoch 5  | loss: 21.00282| val_0_rmse: 15.37731|  0:00:00s
epoch 6  | loss: 13.34216| val_0_rmse: 23.16512|  0:00:00s
epoch 7  | loss: 8.81272 | val_0_rmse: 20.93828|  0:00:00s
epoch 8  | loss: 6.79999 | val_0_rmse: 14.84088|  0:00:00s
epoch 9  | loss: 5.75125 | val_0_rmse: 11.80292|  0:00:01s
epoch 10 | loss: 3.93541 | val_0_rmse: 9.00433 |  0:00:01s
epoch 11 | loss: 2.84751 | val_0_rmse: 7.03676 |  0:00:01s
epoch 12 | loss: 2.49333 | val_0_rmse: 7.24374 |  0:00:01s
epoch 13 | loss: 1.50538 | val_0_rmse: 5.68392 |  0:00:01s
epoch 14 | loss: 1.11288 | val_0_rmse: 4.82034 |  0:00:01s
epoch 15 | loss: 0.92558 | val_0_rmse: 5.02851 |  0:00:01s
epoch 16 | loss: 0.85973 | val_0_rmse: 4.20026 |  0:00:

[I 2025-08-17 19:40:16,170] Trial 49 finished with value: 0.3552946245098392 and parameters: {'n_d': 33, 'n_a': 62, 'n_steps': 3, 'gamma': 1.860769415221843, 'lambda_sparse': 0.0009290893825292577}. Best is trial 40 with value: 0.2639664310728177.


epoch 83 | loss: 0.11005 | val_0_rmse: 0.37982 |  0:00:08s

Early stopping occurred at epoch 83 with best_epoch = 73 and best_val_0_rmse = 0.35529
epoch 0  | loss: 166.10585| val_0_rmse: 11.4861 |  0:00:00s




epoch 1  | loss: 130.37524| val_0_rmse: 9.4621  |  0:00:00s
epoch 2  | loss: 100.57149| val_0_rmse: 8.69483 |  0:00:00s
epoch 3  | loss: 76.99218| val_0_rmse: 8.0696  |  0:00:00s
epoch 4  | loss: 55.97556| val_0_rmse: 7.37229 |  0:00:00s
epoch 5  | loss: 40.43989| val_0_rmse: 6.5864  |  0:00:00s
epoch 6  | loss: 26.66966| val_0_rmse: 6.2765  |  0:00:00s
epoch 7  | loss: 19.51555| val_0_rmse: 6.60129 |  0:00:00s
epoch 8  | loss: 13.43242| val_0_rmse: 6.66193 |  0:00:00s
epoch 9  | loss: 9.46755 | val_0_rmse: 7.10959 |  0:00:01s
epoch 10 | loss: 8.10336 | val_0_rmse: 7.25257 |  0:00:01s
epoch 11 | loss: 7.47788 | val_0_rmse: 6.76084 |  0:00:01s
epoch 12 | loss: 5.84783 | val_0_rmse: 6.68383 |  0:00:01s
epoch 13 | loss: 4.68294 | val_0_rmse: 6.55419 |  0:00:01s
epoch 14 | loss: 2.48211 | val_0_rmse: 5.82941 |  0:00:01s
epoch 15 | loss: 2.08586 | val_0_rmse: 4.99317 |  0:00:01s
epoch 16 | loss: 2.37591 | val_0_rmse: 4.09638 |  0:00:01s
epoch 17 | loss: 1.70682 | val_0_rmse: 3.81189 |  0:00

[I 2025-08-17 19:40:22,311] Trial 50 finished with value: 0.9531537508120534 and parameters: {'n_d': 36, 'n_a': 57, 'n_steps': 3, 'gamma': 1.9069925768139837, 'lambda_sparse': 0.004552521158755705}. Best is trial 40 with value: 0.2639664310728177.


epoch 56 | loss: 0.19472 | val_0_rmse: 0.97635 |  0:00:06s

Early stopping occurred at epoch 56 with best_epoch = 46 and best_val_0_rmse = 0.95315
epoch 0  | loss: 139.32623| val_0_rmse: 14.16398|  0:00:00s




epoch 1  | loss: 107.12428| val_0_rmse: 10.57531|  0:00:00s
epoch 2  | loss: 78.09476| val_0_rmse: 12.50611|  0:00:00s
epoch 3  | loss: 54.46288| val_0_rmse: 12.11963|  0:00:00s
epoch 4  | loss: 38.14888| val_0_rmse: 12.33067|  0:00:00s
epoch 5  | loss: 24.23144| val_0_rmse: 11.0643 |  0:00:00s
epoch 6  | loss: 14.85916| val_0_rmse: 10.89706|  0:00:00s
epoch 7  | loss: 8.93091 | val_0_rmse: 11.67353|  0:00:00s
epoch 8  | loss: 7.30354 | val_0_rmse: 13.27864|  0:00:00s
epoch 9  | loss: 6.46024 | val_0_rmse: 12.6383 |  0:00:00s


[I 2025-08-17 19:40:23,578] Trial 51 finished with value: 10.575308159190348 and parameters: {'n_d': 32, 'n_a': 61, 'n_steps': 3, 'gamma': 1.8751437041274928, 'lambda_sparse': 0.000913140443575733}. Best is trial 40 with value: 0.2639664310728177.


epoch 10 | loss: 4.97487 | val_0_rmse: 13.87902|  0:00:01s
epoch 11 | loss: 3.33687 | val_0_rmse: 12.36068|  0:00:01s

Early stopping occurred at epoch 11 with best_epoch = 1 and best_val_0_rmse = 10.57531




epoch 0  | loss: 167.69853| val_0_rmse: 11.97746|  0:00:00s
epoch 1  | loss: 139.62222| val_0_rmse: 9.84537 |  0:00:00s
epoch 2  | loss: 114.05087| val_0_rmse: 8.84785 |  0:00:00s
epoch 3  | loss: 92.36495| val_0_rmse: 7.55805 |  0:00:00s
epoch 4  | loss: 73.30866| val_0_rmse: 6.51332 |  0:00:00s
epoch 5  | loss: 57.65882| val_0_rmse: 5.9516  |  0:00:00s
epoch 6  | loss: 44.67475| val_0_rmse: 5.54653 |  0:00:00s
epoch 7  | loss: 34.34739| val_0_rmse: 5.60317 |  0:00:00s
epoch 8  | loss: 25.7086 | val_0_rmse: 6.22281 |  0:00:01s
epoch 9  | loss: 18.02591| val_0_rmse: 6.81978 |  0:00:01s
epoch 10 | loss: 13.22006| val_0_rmse: 8.24507 |  0:00:01s
epoch 11 | loss: 10.70487| val_0_rmse: 8.79267 |  0:00:01s
epoch 12 | loss: 9.25635 | val_0_rmse: 9.5723  |  0:00:01s
epoch 13 | loss: 11.58828| val_0_rmse: 9.20821 |  0:00:01s
epoch 14 | loss: 9.7879  | val_0_rmse: 8.39551 |  0:00:01s
epoch 15 | loss: 7.89227 | val_0_rmse: 7.70536 |  0:00:01s


[I 2025-08-17 19:40:25,702] Trial 52 finished with value: 5.546530400806853 and parameters: {'n_d': 28, 'n_a': 62, 'n_steps': 4, 'gamma': 1.9950042024350438, 'lambda_sparse': 0.0029565133075159025}. Best is trial 40 with value: 0.2639664310728177.


epoch 16 | loss: 5.71288 | val_0_rmse: 5.95583 |  0:00:01s

Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_0_rmse = 5.54653




epoch 0  | loss: 128.16933| val_0_rmse: 9.348   |  0:00:00s
epoch 1  | loss: 102.53072| val_0_rmse: 8.26588 |  0:00:00s
epoch 2  | loss: 78.70252| val_0_rmse: 7.29063 |  0:00:00s
epoch 3  | loss: 59.24394| val_0_rmse: 7.22486 |  0:00:00s
epoch 4  | loss: 43.05541| val_0_rmse: 6.99443 |  0:00:00s
epoch 5  | loss: 27.86505| val_0_rmse: 7.65151 |  0:00:00s
epoch 6  | loss: 17.9696 | val_0_rmse: 8.07673 |  0:00:00s
epoch 7  | loss: 11.38583| val_0_rmse: 7.76444 |  0:00:00s
epoch 8  | loss: 9.67399 | val_0_rmse: 6.71184 |  0:00:00s
epoch 9  | loss: 8.28121 | val_0_rmse: 6.97586 |  0:00:00s
epoch 10 | loss: 7.21641 | val_0_rmse: 7.21664 |  0:00:01s
epoch 11 | loss: 5.47759 | val_0_rmse: 6.86871 |  0:00:01s
epoch 12 | loss: 3.62386 | val_0_rmse: 6.39882 |  0:00:01s
epoch 13 | loss: 2.94459 | val_0_rmse: 6.00546 |  0:00:01s
epoch 14 | loss: 2.28489 | val_0_rmse: 4.99983 |  0:00:01s
epoch 15 | loss: 1.8809  | val_0_rmse: 4.05103 |  0:00:01s
epoch 16 | loss: 1.38827 | val_0_rmse: 3.69832 |  0:00

[I 2025-08-17 19:40:34,491] Trial 53 finished with value: 0.3470274196178177 and parameters: {'n_d': 34, 'n_a': 59, 'n_steps': 3, 'gamma': 1.8421565275179792, 'lambda_sparse': 0.0006190488606812319}. Best is trial 40 with value: 0.2639664310728177.


epoch 86 | loss: 0.06789 | val_0_rmse: 0.85406 |  0:00:08s

Early stopping occurred at epoch 86 with best_epoch = 76 and best_val_0_rmse = 0.34703
epoch 0  | loss: 125.29149| val_0_rmse: 12.87961|  0:00:00s




epoch 1  | loss: 104.58548| val_0_rmse: 8.53162 |  0:00:00s
epoch 2  | loss: 86.02704| val_0_rmse: 8.35761 |  0:00:00s
epoch 3  | loss: 70.82623| val_0_rmse: 7.52663 |  0:00:00s
epoch 4  | loss: 52.0542 | val_0_rmse: 6.9284  |  0:00:00s
epoch 5  | loss: 38.38104| val_0_rmse: 6.99353 |  0:00:00s
epoch 6  | loss: 26.41932| val_0_rmse: 7.60869 |  0:00:00s
epoch 7  | loss: 17.21557| val_0_rmse: 9.66494 |  0:00:00s
epoch 8  | loss: 12.96338| val_0_rmse: 10.62095|  0:00:00s
epoch 9  | loss: 8.7982  | val_0_rmse: 11.69278|  0:00:00s
epoch 10 | loss: 7.90204 | val_0_rmse: 11.67994|  0:00:01s
epoch 11 | loss: 7.54534 | val_0_rmse: 10.95502|  0:00:01s
epoch 12 | loss: 6.49347 | val_0_rmse: 9.74173 |  0:00:01s
epoch 13 | loss: 4.42149 | val_0_rmse: 8.38542 |  0:00:01s
epoch 14 | loss: 2.90655 | val_0_rmse: 6.64708 |  0:00:01s
epoch 15 | loss: 1.85944 | val_0_rmse: 4.94033 |  0:00:01s
epoch 16 | loss: 1.97052 | val_0_rmse: 4.09206 |  0:00:01s
epoch 17 | loss: 1.8817  | val_0_rmse: 3.52925 |  0:00:

[I 2025-08-17 19:40:41,538] Trial 54 finished with value: 0.37144456080908755 and parameters: {'n_d': 34, 'n_a': 58, 'n_steps': 3, 'gamma': 1.8536264929886461, 'lambda_sparse': 0.0005430999632445875}. Best is trial 40 with value: 0.2639664310728177.


epoch 70 | loss: 0.10938 | val_0_rmse: 0.44232 |  0:00:06s

Early stopping occurred at epoch 70 with best_epoch = 60 and best_val_0_rmse = 0.37144




epoch 0  | loss: 120.14315| val_0_rmse: 8.94186 |  0:00:00s
epoch 1  | loss: 95.59184| val_0_rmse: 8.7954  |  0:00:00s
epoch 2  | loss: 75.23191| val_0_rmse: 8.08474 |  0:00:00s
epoch 3  | loss: 57.60065| val_0_rmse: 7.2301  |  0:00:00s
epoch 4  | loss: 40.74349| val_0_rmse: 6.64062 |  0:00:00s
epoch 5  | loss: 29.121  | val_0_rmse: 5.45369 |  0:00:00s
epoch 6  | loss: 19.42733| val_0_rmse: 5.14631 |  0:00:00s
epoch 7  | loss: 11.5751 | val_0_rmse: 4.49687 |  0:00:00s
epoch 8  | loss: 8.24261 | val_0_rmse: 5.49014 |  0:00:00s
epoch 9  | loss: 8.8856  | val_0_rmse: 4.85048 |  0:00:00s
epoch 10 | loss: 9.13224 | val_0_rmse: 4.28044 |  0:00:01s
epoch 11 | loss: 7.93941 | val_0_rmse: 3.7135  |  0:00:01s
epoch 12 | loss: 5.14538 | val_0_rmse: 3.52134 |  0:00:01s
epoch 13 | loss: 2.8309  | val_0_rmse: 3.43203 |  0:00:01s
epoch 14 | loss: 1.90393 | val_0_rmse: 3.43262 |  0:00:01s
epoch 15 | loss: 2.88527 | val_0_rmse: 3.39726 |  0:00:01s
epoch 16 | loss: 3.25042 | val_0_rmse: 3.22936 |  0:00:

[I 2025-08-17 19:40:51,200] Trial 55 finished with value: 0.3049313314573446 and parameters: {'n_d': 37, 'n_a': 55, 'n_steps': 3, 'gamma': 1.7684080276736582, 'lambda_sparse': 0.0018476762866062593}. Best is trial 40 with value: 0.2639664310728177.


epoch 95 | loss: 0.07387 | val_0_rmse: 0.33594 |  0:00:09s

Early stopping occurred at epoch 95 with best_epoch = 85 and best_val_0_rmse = 0.30493
epoch 0  | loss: 229.56996| val_0_rmse: 17.95926|  0:00:00s




epoch 1  | loss: 186.92693| val_0_rmse: 13.07532|  0:00:00s
epoch 2  | loss: 145.34938| val_0_rmse: 10.97772|  0:00:00s
epoch 3  | loss: 115.88838| val_0_rmse: 9.69742 |  0:00:00s
epoch 4  | loss: 91.49883| val_0_rmse: 8.06511 |  0:00:00s
epoch 5  | loss: 69.77181| val_0_rmse: 7.0018  |  0:00:00s
epoch 6  | loss: 53.12048| val_0_rmse: 6.80672 |  0:00:00s
epoch 7  | loss: 37.81772| val_0_rmse: 8.06604 |  0:00:00s
epoch 8  | loss: 26.27136| val_0_rmse: 10.126  |  0:00:00s
epoch 9  | loss: 17.06455| val_0_rmse: 12.85189|  0:00:00s
epoch 10 | loss: 10.60487| val_0_rmse: 13.98261|  0:00:01s
epoch 11 | loss: 7.79649 | val_0_rmse: 14.20266|  0:00:01s
epoch 12 | loss: 7.26645 | val_0_rmse: 15.04807|  0:00:01s
epoch 13 | loss: 7.1451  | val_0_rmse: 14.56131|  0:00:01s
epoch 14 | loss: 6.78747 | val_0_rmse: 13.6093 |  0:00:01s
epoch 15 | loss: 5.66715 | val_0_rmse: 12.39739|  0:00:01s


[I 2025-08-17 19:40:52,917] Trial 56 finished with value: 6.806715254985464 and parameters: {'n_d': 38, 'n_a': 50, 'n_steps': 3, 'gamma': 1.7755189657525619, 'lambda_sparse': 0.0012781234522203852}. Best is trial 40 with value: 0.2639664310728177.


epoch 16 | loss: 3.82708 | val_0_rmse: 10.56226|  0:00:01s

Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_0_rmse = 6.80672




epoch 0  | loss: 148.98526| val_0_rmse: 10.64683|  0:00:00s
epoch 1  | loss: 109.96524| val_0_rmse: 9.1269  |  0:00:00s
epoch 2  | loss: 77.83904| val_0_rmse: 7.53138 |  0:00:00s
epoch 3  | loss: 53.83655| val_0_rmse: 8.38131 |  0:00:00s
epoch 4  | loss: 36.92612| val_0_rmse: 7.8729  |  0:00:00s
epoch 5  | loss: 28.04017| val_0_rmse: 8.77827 |  0:00:00s
epoch 6  | loss: 19.65769| val_0_rmse: 7.78431 |  0:00:00s
epoch 7  | loss: 16.53663| val_0_rmse: 8.3118  |  0:00:00s
epoch 8  | loss: 14.99905| val_0_rmse: 8.69604 |  0:00:01s
epoch 9  | loss: 13.38033| val_0_rmse: 9.05494 |  0:00:01s
epoch 10 | loss: 15.79475| val_0_rmse: 9.04691 |  0:00:01s
epoch 11 | loss: 14.84047| val_0_rmse: 9.18917 |  0:00:01s


[I 2025-08-17 19:40:54,569] Trial 57 finished with value: 7.53138071558708 and parameters: {'n_d': 41, 'n_a': 55, 'n_steps': 4, 'gamma': 1.6317725457905725, 'lambda_sparse': 0.0019308019882806364}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 10.34932| val_0_rmse: 8.63719 |  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 7.53138




epoch 0  | loss: 136.30879| val_0_rmse: 16.35493|  0:00:00s
epoch 1  | loss: 106.71509| val_0_rmse: 11.95357|  0:00:00s
epoch 2  | loss: 80.48041| val_0_rmse: 8.45483 |  0:00:00s
epoch 3  | loss: 56.51001| val_0_rmse: 9.11535 |  0:00:00s
epoch 4  | loss: 41.82206| val_0_rmse: 7.98358 |  0:00:00s
epoch 5  | loss: 26.75774| val_0_rmse: 9.80746 |  0:00:00s
epoch 6  | loss: 18.77842| val_0_rmse: 11.14954|  0:00:00s
epoch 7  | loss: 12.48425| val_0_rmse: 12.68371|  0:00:00s
epoch 8  | loss: 12.07082| val_0_rmse: 11.7074 |  0:00:00s
epoch 9  | loss: 13.30342| val_0_rmse: 9.71984 |  0:00:01s
epoch 10 | loss: 12.47259| val_0_rmse: 7.65247 |  0:00:01s
epoch 11 | loss: 9.18798 | val_0_rmse: 6.09642 |  0:00:01s
epoch 12 | loss: 5.84197 | val_0_rmse: 4.82479 |  0:00:01s
epoch 13 | loss: 2.84398 | val_0_rmse: 4.25282 |  0:00:01s
epoch 14 | loss: 2.45278 | val_0_rmse: 3.60079 |  0:00:01s
epoch 15 | loss: 2.71512 | val_0_rmse: 3.24819 |  0:00:01s
epoch 16 | loss: 2.68712 | val_0_rmse: 3.18577 |  0:00

[I 2025-08-17 19:41:04,717] Trial 58 finished with value: 0.32122921195850035 and parameters: {'n_d': 37, 'n_a': 59, 'n_steps': 3, 'gamma': 1.696010941827681, 'lambda_sparse': 0.0055978382042400215}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.05502 | val_0_rmse: 0.32123 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.32123




epoch 0  | loss: 130.15388| val_0_rmse: 10.0734 |  0:00:00s
epoch 1  | loss: 97.59814| val_0_rmse: 8.83405 |  0:00:00s
epoch 2  | loss: 71.80061| val_0_rmse: 7.89187 |  0:00:00s
epoch 3  | loss: 53.16329| val_0_rmse: 7.20034 |  0:00:00s
epoch 4  | loss: 37.25058| val_0_rmse: 7.63609 |  0:00:00s
epoch 5  | loss: 24.42702| val_0_rmse: 8.88387 |  0:00:00s
epoch 6  | loss: 16.83134| val_0_rmse: 9.80417 |  0:00:00s
epoch 7  | loss: 11.8948 | val_0_rmse: 11.38537|  0:00:00s
epoch 8  | loss: 10.51788| val_0_rmse: 11.45411|  0:00:00s
epoch 9  | loss: 10.27067| val_0_rmse: 10.07226|  0:00:00s
epoch 10 | loss: 8.92307 | val_0_rmse: 9.41421 |  0:00:01s
epoch 11 | loss: 6.68151 | val_0_rmse: 7.99668 |  0:00:01s
epoch 12 | loss: 4.80466 | val_0_rmse: 6.89217 |  0:00:01s
epoch 13 | loss: 2.48607 | val_0_rmse: 6.12739 |  0:00:01s
epoch 14 | loss: 2.24871 | val_0_rmse: 5.90392 |  0:00:01s
epoch 15 | loss: 1.88104 | val_0_rmse: 5.60083 |  0:00:01s
epoch 16 | loss: 1.66539 | val_0_rmse: 5.09296 |  0:00:

[I 2025-08-17 19:41:13,577] Trial 59 finished with value: 0.2823088902599419 and parameters: {'n_d': 36, 'n_a': 53, 'n_steps': 3, 'gamma': 1.576768767650289, 'lambda_sparse': 0.009863768128441185}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 124.45277| val_0_rmse: 10.48944|  0:00:00s
epoch 1  | loss: 90.08551| val_0_rmse: 8.13848 |  0:00:00s
epoch 2  | loss: 70.54375| val_0_rmse: 9.60361 |  0:00:00s
epoch 3  | loss: 52.83863| val_0_rmse: 9.20964 |  0:00:00s
epoch 4  | loss: 38.50239| val_0_rmse: 10.87662|  0:00:00s
epoch 5  | loss: 27.58503| val_0_rmse: 10.83124|  0:00:00s
epoch 6  | loss: 19.6745 | val_0_rmse: 10.19697|  0:00:00s
epoch 7  | loss: 15.32333| val_0_rmse: 10.48806|  0:00:00s
epoch 8  | loss: 13.74669| val_0_rmse: 10.69642|  0:00:01s
epoch 9  | loss: 13.21384| val_0_rmse: 10.38161|  0:00:01s


[I 2025-08-17 19:41:15,121] Trial 60 finished with value: 8.138478709311373 and parameters: {'n_d': 36, 'n_a': 47, 'n_steps': 4, 'gamma': 1.5665185924898037, 'lambda_sparse': 0.020336666972421622}. Best is trial 40 with value: 0.2639664310728177.


epoch 10 | loss: 11.03955| val_0_rmse: 10.68166|  0:00:01s
epoch 11 | loss: 9.60877 | val_0_rmse: 9.49559 |  0:00:01s

Early stopping occurred at epoch 11 with best_epoch = 1 and best_val_0_rmse = 8.13848




epoch 0  | loss: 179.01848| val_0_rmse: 13.83601|  0:00:00s
epoch 1  | loss: 146.08292| val_0_rmse: 11.55999|  0:00:00s
epoch 2  | loss: 116.58231| val_0_rmse: 10.55679|  0:00:00s
epoch 3  | loss: 94.31721| val_0_rmse: 9.39215 |  0:00:00s
epoch 4  | loss: 75.51264| val_0_rmse: 8.49389 |  0:00:00s
epoch 5  | loss: 58.94164| val_0_rmse: 7.69006 |  0:00:00s
epoch 6  | loss: 47.5336 | val_0_rmse: 7.0547  |  0:00:00s
epoch 7  | loss: 33.25889| val_0_rmse: 6.65305 |  0:00:00s
epoch 8  | loss: 23.56525| val_0_rmse: 6.13845 |  0:00:00s
epoch 9  | loss: 15.31861| val_0_rmse: 5.98624 |  0:00:00s
epoch 10 | loss: 10.65508| val_0_rmse: 5.47677 |  0:00:01s
epoch 11 | loss: 8.63194 | val_0_rmse: 4.74468 |  0:00:01s
epoch 12 | loss: 7.40628 | val_0_rmse: 4.81146 |  0:00:01s
epoch 13 | loss: 7.62727 | val_0_rmse: 4.72998 |  0:00:01s
epoch 14 | loss: 7.3344  | val_0_rmse: 4.28401 |  0:00:01s
epoch 15 | loss: 6.20314 | val_0_rmse: 3.65737 |  0:00:01s
epoch 16 | loss: 3.97166 | val_0_rmse: 3.37159 |  0:0

[I 2025-08-17 19:41:24,542] Trial 61 finished with value: 0.46254384447673996 and parameters: {'n_d': 30, 'n_a': 53, 'n_steps': 3, 'gamma': 1.6994445792293054, 'lambda_sparse': 0.011720683154204707}. Best is trial 40 with value: 0.2639664310728177.


epoch 98 | loss: 0.05622 | val_0_rmse: 0.46509 |  0:00:09s
epoch 99 | loss: 0.05593 | val_0_rmse: 0.48173 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 95 and best_val_0_rmse = 0.46254




epoch 0  | loss: 195.97607| val_0_rmse: 20.16164|  0:00:00s
epoch 1  | loss: 150.29291| val_0_rmse: 11.16845|  0:00:00s
epoch 2  | loss: 115.67361| val_0_rmse: 9.18242 |  0:00:00s
epoch 3  | loss: 85.08141| val_0_rmse: 8.02396 |  0:00:00s
epoch 4  | loss: 64.26047| val_0_rmse: 7.10721 |  0:00:00s
epoch 5  | loss: 46.37804| val_0_rmse: 6.03646 |  0:00:00s
epoch 6  | loss: 31.30064| val_0_rmse: 5.53315 |  0:00:00s
epoch 7  | loss: 19.97297| val_0_rmse: 5.78789 |  0:00:00s
epoch 8  | loss: 12.15916| val_0_rmse: 5.6706  |  0:00:00s
epoch 9  | loss: 7.50696 | val_0_rmse: 5.55138 |  0:00:01s
epoch 10 | loss: 6.189   | val_0_rmse: 5.49426 |  0:00:01s
epoch 11 | loss: 5.78585 | val_0_rmse: 5.18899 |  0:00:01s
epoch 12 | loss: 5.35266 | val_0_rmse: 4.92488 |  0:00:01s
epoch 13 | loss: 4.10863 | val_0_rmse: 4.60986 |  0:00:01s
epoch 14 | loss: 2.29652 | val_0_rmse: 4.32264 |  0:00:01s
epoch 15 | loss: 1.65896 | val_0_rmse: 3.94729 |  0:00:01s
epoch 16 | loss: 1.72831 | val_0_rmse: 3.76303 |  0:0

[I 2025-08-17 19:41:34,689] Trial 62 finished with value: 0.28251902917459715 and parameters: {'n_d': 39, 'n_a': 58, 'n_steps': 3, 'gamma': 1.6103740426291007, 'lambda_sparse': 0.00561344985179938}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.07372 | val_0_rmse: 0.29579 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 96 and best_val_0_rmse = 0.28252




epoch 0  | loss: 118.02361| val_0_rmse: 10.90852|  0:00:00s
epoch 1  | loss: 85.01109| val_0_rmse: 11.21047|  0:00:00s
epoch 2  | loss: 60.15995| val_0_rmse: 11.77821|  0:00:00s
epoch 3  | loss: 41.08532| val_0_rmse: 12.56909|  0:00:00s
epoch 4  | loss: 27.45485| val_0_rmse: 12.33473|  0:00:00s
epoch 5  | loss: 17.42363| val_0_rmse: 11.95231|  0:00:00s
epoch 6  | loss: 15.40201| val_0_rmse: 13.5608 |  0:00:00s
epoch 7  | loss: 15.61686| val_0_rmse: 11.23746|  0:00:01s
epoch 8  | loss: 14.77969| val_0_rmse: 11.62769|  0:00:01s
epoch 9  | loss: 13.26118| val_0_rmse: 9.43822 |  0:00:01s
epoch 10 | loss: 8.63496 | val_0_rmse: 8.9529  |  0:00:01s
epoch 11 | loss: 6.27399 | val_0_rmse: 8.22459 |  0:00:01s
epoch 12 | loss: 4.42908 | val_0_rmse: 7.09591 |  0:00:01s
epoch 13 | loss: 4.11396 | val_0_rmse: 6.19423 |  0:00:01s
epoch 14 | loss: 3.4747  | val_0_rmse: 6.56488 |  0:00:01s
epoch 15 | loss: 2.40864 | val_0_rmse: 6.37738 |  0:00:02s
epoch 16 | loss: 2.29326 | val_0_rmse: 3.6774  |  0:00:

[I 2025-08-17 19:41:45,583] Trial 63 finished with value: 0.3228914642927039 and parameters: {'n_d': 40, 'n_a': 58, 'n_steps': 4, 'gamma': 1.5070243784869024, 'lambda_sparse': 0.00574895723295483}. Best is trial 40 with value: 0.2639664310728177.


epoch 86 | loss: 0.1217  | val_0_rmse: 0.38724 |  0:00:10s

Early stopping occurred at epoch 86 with best_epoch = 76 and best_val_0_rmse = 0.32289




epoch 0  | loss: 222.2252| val_0_rmse: 20.18111|  0:00:00s
epoch 1  | loss: 172.18071| val_0_rmse: 12.51942|  0:00:00s
epoch 2  | loss: 129.94003| val_0_rmse: 10.1003 |  0:00:00s
epoch 3  | loss: 96.57996| val_0_rmse: 8.86835 |  0:00:00s
epoch 4  | loss: 70.4317 | val_0_rmse: 8.62095 |  0:00:00s
epoch 5  | loss: 54.40716| val_0_rmse: 9.42267 |  0:00:00s
epoch 6  | loss: 38.40594| val_0_rmse: 8.65444 |  0:00:00s
epoch 7  | loss: 28.21419| val_0_rmse: 12.41888|  0:00:01s
epoch 8  | loss: 18.5923 | val_0_rmse: 14.88507|  0:00:01s
epoch 9  | loss: 13.82522| val_0_rmse: 15.47854|  0:00:01s
epoch 10 | loss: 10.82842| val_0_rmse: 19.16239|  0:00:01s
epoch 11 | loss: 10.38046| val_0_rmse: 18.05452|  0:00:01s
epoch 12 | loss: 8.19926 | val_0_rmse: 14.33779|  0:00:01s
epoch 13 | loss: 5.86245 | val_0_rmse: 12.98972|  0:00:01s


[I 2025-08-17 19:41:47,540] Trial 64 finished with value: 8.620954460716154 and parameters: {'n_d': 40, 'n_a': 55, 'n_steps': 4, 'gamma': 1.4947433545051947, 'lambda_sparse': 0.00612095808894819}. Best is trial 40 with value: 0.2639664310728177.


epoch 14 | loss: 4.16165 | val_0_rmse: 10.48041|  0:00:01s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_rmse = 8.62095




epoch 0  | loss: 151.21036| val_0_rmse: 17.59214|  0:00:00s
epoch 1  | loss: 115.06052| val_0_rmse: 13.83052|  0:00:00s
epoch 2  | loss: 86.21273| val_0_rmse: 13.19269|  0:00:00s
epoch 3  | loss: 63.09324| val_0_rmse: 11.38982|  0:00:00s
epoch 4  | loss: 42.98838| val_0_rmse: 10.38874|  0:00:00s
epoch 5  | loss: 27.37643| val_0_rmse: 13.63107|  0:00:00s
epoch 6  | loss: 17.8322 | val_0_rmse: 15.44598|  0:00:00s
epoch 7  | loss: 12.11851| val_0_rmse: 15.52265|  0:00:00s
epoch 8  | loss: 9.7649  | val_0_rmse: 15.05186|  0:00:00s
epoch 9  | loss: 11.77321| val_0_rmse: 13.746  |  0:00:00s
epoch 10 | loss: 11.21767| val_0_rmse: 12.88969|  0:00:01s
epoch 11 | loss: 9.06934 | val_0_rmse: 12.03408|  0:00:01s
epoch 12 | loss: 5.41739 | val_0_rmse: 9.71087 |  0:00:01s
epoch 13 | loss: 3.29106 | val_0_rmse: 7.50629 |  0:00:01s
epoch 14 | loss: 2.32785 | val_0_rmse: 7.11746 |  0:00:01s
epoch 15 | loss: 2.24326 | val_0_rmse: 6.70406 |  0:00:01s
epoch 16 | loss: 2.24037 | val_0_rmse: 5.20559 |  0:00

[I 2025-08-17 19:41:54,792] Trial 65 finished with value: 0.3475305435760437 and parameters: {'n_d': 42, 'n_a': 48, 'n_steps': 3, 'gamma': 1.5272060057336525, 'lambda_sparse': 0.00868204792651574}. Best is trial 40 with value: 0.2639664310728177.


epoch 72 | loss: 0.12528 | val_0_rmse: 0.40162 |  0:00:07s
epoch 73 | loss: 0.09759 | val_0_rmse: 0.38728 |  0:00:07s

Early stopping occurred at epoch 73 with best_epoch = 63 and best_val_0_rmse = 0.34753




epoch 0  | loss: 200.08395| val_0_rmse: 13.80158|  0:00:00s
epoch 1  | loss: 149.64162| val_0_rmse: 10.46481|  0:00:00s
epoch 2  | loss: 109.2524| val_0_rmse: 8.8015  |  0:00:00s
epoch 3  | loss: 77.58421| val_0_rmse: 7.76454 |  0:00:00s
epoch 4  | loss: 53.53698| val_0_rmse: 6.36994 |  0:00:00s
epoch 5  | loss: 35.85286| val_0_rmse: 6.21998 |  0:00:00s
epoch 6  | loss: 23.48822| val_0_rmse: 7.10337 |  0:00:00s
epoch 7  | loss: 17.03886| val_0_rmse: 11.47557|  0:00:01s
epoch 8  | loss: 15.21955| val_0_rmse: 10.41248|  0:00:01s
epoch 9  | loss: 14.84862| val_0_rmse: 10.48551|  0:00:01s
epoch 10 | loss: 12.11198| val_0_rmse: 10.921  |  0:00:01s
epoch 11 | loss: 10.09932| val_0_rmse: 8.57827 |  0:00:01s
epoch 12 | loss: 5.64702 | val_0_rmse: 7.41162 |  0:00:01s
epoch 13 | loss: 4.014   | val_0_rmse: 6.04277 |  0:00:01s
epoch 14 | loss: 4.20989 | val_0_rmse: 4.91631 |  0:00:01s
epoch 15 | loss: 3.74297 | val_0_rmse: 5.13109 |  0:00:02s
epoch 16 | loss: 3.77194 | val_0_rmse: 4.958   |  0:00

[I 2025-08-17 19:42:07,088] Trial 66 finished with value: 0.290705071602855 and parameters: {'n_d': 46, 'n_a': 53, 'n_steps': 4, 'gamma': 1.5938436347206344, 'lambda_sparse': 0.015220748932344081}. Best is trial 40 with value: 0.2639664310728177.


epoch 94 | loss: 0.09922 | val_0_rmse: 0.56989 |  0:00:12s
epoch 95 | loss: 0.37539 | val_0_rmse: 0.73955 |  0:00:12s

Early stopping occurred at epoch 95 with best_epoch = 85 and best_val_0_rmse = 0.29071




epoch 0  | loss: 168.4369| val_0_rmse: 14.35333|  0:00:00s
epoch 1  | loss: 135.33618| val_0_rmse: 10.56858|  0:00:00s
epoch 2  | loss: 109.09116| val_0_rmse: 9.44013 |  0:00:00s
epoch 3  | loss: 87.21996| val_0_rmse: 8.03857 |  0:00:00s
epoch 4  | loss: 68.73252| val_0_rmse: 7.3394  |  0:00:00s
epoch 5  | loss: 52.30248| val_0_rmse: 7.19538 |  0:00:00s
epoch 6  | loss: 37.05962| val_0_rmse: 6.51012 |  0:00:00s
epoch 7  | loss: 23.63882| val_0_rmse: 6.61227 |  0:00:00s
epoch 8  | loss: 14.04093| val_0_rmse: 9.39037 |  0:00:00s
epoch 9  | loss: 8.98544 | val_0_rmse: 7.67579 |  0:00:01s
epoch 10 | loss: 6.1492  | val_0_rmse: 7.03158 |  0:00:01s
epoch 11 | loss: 6.22116 | val_0_rmse: 5.99389 |  0:00:01s
epoch 12 | loss: 6.20721 | val_0_rmse: 5.5065  |  0:00:01s
epoch 13 | loss: 5.21902 | val_0_rmse: 4.57259 |  0:00:01s
epoch 14 | loss: 3.27564 | val_0_rmse: 4.03365 |  0:00:01s
epoch 15 | loss: 1.61913 | val_0_rmse: 3.68044 |  0:00:01s
epoch 16 | loss: 1.60479 | val_0_rmse: 3.46844 |  0:00

[I 2025-08-17 19:42:17,055] Trial 67 finished with value: 0.32019669506459664 and parameters: {'n_d': 37, 'n_a': 52, 'n_steps': 3, 'gamma': 1.5951862413174513, 'lambda_sparse': 0.014430684288972876}. Best is trial 40 with value: 0.2639664310728177.


Stop training because you reached max_epochs = 100 with best_epoch = 98 and best_val_0_rmse = 0.3202
epoch 0  | loss: 71.98529| val_0_rmse: 31.79049|  0:00:00s




epoch 1  | loss: 46.38922| val_0_rmse: 17.65197|  0:00:00s
epoch 2  | loss: 29.72931| val_0_rmse: 13.00256|  0:00:00s
epoch 3  | loss: 18.8658 | val_0_rmse: 13.18764|  0:00:00s
epoch 4  | loss: 12.53987| val_0_rmse: 12.39356|  0:00:00s
epoch 5  | loss: 12.92599| val_0_rmse: 10.54133|  0:00:00s
epoch 6  | loss: 9.32833 | val_0_rmse: 10.26326|  0:00:00s
epoch 7  | loss: 5.40718 | val_0_rmse: 8.75404 |  0:00:00s
epoch 8  | loss: 4.02915 | val_0_rmse: 5.6007  |  0:00:00s
epoch 9  | loss: 3.5358  | val_0_rmse: 4.33803 |  0:00:01s
epoch 10 | loss: 2.30175 | val_0_rmse: 4.22498 |  0:00:01s
epoch 11 | loss: 1.61889 | val_0_rmse: 4.99968 |  0:00:01s
epoch 12 | loss: 1.48557 | val_0_rmse: 4.36586 |  0:00:01s
epoch 13 | loss: 1.1208  | val_0_rmse: 3.38373 |  0:00:01s
epoch 14 | loss: 0.99649 | val_0_rmse: 3.03259 |  0:00:01s
epoch 15 | loss: 0.97883 | val_0_rmse: 2.28467 |  0:00:01s
epoch 16 | loss: 0.85835 | val_0_rmse: 1.98008 |  0:00:01s
epoch 17 | loss: 0.76857 | val_0_rmse: 1.99874 |  0:00:0

[I 2025-08-17 19:42:25,710] Trial 68 finished with value: 0.3974391935263237 and parameters: {'n_d': 49, 'n_a': 52, 'n_steps': 3, 'gamma': 1.594468509512586, 'lambda_sparse': 0.023233926066975906}. Best is trial 40 with value: 0.2639664310728177.


epoch 78 | loss: 0.11045 | val_0_rmse: 0.45117 |  0:00:08s
epoch 79 | loss: 0.13031 | val_0_rmse: 0.48984 |  0:00:08s

Early stopping occurred at epoch 79 with best_epoch = 69 and best_val_0_rmse = 0.39744




epoch 0  | loss: 78.27457| val_0_rmse: 20.82879|  0:00:00s
epoch 1  | loss: 54.79969| val_0_rmse: 10.70844|  0:00:00s
epoch 2  | loss: 33.72924| val_0_rmse: 8.91854 |  0:00:00s
epoch 3  | loss: 24.72463| val_0_rmse: 8.40643 |  0:00:00s
epoch 4  | loss: 21.04033| val_0_rmse: 7.90901 |  0:00:00s
epoch 5  | loss: 15.9837 | val_0_rmse: 9.13926 |  0:00:00s
epoch 6  | loss: 16.46938| val_0_rmse: 9.28624 |  0:00:00s
epoch 7  | loss: 14.77798| val_0_rmse: 9.9132  |  0:00:01s
epoch 8  | loss: 9.48545 | val_0_rmse: 11.18453|  0:00:01s
epoch 9  | loss: 7.96212 | val_0_rmse: 9.851   |  0:00:01s
epoch 10 | loss: 6.5131  | val_0_rmse: 9.33767 |  0:00:01s
epoch 11 | loss: 4.5677  | val_0_rmse: 8.53507 |  0:00:01s
epoch 12 | loss: 3.74761 | val_0_rmse: 7.44847 |  0:00:01s
epoch 13 | loss: 2.52363 | val_0_rmse: 5.71656 |  0:00:01s
epoch 14 | loss: 2.54652 | val_0_rmse: 4.42715 |  0:00:01s
epoch 15 | loss: 1.66392 | val_0_rmse: 3.32543 |  0:00:02s
epoch 16 | loss: 1.44638 | val_0_rmse: 2.87042 |  0:00:0

[I 2025-08-17 19:42:34,593] Trial 69 finished with value: 0.6393179567573479 and parameters: {'n_d': 46, 'n_a': 43, 'n_steps': 4, 'gamma': 1.595681431082614, 'lambda_sparse': 0.016230584088600557}. Best is trial 40 with value: 0.2639664310728177.


epoch 68 | loss: 0.20746 | val_0_rmse: 0.7775  |  0:00:08s
epoch 69 | loss: 0.17197 | val_0_rmse: 0.67764 |  0:00:08s

Early stopping occurred at epoch 69 with best_epoch = 59 and best_val_0_rmse = 0.63932




epoch 0  | loss: 145.6544| val_0_rmse: 11.4517 |  0:00:00s
epoch 1  | loss: 96.88374| val_0_rmse: 8.86103 |  0:00:00s
epoch 2  | loss: 67.55666| val_0_rmse: 10.22317|  0:00:00s
epoch 3  | loss: 39.97048| val_0_rmse: 9.1463  |  0:00:00s
epoch 4  | loss: 26.99648| val_0_rmse: 12.64936|  0:00:00s
epoch 5  | loss: 19.49418| val_0_rmse: 8.92003 |  0:00:00s
epoch 6  | loss: 18.05901| val_0_rmse: 7.73346 |  0:00:00s
epoch 7  | loss: 17.16778| val_0_rmse: 8.08628 |  0:00:01s
epoch 8  | loss: 16.06368| val_0_rmse: 7.56133 |  0:00:01s
epoch 9  | loss: 13.54153| val_0_rmse: 6.26027 |  0:00:01s
epoch 10 | loss: 10.19348| val_0_rmse: 5.86571 |  0:00:01s
epoch 11 | loss: 6.30759 | val_0_rmse: 4.75495 |  0:00:01s
epoch 12 | loss: 4.84948 | val_0_rmse: 3.52831 |  0:00:01s
epoch 13 | loss: 5.11452 | val_0_rmse: 3.65005 |  0:00:01s
epoch 14 | loss: 4.35455 | val_0_rmse: 3.69167 |  0:00:01s
epoch 15 | loss: 3.46833 | val_0_rmse: 4.78992 |  0:00:01s
epoch 16 | loss: 2.43948 | val_0_rmse: 4.94222 |  0:00:0

[I 2025-08-17 19:42:41,580] Trial 70 finished with value: 0.870819039443463 and parameters: {'n_d': 43, 'n_a': 50, 'n_steps': 4, 'gamma': 1.7262545009725911, 'lambda_sparse': 0.02743154515226315}. Best is trial 40 with value: 0.2639664310728177.


epoch 54 | loss: 0.41849 | val_0_rmse: 1.0624  |  0:00:06s
epoch 55 | loss: 0.35198 | val_0_rmse: 1.10993 |  0:00:06s

Early stopping occurred at epoch 55 with best_epoch = 45 and best_val_0_rmse = 0.87082




epoch 0  | loss: 172.67397| val_0_rmse: 19.33276|  0:00:00s
epoch 1  | loss: 134.81505| val_0_rmse: 17.61069|  0:00:00s
epoch 2  | loss: 102.76156| val_0_rmse: 15.07551|  0:00:00s
epoch 3  | loss: 78.24013| val_0_rmse: 11.98827|  0:00:00s
epoch 4  | loss: 58.28088| val_0_rmse: 11.4837 |  0:00:00s
epoch 5  | loss: 43.38652| val_0_rmse: 9.80189 |  0:00:00s
epoch 6  | loss: 28.73806| val_0_rmse: 6.66343 |  0:00:00s
epoch 7  | loss: 20.21866| val_0_rmse: 6.53722 |  0:00:00s
epoch 8  | loss: 13.20493| val_0_rmse: 6.87007 |  0:00:00s
epoch 9  | loss: 9.65411 | val_0_rmse: 6.61943 |  0:00:01s
epoch 10 | loss: 7.89323 | val_0_rmse: 6.61335 |  0:00:01s
epoch 11 | loss: 6.72156 | val_0_rmse: 6.79371 |  0:00:01s
epoch 12 | loss: 5.80823 | val_0_rmse: 5.4381  |  0:00:01s
epoch 13 | loss: 3.21784 | val_0_rmse: 4.70562 |  0:00:01s
epoch 14 | loss: 2.01705 | val_0_rmse: 4.25532 |  0:00:01s
epoch 15 | loss: 1.59367 | val_0_rmse: 3.75083 |  0:00:01s
epoch 16 | loss: 1.49549 | val_0_rmse: 4.06587 |  0:0

[I 2025-08-17 19:42:45,548] Trial 71 finished with value: 1.9686925135247832 and parameters: {'n_d': 37, 'n_a': 54, 'n_steps': 3, 'gamma': 1.6821257352628411, 'lambda_sparse': 0.013885994909664846}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 148.98293| val_0_rmse: 13.85384|  0:00:00s
epoch 1  | loss: 120.82323| val_0_rmse: 10.1894 |  0:00:00s
epoch 2  | loss: 100.42685| val_0_rmse: 8.2665  |  0:00:00s
epoch 3  | loss: 81.83464| val_0_rmse: 8.81189 |  0:00:00s
epoch 4  | loss: 63.58017| val_0_rmse: 8.16757 |  0:00:00s
epoch 5  | loss: 47.40696| val_0_rmse: 8.76653 |  0:00:00s
epoch 6  | loss: 33.94653| val_0_rmse: 10.33375|  0:00:00s
epoch 7  | loss: 22.97878| val_0_rmse: 11.90519|  0:00:00s
epoch 8  | loss: 14.48011| val_0_rmse: 12.59429|  0:00:00s
epoch 9  | loss: 8.18229 | val_0_rmse: 13.92151|  0:00:00s
epoch 10 | loss: 4.98971 | val_0_rmse: 14.53935|  0:00:01s
epoch 11 | loss: 4.15448 | val_0_rmse: 13.59601|  0:00:01s
epoch 12 | loss: 5.49006 | val_0_rmse: 12.21032|  0:00:01s
epoch 13 | loss: 5.18684 | val_0_rmse: 9.91555 |  0:00:01s
epoch 14 | loss: 4.46783 | val_0_rmse: 8.17831 |  0:00:01s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_rmse = 8.16757


[I 2025-08-17 19:42:47,025] Trial 72 finished with value: 8.167574061908649 and parameters: {'n_d': 26, 'n_a': 52, 'n_steps': 3, 'gamma': 1.559308638413579, 'lambda_sparse': 0.008306444643507565}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 152.6506| val_0_rmse: 12.07003|  0:00:00s
epoch 1  | loss: 121.08865| val_0_rmse: 13.10865|  0:00:00s
epoch 2  | loss: 94.08969| val_0_rmse: 12.44265|  0:00:00s
epoch 3  | loss: 73.14499| val_0_rmse: 10.79428|  0:00:00s
epoch 4  | loss: 52.56534| val_0_rmse: 10.47063|  0:00:00s
epoch 5  | loss: 38.67261| val_0_rmse: 9.31103 |  0:00:00s
epoch 6  | loss: 26.5679 | val_0_rmse: 8.86922 |  0:00:00s
epoch 7  | loss: 17.30418| val_0_rmse: 10.51493|  0:00:00s
epoch 8  | loss: 10.55321| val_0_rmse: 9.94253 |  0:00:00s
epoch 9  | loss: 8.81484 | val_0_rmse: 9.05988 |  0:00:01s
epoch 10 | loss: 8.39472 | val_0_rmse: 9.60175 |  0:00:01s
epoch 11 | loss: 8.92801 | val_0_rmse: 10.51584|  0:00:01s
epoch 12 | loss: 6.85349 | val_0_rmse: 9.63489 |  0:00:01s
epoch 13 | loss: 4.73686 | val_0_rmse: 7.93008 |  0:00:01s
epoch 14 | loss: 2.50223 | val_0_rmse: 6.27311 |  0:00:01s
epoch 15 | loss: 1.65782 | val_0_rmse: 5.53975 |  0:00:01s
epoch 16 | loss: 1.899   | val_0_rmse: 3.31786 |  0:00:

[I 2025-08-17 19:42:50,536] Trial 73 finished with value: 2.3945047560007278 and parameters: {'n_d': 36, 'n_a': 56, 'n_steps': 3, 'gamma': 1.6179086973644086, 'lambda_sparse': 0.004664326585274342}. Best is trial 40 with value: 0.2639664310728177.


epoch 34 | loss: 0.23147 | val_0_rmse: 2.95978 |  0:00:03s

Early stopping occurred at epoch 34 with best_epoch = 24 and best_val_0_rmse = 2.3945




epoch 0  | loss: 128.90218| val_0_rmse: 9.23235 |  0:00:00s
epoch 1  | loss: 95.24429| val_0_rmse: 12.22297|  0:00:00s
epoch 2  | loss: 69.33789| val_0_rmse: 11.64008|  0:00:00s
epoch 3  | loss: 51.15078| val_0_rmse: 10.94111|  0:00:00s
epoch 4  | loss: 34.97104| val_0_rmse: 12.71097|  0:00:00s
epoch 5  | loss: 25.64413| val_0_rmse: 12.47245|  0:00:00s
epoch 6  | loss: 19.87118| val_0_rmse: 14.69447|  0:00:00s
epoch 7  | loss: 14.32324| val_0_rmse: 14.88847|  0:00:00s
epoch 8  | loss: 12.41734| val_0_rmse: 14.79973|  0:00:00s


[I 2025-08-17 19:42:51,676] Trial 74 finished with value: 9.232346423587185 and parameters: {'n_d': 39, 'n_a': 34, 'n_steps': 3, 'gamma': 1.654575945478484, 'lambda_sparse': 0.010927433161702593}. Best is trial 40 with value: 0.2639664310728177.


epoch 9  | loss: 11.47787| val_0_rmse: 13.94046|  0:00:00s
epoch 10 | loss: 9.56361 | val_0_rmse: 13.45292|  0:00:01s

Early stopping occurred at epoch 10 with best_epoch = 0 and best_val_0_rmse = 9.23235




epoch 0  | loss: 96.23322| val_0_rmse: 11.02466|  0:00:00s
epoch 1  | loss: 73.29095| val_0_rmse: 9.29039 |  0:00:00s
epoch 2  | loss: 58.24886| val_0_rmse: 8.0293  |  0:00:00s
epoch 3  | loss: 40.21998| val_0_rmse: 8.90723 |  0:00:00s
epoch 4  | loss: 28.61697| val_0_rmse: 11.4798 |  0:00:00s
epoch 5  | loss: 17.91187| val_0_rmse: 11.95066|  0:00:00s
epoch 6  | loss: 13.83337| val_0_rmse: 12.99844|  0:00:00s
epoch 7  | loss: 10.21149| val_0_rmse: 12.21621|  0:00:00s
epoch 8  | loss: 8.91742 | val_0_rmse: 11.33242|  0:00:00s
epoch 9  | loss: 6.87997 | val_0_rmse: 11.58823|  0:00:00s
epoch 10 | loss: 4.629   | val_0_rmse: 10.27399|  0:00:01s
epoch 11 | loss: 2.93377 | val_0_rmse: 8.03612 |  0:00:01s
epoch 12 | loss: 2.27792 | val_0_rmse: 7.25994 |  0:00:01s
epoch 13 | loss: 1.87704 | val_0_rmse: 6.78794 |  0:00:01s
epoch 14 | loss: 1.41829 | val_0_rmse: 6.35829 |  0:00:01s
epoch 15 | loss: 1.27358 | val_0_rmse: 5.97758 |  0:00:01s
epoch 16 | loss: 1.60549 | val_0_rmse: 5.46803 |  0:00:0

[I 2025-08-17 19:43:01,605] Trial 75 finished with value: 0.3566655179249918 and parameters: {'n_d': 33, 'n_a': 59, 'n_steps': 3, 'gamma': 1.7672492662812, 'lambda_sparse': 0.037355913073933676}. Best is trial 40 with value: 0.2639664310728177.


epoch 98 | loss: 0.0873  | val_0_rmse: 0.36989 |  0:00:09s
epoch 99 | loss: 0.08035 | val_0_rmse: 0.36815 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 97 and best_val_0_rmse = 0.35667




epoch 0  | loss: 169.55344| val_0_rmse: 12.01079|  0:00:00s
epoch 1  | loss: 132.26662| val_0_rmse: 9.75455 |  0:00:00s
epoch 2  | loss: 102.59495| val_0_rmse: 8.18507 |  0:00:00s
epoch 3  | loss: 76.23416| val_0_rmse: 7.10169 |  0:00:00s
epoch 4  | loss: 55.59871| val_0_rmse: 8.23208 |  0:00:00s
epoch 5  | loss: 39.71348| val_0_rmse: 9.92668 |  0:00:00s
epoch 6  | loss: 27.48252| val_0_rmse: 10.99708|  0:00:00s
epoch 7  | loss: 18.86916| val_0_rmse: 12.2424 |  0:00:00s
epoch 8  | loss: 13.68671| val_0_rmse: 12.27226|  0:00:01s
epoch 9  | loss: 10.3763 | val_0_rmse: 10.42681|  0:00:01s
epoch 10 | loss: 9.89983 | val_0_rmse: 9.1922  |  0:00:01s
epoch 11 | loss: 10.5506 | val_0_rmse: 9.1698  |  0:00:01s


[I 2025-08-17 19:43:03,416] Trial 76 finished with value: 7.101690617304156 and parameters: {'n_d': 36, 'n_a': 55, 'n_steps': 4, 'gamma': 1.7153100330067392, 'lambda_sparse': 0.0035202181033422283}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 7.74142 | val_0_rmse: 8.94519 |  0:00:01s
epoch 13 | loss: 5.31908 | val_0_rmse: 8.13957 |  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 7.10169




epoch 0  | loss: 71.92462| val_0_rmse: 24.64374|  0:00:00s
epoch 1  | loss: 43.74915| val_0_rmse: 21.52331|  0:00:00s
epoch 2  | loss: 29.04123| val_0_rmse: 16.13197|  0:00:00s
epoch 3  | loss: 24.5774 | val_0_rmse: 12.86019|  0:00:00s
epoch 4  | loss: 19.63797| val_0_rmse: 9.81868 |  0:00:01s
epoch 5  | loss: 12.95008| val_0_rmse: 6.73184 |  0:00:01s
epoch 6  | loss: 10.63876| val_0_rmse: 6.1592  |  0:00:01s
epoch 7  | loss: 9.11215 | val_0_rmse: 8.7009  |  0:00:01s
epoch 8  | loss: 7.87598 | val_0_rmse: 8.60164 |  0:00:02s
epoch 9  | loss: 5.95179 | val_0_rmse: 7.85307 |  0:00:02s
epoch 10 | loss: 5.41843 | val_0_rmse: 10.01453|  0:00:02s
epoch 11 | loss: 4.07841 | val_0_rmse: 6.75635 |  0:00:02s
epoch 12 | loss: 3.45842 | val_0_rmse: 7.36161 |  0:00:03s
epoch 13 | loss: 3.28076 | val_0_rmse: 7.29639 |  0:00:03s
epoch 14 | loss: 2.95915 | val_0_rmse: 7.21839 |  0:00:03s
epoch 15 | loss: 2.26047 | val_0_rmse: 5.86961 |  0:00:03s
epoch 16 | loss: 3.20702 | val_0_rmse: 6.72172 |  0:00:0

[I 2025-08-17 19:43:15,190] Trial 77 finished with value: 1.8008885078252348 and parameters: {'n_d': 53, 'n_a': 49, 'n_steps': 8, 'gamma': 1.6810735395216554, 'lambda_sparse': 0.007538754011238554}. Best is trial 40 with value: 0.2639664310728177.


epoch 48 | loss: 1.15422 | val_0_rmse: 2.1789  |  0:00:11s

Early stopping occurred at epoch 48 with best_epoch = 38 and best_val_0_rmse = 1.80089




epoch 0  | loss: 135.80185| val_0_rmse: 9.43956 |  0:00:00s
epoch 1  | loss: 103.31916| val_0_rmse: 9.72502 |  0:00:00s
epoch 2  | loss: 75.77159| val_0_rmse: 9.66772 |  0:00:00s
epoch 3  | loss: 54.96507| val_0_rmse: 11.86475|  0:00:00s
epoch 4  | loss: 37.78662| val_0_rmse: 14.6204 |  0:00:00s
epoch 5  | loss: 21.14443| val_0_rmse: 13.33691|  0:00:00s
epoch 6  | loss: 14.15607| val_0_rmse: 11.96443|  0:00:00s
epoch 7  | loss: 9.49412 | val_0_rmse: 12.21906|  0:00:00s
epoch 8  | loss: 8.63896 | val_0_rmse: 9.93467 |  0:00:00s
epoch 9  | loss: 7.30509 | val_0_rmse: 7.57455 |  0:00:00s
epoch 10 | loss: 4.70022 | val_0_rmse: 6.27572 |  0:00:01s
epoch 11 | loss: 2.85682 | val_0_rmse: 5.96268 |  0:00:01s
epoch 12 | loss: 2.22367 | val_0_rmse: 5.48962 |  0:00:01s
epoch 13 | loss: 1.7258  | val_0_rmse: 5.37472 |  0:00:01s
epoch 14 | loss: 1.17231 | val_0_rmse: 6.01458 |  0:00:01s
epoch 15 | loss: 1.09823 | val_0_rmse: 6.44374 |  0:00:01s
epoch 16 | loss: 0.84333 | val_0_rmse: 5.91417 |  0:00

[I 2025-08-17 19:43:23,395] Trial 78 finished with value: 0.40782272859164687 and parameters: {'n_d': 47, 'n_a': 53, 'n_steps': 3, 'gamma': 1.5588199266231402, 'lambda_sparse': 0.060785208496218254}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 136.78455| val_0_rmse: 11.76207|  0:00:00s
epoch 1  | loss: 128.34958| val_0_rmse: 11.33971|  0:00:00s
epoch 2  | loss: 120.95755| val_0_rmse: 11.01041|  0:00:00s
epoch 3  | loss: 112.4883| val_0_rmse: 10.73707|  0:00:00s
epoch 4  | loss: 104.78641| val_0_rmse: 10.43029|  0:00:00s
epoch 5  | loss: 98.19225| val_0_rmse: 10.24739|  0:00:00s
epoch 6  | loss: 90.80478| val_0_rmse: 9.90428 |  0:00:00s
epoch 7  | loss: 83.86289| val_0_rmse: 9.50186 |  0:00:00s
epoch 8  | loss: 75.8456 | val_0_rmse: 8.99514 |  0:00:00s
epoch 9  | loss: 68.39626| val_0_rmse: 8.33801 |  0:00:00s
epoch 10 | loss: 61.27941| val_0_rmse: 7.89626 |  0:00:00s
epoch 11 | loss: 53.97691| val_0_rmse: 7.40633 |  0:00:00s
epoch 12 | loss: 47.19888| val_0_rmse: 6.84602 |  0:00:01s
epoch 13 | loss: 40.29264| val_0_rmse: 6.5323  |  0:00:01s
epoch 14 | loss: 34.42407| val_0_rmse: 6.23242 |  0:00:01s
epoch 15 | loss: 30.62111| val_0_rmse: 6.14165 |  0:00:01s
epoch 16 | loss: 23.82321| val_0_rmse: 6.16903 |  0:

[I 2025-08-17 19:43:25,860] Trial 79 finished with value: 4.258610814444632 and parameters: {'n_d': 8, 'n_a': 13, 'n_steps': 3, 'gamma': 1.6277271968069964, 'lambda_sparse': 0.014209858041490978}. Best is trial 40 with value: 0.2639664310728177.


epoch 27 | loss: 4.70707 | val_0_rmse: 8.06811 |  0:00:02s
epoch 28 | loss: 4.47286 | val_0_rmse: 7.33046 |  0:00:02s
epoch 29 | loss: 3.67651 | val_0_rmse: 6.4162  |  0:00:02s

Early stopping occurred at epoch 29 with best_epoch = 19 and best_val_0_rmse = 4.25861




epoch 0  | loss: 160.86447| val_0_rmse: 13.16686|  0:00:00s
epoch 1  | loss: 121.47786| val_0_rmse: 9.82201 |  0:00:00s
epoch 2  | loss: 91.95168| val_0_rmse: 8.2261  |  0:00:00s
epoch 3  | loss: 69.70661| val_0_rmse: 6.77375 |  0:00:00s
epoch 4  | loss: 52.32661| val_0_rmse: 5.80776 |  0:00:00s
epoch 5  | loss: 37.77043| val_0_rmse: 6.05277 |  0:00:00s
epoch 6  | loss: 29.29948| val_0_rmse: 6.23825 |  0:00:00s
epoch 7  | loss: 21.11744| val_0_rmse: 7.69611 |  0:00:01s
epoch 8  | loss: 13.38927| val_0_rmse: 7.18252 |  0:00:01s
epoch 9  | loss: 10.99801| val_0_rmse: 8.33816 |  0:00:01s
epoch 10 | loss: 9.87585 | val_0_rmse: 9.33284 |  0:00:01s
epoch 11 | loss: 11.08044| val_0_rmse: 8.66917 |  0:00:01s
epoch 12 | loss: 8.45169 | val_0_rmse: 7.415   |  0:00:01s
epoch 13 | loss: 6.09787 | val_0_rmse: 6.255   |  0:00:01s
epoch 14 | loss: 3.66722 | val_0_rmse: 5.40976 |  0:00:02s
epoch 15 | loss: 2.93162 | val_0_rmse: 4.40262 |  0:00:02s
epoch 16 | loss: 3.29864 | val_0_rmse: 3.84245 |  0:00

[I 2025-08-17 19:43:30,512] Trial 80 finished with value: 3.287899469575121 and parameters: {'n_d': 41, 'n_a': 62, 'n_steps': 4, 'gamma': 1.5827293093321526, 'lambda_sparse': 0.002768942155820014}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 118.16544| val_0_rmse: 11.28245|  0:00:00s
epoch 1  | loss: 84.96107| val_0_rmse: 11.20511|  0:00:00s
epoch 2  | loss: 56.25023| val_0_rmse: 10.54828|  0:00:00s
epoch 3  | loss: 39.88008| val_0_rmse: 13.26538|  0:00:00s
epoch 4  | loss: 27.84482| val_0_rmse: 16.21243|  0:00:00s
epoch 5  | loss: 18.84672| val_0_rmse: 17.0824 |  0:00:00s
epoch 6  | loss: 16.2782 | val_0_rmse: 17.46537|  0:00:00s
epoch 7  | loss: 14.46051| val_0_rmse: 13.92189|  0:00:01s
epoch 8  | loss: 12.74128| val_0_rmse: 11.77022|  0:00:01s
epoch 9  | loss: 8.70146 | val_0_rmse: 8.83909 |  0:00:01s
epoch 10 | loss: 6.89775 | val_0_rmse: 6.35149 |  0:00:01s
epoch 11 | loss: 4.13476 | val_0_rmse: 5.24192 |  0:00:01s
epoch 12 | loss: 3.32357 | val_0_rmse: 4.9779  |  0:00:01s
epoch 13 | loss: 2.91008 | val_0_rmse: 5.07511 |  0:00:01s
epoch 14 | loss: 2.74654 | val_0_rmse: 5.62104 |  0:00:01s
epoch 15 | loss: 1.87462 | val_0_rmse: 8.78746 |  0:00:02s
epoch 16 | loss: 1.64817 | val_0_rmse: 9.25348 |  0:00:

[I 2025-08-17 19:43:40,288] Trial 81 finished with value: 0.3428733304056217 and parameters: {'n_d': 40, 'n_a': 58, 'n_steps': 4, 'gamma': 1.5316167079741096, 'lambda_sparse': 0.005843076134323758}. Best is trial 40 with value: 0.2639664310728177.


epoch 72 | loss: 0.10788 | val_0_rmse: 0.55254 |  0:00:09s

Early stopping occurred at epoch 72 with best_epoch = 62 and best_val_0_rmse = 0.34287




epoch 0  | loss: 172.24573| val_0_rmse: 13.95605|  0:00:00s
epoch 1  | loss: 134.19199| val_0_rmse: 9.77212 |  0:00:00s
epoch 2  | loss: 106.03666| val_0_rmse: 8.40061 |  0:00:00s
epoch 3  | loss: 83.609  | val_0_rmse: 7.77552 |  0:00:00s
epoch 4  | loss: 64.88351| val_0_rmse: 7.0966  |  0:00:00s
epoch 5  | loss: 48.53358| val_0_rmse: 6.57141 |  0:00:00s
epoch 6  | loss: 35.72467| val_0_rmse: 6.23622 |  0:00:00s
epoch 7  | loss: 23.70197| val_0_rmse: 6.57169 |  0:00:00s
epoch 8  | loss: 16.70281| val_0_rmse: 6.87073 |  0:00:00s
epoch 9  | loss: 9.77113 | val_0_rmse: 7.56174 |  0:00:01s
epoch 10 | loss: 7.93083 | val_0_rmse: 8.05951 |  0:00:01s
epoch 11 | loss: 6.43072 | val_0_rmse: 7.74683 |  0:00:01s
epoch 12 | loss: 5.971   | val_0_rmse: 7.39733 |  0:00:01s
epoch 13 | loss: 4.42086 | val_0_rmse: 6.59849 |  0:00:01s
epoch 14 | loss: 3.19206 | val_0_rmse: 6.11125 |  0:00:01s
epoch 15 | loss: 1.7572  | val_0_rmse: 5.74927 |  0:00:01s
epoch 16 | loss: 1.41731 | val_0_rmse: 5.31672 |  0:0

[I 2025-08-17 19:43:50,039] Trial 82 finished with value: 0.30725038196994336 and parameters: {'n_d': 35, 'n_a': 59, 'n_steps': 3, 'gamma': 1.5201275482638996, 'lambda_sparse': 0.0052414302245868995}. Best is trial 40 with value: 0.2639664310728177.


epoch 95 | loss: 0.07983 | val_0_rmse: 0.35383 |  0:00:09s
epoch 96 | loss: 0.06869 | val_0_rmse: 0.39383 |  0:00:09s

Early stopping occurred at epoch 96 with best_epoch = 86 and best_val_0_rmse = 0.30725




epoch 0  | loss: 110.50978| val_0_rmse: 11.79005|  0:00:00s
epoch 1  | loss: 88.24528| val_0_rmse: 12.04173|  0:00:00s
epoch 2  | loss: 65.00174| val_0_rmse: 9.22428 |  0:00:00s
epoch 3  | loss: 48.08646| val_0_rmse: 9.58583 |  0:00:00s
epoch 4  | loss: 32.92528| val_0_rmse: 10.95586|  0:00:00s
epoch 5  | loss: 22.43625| val_0_rmse: 13.08944|  0:00:00s
epoch 6  | loss: 16.48815| val_0_rmse: 14.88624|  0:00:00s
epoch 7  | loss: 11.8642 | val_0_rmse: 14.93923|  0:00:00s
epoch 8  | loss: 9.57826 | val_0_rmse: 16.98793|  0:00:00s
epoch 9  | loss: 8.47448 | val_0_rmse: 18.22097|  0:00:00s
epoch 10 | loss: 7.53126 | val_0_rmse: 17.06563|  0:00:01s
epoch 11 | loss: 5.29861 | val_0_rmse: 11.48263|  0:00:01s


[I 2025-08-17 19:43:51,384] Trial 83 finished with value: 9.224275274857531 and parameters: {'n_d': 35, 'n_a': 56, 'n_steps': 3, 'gamma': 1.461408352531438, 'lambda_sparse': 0.0038026830469434477}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 3.07489 | val_0_rmse: 10.07515|  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 9.22428




epoch 0  | loss: 148.55078| val_0_rmse: 10.88763|  0:00:00s
epoch 1  | loss: 122.13133| val_0_rmse: 10.18911|  0:00:00s
epoch 2  | loss: 98.61735| val_0_rmse: 9.6063  |  0:00:00s
epoch 3  | loss: 79.51908| val_0_rmse: 8.20279 |  0:00:00s
epoch 4  | loss: 60.97599| val_0_rmse: 7.79088 |  0:00:00s
epoch 5  | loss: 45.22115| val_0_rmse: 6.98178 |  0:00:00s
epoch 6  | loss: 29.51799| val_0_rmse: 5.6292  |  0:00:00s
epoch 7  | loss: 21.25348| val_0_rmse: 6.83849 |  0:00:00s
epoch 8  | loss: 12.9922 | val_0_rmse: 6.45574 |  0:00:00s
epoch 9  | loss: 7.971   | val_0_rmse: 4.64623 |  0:00:01s
epoch 10 | loss: 6.9773  | val_0_rmse: 4.182   |  0:00:01s
epoch 11 | loss: 6.34247 | val_0_rmse: 3.93548 |  0:00:01s
epoch 12 | loss: 6.48608 | val_0_rmse: 3.60642 |  0:00:01s
epoch 13 | loss: 5.14454 | val_0_rmse: 3.18696 |  0:00:01s
epoch 14 | loss: 3.29711 | val_0_rmse: 2.91912 |  0:00:01s
epoch 15 | loss: 1.69381 | val_0_rmse: 2.99179 |  0:00:01s
epoch 16 | loss: 1.42453 | val_0_rmse: 3.1523  |  0:00

[I 2025-08-17 19:44:01,091] Trial 84 finished with value: 0.26834573193561256 and parameters: {'n_d': 31, 'n_a': 59, 'n_steps': 3, 'gamma': 1.647834219706881, 'lambda_sparse': 0.0017473217234739383}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.05218 | val_0_rmse: 0.29241 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 93 and best_val_0_rmse = 0.26835
epoch 0  | loss: 134.53029| val_0_rmse: 12.34581|  0:00:00s




epoch 1  | loss: 102.49996| val_0_rmse: 9.52414 |  0:00:00s
epoch 2  | loss: 78.25053| val_0_rmse: 8.21103 |  0:00:00s
epoch 3  | loss: 57.15343| val_0_rmse: 8.50487 |  0:00:00s
epoch 4  | loss: 40.34015| val_0_rmse: 7.00195 |  0:00:00s
epoch 5  | loss: 27.37743| val_0_rmse: 7.23819 |  0:00:00s
epoch 6  | loss: 16.57861| val_0_rmse: 8.16257 |  0:00:00s
epoch 7  | loss: 11.62829| val_0_rmse: 10.04892|  0:00:00s
epoch 8  | loss: 8.98957 | val_0_rmse: 11.44485|  0:00:00s
epoch 9  | loss: 7.74825 | val_0_rmse: 10.83983|  0:00:00s
epoch 10 | loss: 6.49809 | val_0_rmse: 8.99998 |  0:00:01s
epoch 11 | loss: 4.90952 | val_0_rmse: 7.91807 |  0:00:01s
epoch 12 | loss: 3.25131 | val_0_rmse: 7.3512  |  0:00:01s
epoch 13 | loss: 2.15767 | val_0_rmse: 5.62683 |  0:00:01s
epoch 14 | loss: 2.04081 | val_0_rmse: 5.0233  |  0:00:01s
epoch 15 | loss: 1.62041 | val_0_rmse: 4.39899 |  0:00:01s
epoch 16 | loss: 1.10166 | val_0_rmse: 3.86232 |  0:00:01s
epoch 17 | loss: 0.91043 | val_0_rmse: 3.54194 |  0:00:

[I 2025-08-17 19:44:05,727] Trial 85 finished with value: 1.2335518203518057 and parameters: {'n_d': 30, 'n_a': 63, 'n_steps': 3, 'gamma': 1.6380210545713103, 'lambda_sparse': 0.0017787600112544669}. Best is trial 40 with value: 0.2639664310728177.


epoch 44 | loss: 0.15636 | val_0_rmse: 1.54351 |  0:00:04s
epoch 45 | loss: 0.13113 | val_0_rmse: 1.53969 |  0:00:04s

Early stopping occurred at epoch 45 with best_epoch = 35 and best_val_0_rmse = 1.23355




epoch 0  | loss: 183.62463| val_0_rmse: 15.40851|  0:00:00s
epoch 1  | loss: 158.14993| val_0_rmse: 12.34232|  0:00:00s
epoch 2  | loss: 136.81685| val_0_rmse: 10.61269|  0:00:00s
epoch 3  | loss: 120.19614| val_0_rmse: 9.1062  |  0:00:00s
epoch 4  | loss: 103.97956| val_0_rmse: 7.9295  |  0:00:00s
epoch 5  | loss: 88.13405| val_0_rmse: 7.17115 |  0:00:00s
epoch 6  | loss: 73.62189| val_0_rmse: 6.59076 |  0:00:00s
epoch 7  | loss: 59.4592 | val_0_rmse: 5.85465 |  0:00:00s
epoch 8  | loss: 46.70465| val_0_rmse: 4.86522 |  0:00:00s
epoch 9  | loss: 32.51374| val_0_rmse: 4.02175 |  0:00:00s
epoch 10 | loss: 22.34216| val_0_rmse: 3.9923  |  0:00:01s
epoch 11 | loss: 12.80869| val_0_rmse: 5.47112 |  0:00:01s
epoch 12 | loss: 6.04169 | val_0_rmse: 6.02967 |  0:00:01s
epoch 13 | loss: 3.76128 | val_0_rmse: 6.38947 |  0:00:01s
epoch 14 | loss: 4.4672  | val_0_rmse: 6.15519 |  0:00:01s
epoch 15 | loss: 5.58983 | val_0_rmse: 4.77674 |  0:00:01s
epoch 16 | loss: 5.87575 | val_0_rmse: 4.62426 |  0

[I 2025-08-17 19:44:15,014] Trial 86 finished with value: 0.2940714477775585 and parameters: {'n_d': 32, 'n_a': 45, 'n_steps': 3, 'gamma': 1.6073410139355055, 'lambda_sparse': 0.002632516095224076}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.05405 | val_0_rmse: 0.29407 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.29407




epoch 0  | loss: 160.40976| val_0_rmse: 16.21329|  0:00:00s
epoch 1  | loss: 139.12514| val_0_rmse: 12.53212|  0:00:00s
epoch 2  | loss: 119.95447| val_0_rmse: 10.74914|  0:00:00s
epoch 3  | loss: 103.67297| val_0_rmse: 8.98821 |  0:00:00s
epoch 4  | loss: 86.60776| val_0_rmse: 7.24254 |  0:00:00s
epoch 5  | loss: 72.57921| val_0_rmse: 5.90658 |  0:00:00s
epoch 6  | loss: 59.42841| val_0_rmse: 5.63641 |  0:00:00s
epoch 7  | loss: 46.61805| val_0_rmse: 5.97954 |  0:00:00s
epoch 8  | loss: 34.27777| val_0_rmse: 6.22781 |  0:00:00s
epoch 9  | loss: 23.81761| val_0_rmse: 6.11699 |  0:00:00s
epoch 10 | loss: 15.48858| val_0_rmse: 5.89956 |  0:00:01s
epoch 11 | loss: 9.51188 | val_0_rmse: 6.57715 |  0:00:01s
epoch 12 | loss: 5.98898 | val_0_rmse: 7.21807 |  0:00:01s
epoch 13 | loss: 5.10977 | val_0_rmse: 6.85137 |  0:00:01s
epoch 14 | loss: 6.03726 | val_0_rmse: 6.02367 |  0:00:01s
epoch 15 | loss: 6.71838 | val_0_rmse: 5.50654 |  0:00:01s
epoch 16 | loss: 6.96617 | val_0_rmse: 4.79659 |  0:

[I 2025-08-17 19:44:19,437] Trial 87 finished with value: 1.2946935843273637 and parameters: {'n_d': 26, 'n_a': 45, 'n_steps': 3, 'gamma': 1.6692511172886018, 'lambda_sparse': 0.0026309292050488197}. Best is trial 40 with value: 0.2639664310728177.


epoch 43 | loss: 0.1673  | val_0_rmse: 1.31026 |  0:00:04s

Early stopping occurred at epoch 43 with best_epoch = 33 and best_val_0_rmse = 1.29469




epoch 0  | loss: 165.43349| val_0_rmse: 11.75698|  0:00:00s
epoch 1  | loss: 134.63252| val_0_rmse: 10.129  |  0:00:00s
epoch 2  | loss: 107.32712| val_0_rmse: 8.8668  |  0:00:00s
epoch 3  | loss: 85.00359| val_0_rmse: 8.07959 |  0:00:00s
epoch 4  | loss: 70.53799| val_0_rmse: 7.67803 |  0:00:00s
epoch 5  | loss: 55.88588| val_0_rmse: 7.51716 |  0:00:00s
epoch 6  | loss: 43.00422| val_0_rmse: 6.98809 |  0:00:00s
epoch 7  | loss: 31.06853| val_0_rmse: 6.57915 |  0:00:00s
epoch 8  | loss: 20.028  | val_0_rmse: 6.64252 |  0:00:00s
epoch 9  | loss: 11.78029| val_0_rmse: 6.39944 |  0:00:01s
epoch 10 | loss: 7.95224 | val_0_rmse: 6.76517 |  0:00:01s
epoch 11 | loss: 7.27221 | val_0_rmse: 6.49094 |  0:00:01s
epoch 12 | loss: 7.63868 | val_0_rmse: 6.14937 |  0:00:01s
epoch 13 | loss: 7.3477  | val_0_rmse: 4.99272 |  0:00:01s
epoch 14 | loss: 6.37186 | val_0_rmse: 4.06751 |  0:00:01s
epoch 15 | loss: 4.5567  | val_0_rmse: 3.27686 |  0:00:01s
epoch 16 | loss: 2.30159 | val_0_rmse: 3.79068 |  0:0

[I 2025-08-17 19:44:25,707] Trial 88 finished with value: 0.4525219144495968 and parameters: {'n_d': 32, 'n_a': 60, 'n_steps': 3, 'gamma': 1.738441038234146, 'lambda_sparse': 0.0015086579001115067}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 124.87427| val_0_rmse: 11.10307|  0:00:00s
epoch 1  | loss: 97.13705| val_0_rmse: 11.68583|  0:00:00s
epoch 2  | loss: 76.79884| val_0_rmse: 8.75988 |  0:00:00s
epoch 3  | loss: 56.89523| val_0_rmse: 6.88967 |  0:00:00s
epoch 4  | loss: 43.93412| val_0_rmse: 6.91933 |  0:00:00s
epoch 5  | loss: 29.49904| val_0_rmse: 6.3843  |  0:00:00s
epoch 6  | loss: 21.38057| val_0_rmse: 7.90567 |  0:00:00s
epoch 7  | loss: 14.54855| val_0_rmse: 9.62716 |  0:00:01s
epoch 8  | loss: 11.83316| val_0_rmse: 9.90736 |  0:00:01s
epoch 9  | loss: 11.2429 | val_0_rmse: 8.09741 |  0:00:01s
epoch 10 | loss: 10.18527| val_0_rmse: 7.24705 |  0:00:01s
epoch 11 | loss: 8.62664 | val_0_rmse: 6.60285 |  0:00:01s
epoch 12 | loss: 6.3063  | val_0_rmse: 6.49554 |  0:00:01s
epoch 13 | loss: 4.40635 | val_0_rmse: 5.83776 |  0:00:01s
epoch 14 | loss: 3.59703 | val_0_rmse: 5.34783 |  0:00:01s
epoch 15 | loss: 3.5411  | val_0_rmse: 4.96508 |  0:00:01s
epoch 16 | loss: 2.88415 | val_0_rmse: 4.64021 |  0:00:

[I 2025-08-17 19:44:34,512] Trial 89 finished with value: 0.5398985894190671 and parameters: {'n_d': 29, 'n_a': 56, 'n_steps': 4, 'gamma': 1.7820122516121473, 'lambda_sparse': 0.004143514118884736}. Best is trial 40 with value: 0.2639664310728177.


epoch 68 | loss: 0.21585 | val_0_rmse: 0.58166 |  0:00:08s

Early stopping occurred at epoch 68 with best_epoch = 58 and best_val_0_rmse = 0.5399




epoch 0  | loss: 98.76894| val_0_rmse: 14.94017|  0:00:00s
epoch 1  | loss: 72.95548| val_0_rmse: 11.60921|  0:00:00s
epoch 2  | loss: 57.93557| val_0_rmse: 10.22825|  0:00:00s
epoch 3  | loss: 42.51963| val_0_rmse: 10.29373|  0:00:00s
epoch 4  | loss: 30.80268| val_0_rmse: 11.09322|  0:00:01s
epoch 5  | loss: 23.9073 | val_0_rmse: 12.0944 |  0:00:01s
epoch 6  | loss: 17.97064| val_0_rmse: 13.68712|  0:00:01s
epoch 7  | loss: 14.32181| val_0_rmse: 14.78287|  0:00:01s
epoch 8  | loss: 15.26905| val_0_rmse: 12.12495|  0:00:01s
epoch 9  | loss: 17.9403 | val_0_rmse: 12.30654|  0:00:01s
epoch 10 | loss: 16.82512| val_0_rmse: 11.87845|  0:00:02s
epoch 11 | loss: 15.12415| val_0_rmse: 11.90321|  0:00:02s
epoch 12 | loss: 10.70057| val_0_rmse: 7.88854 |  0:00:02s
epoch 13 | loss: 7.96066 | val_0_rmse: 7.11461 |  0:00:02s
epoch 14 | loss: 6.74998 | val_0_rmse: 6.57137 |  0:00:02s
epoch 15 | loss: 6.54965 | val_0_rmse: 7.29565 |  0:00:03s
epoch 16 | loss: 7.2064  | val_0_rmse: 6.87139 |  0:00:0

[I 2025-08-17 19:44:51,273] Trial 90 finished with value: 0.5793357064118441 and parameters: {'n_d': 24, 'n_a': 63, 'n_steps': 7, 'gamma': 1.5198356800074921, 'lambda_sparse': 0.0011959378393364155}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 115.47412| val_0_rmse: 8.32961 |  0:00:00s
epoch 1  | loss: 92.78995| val_0_rmse: 7.43407 |  0:00:00s
epoch 2  | loss: 68.91173| val_0_rmse: 6.58786 |  0:00:00s
epoch 3  | loss: 51.73821| val_0_rmse: 6.43797 |  0:00:00s
epoch 4  | loss: 39.42607| val_0_rmse: 8.17739 |  0:00:00s
epoch 5  | loss: 25.66847| val_0_rmse: 6.76102 |  0:00:00s
epoch 6  | loss: 16.6978 | val_0_rmse: 6.68216 |  0:00:00s
epoch 7  | loss: 10.30403| val_0_rmse: 6.55242 |  0:00:00s
epoch 8  | loss: 10.2058 | val_0_rmse: 6.32771 |  0:00:00s
epoch 9  | loss: 9.11396 | val_0_rmse: 6.56058 |  0:00:01s
epoch 10 | loss: 8.32427 | val_0_rmse: 5.50064 |  0:00:01s
epoch 11 | loss: 6.28279 | val_0_rmse: 5.52126 |  0:00:01s
epoch 12 | loss: 3.73885 | val_0_rmse: 5.42724 |  0:00:01s
epoch 13 | loss: 2.58693 | val_0_rmse: 5.24796 |  0:00:01s
epoch 14 | loss: 1.9564  | val_0_rmse: 5.00024 |  0:00:01s
epoch 15 | loss: 1.65137 | val_0_rmse: 4.69461 |  0:00:01s
epoch 16 | loss: 1.48691 | val_0_rmse: 4.21103 |  0:00:

[I 2025-08-17 19:45:01,235] Trial 91 finished with value: 0.31469308905640136 and parameters: {'n_d': 32, 'n_a': 50, 'n_steps': 3, 'gamma': 1.6079047346551536, 'lambda_sparse': 0.003261690183773946}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.06178 | val_0_rmse: 0.31469 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.31469




epoch 0  | loss: 115.39876| val_0_rmse: 8.41098 |  0:00:00s
epoch 1  | loss: 92.64079| val_0_rmse: 7.7115  |  0:00:00s
epoch 2  | loss: 71.5246 | val_0_rmse: 7.17427 |  0:00:00s
epoch 3  | loss: 54.43129| val_0_rmse: 7.21101 |  0:00:00s
epoch 4  | loss: 37.04046| val_0_rmse: 6.91977 |  0:00:00s
epoch 5  | loss: 25.15144| val_0_rmse: 9.23959 |  0:00:00s
epoch 6  | loss: 16.90155| val_0_rmse: 12.56152|  0:00:00s
epoch 7  | loss: 12.90031| val_0_rmse: 13.82946|  0:00:00s
epoch 8  | loss: 10.5192 | val_0_rmse: 14.40801|  0:00:00s
epoch 9  | loss: 10.49816| val_0_rmse: 12.20543|  0:00:00s
epoch 10 | loss: 8.02522 | val_0_rmse: 10.71676|  0:00:01s
epoch 11 | loss: 7.46634 | val_0_rmse: 9.32988 |  0:00:01s
epoch 12 | loss: 4.31254 | val_0_rmse: 7.73648 |  0:00:01s
epoch 13 | loss: 2.61778 | val_0_rmse: 6.47403 |  0:00:01s
epoch 14 | loss: 1.89962 | val_0_rmse: 5.22593 |  0:00:01s
epoch 15 | loss: 2.00707 | val_0_rmse: 5.11916 |  0:00:01s
epoch 16 | loss: 1.57945 | val_0_rmse: 4.59725 |  0:00:

[I 2025-08-17 19:45:05,860] Trial 92 finished with value: 0.787314831292685 and parameters: {'n_d': 32, 'n_a': 50, 'n_steps': 3, 'gamma': 1.5473899973043197, 'lambda_sparse': 0.0018753188635891494}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 152.08588| val_0_rmse: 13.05373|  0:00:00s
epoch 1  | loss: 125.14495| val_0_rmse: 10.72327|  0:00:00s
epoch 2  | loss: 102.77271| val_0_rmse: 8.99851 |  0:00:00s
epoch 3  | loss: 82.94756| val_0_rmse: 8.06099 |  0:00:00s
epoch 4  | loss: 65.87858| val_0_rmse: 7.76347 |  0:00:00s
epoch 5  | loss: 49.66322| val_0_rmse: 7.70399 |  0:00:00s
epoch 6  | loss: 36.59298| val_0_rmse: 7.90613 |  0:00:00s
epoch 7  | loss: 25.11654| val_0_rmse: 7.88502 |  0:00:00s
epoch 8  | loss: 18.72691| val_0_rmse: 7.85739 |  0:00:00s
epoch 9  | loss: 12.12624| val_0_rmse: 7.8861  |  0:00:00s
epoch 10 | loss: 8.85224 | val_0_rmse: 7.98011 |  0:00:01s
epoch 11 | loss: 8.43323 | val_0_rmse: 7.92923 |  0:00:01s
epoch 12 | loss: 8.35106 | val_0_rmse: 7.80658 |  0:00:01s
epoch 13 | loss: 7.61771 | val_0_rmse: 7.63372 |  0:00:01s
epoch 14 | loss: 6.17949 | val_0_rmse: 6.90882 |  0:00:01s
epoch 15 | loss: 3.87998 | val_0_rmse: 6.03878 |  0:00:01s
epoch 16 | loss: 2.54122 | val_0_rmse: 5.19586 |  0:0

[I 2025-08-17 19:45:15,558] Trial 93 finished with value: 0.30119873022767735 and parameters: {'n_d': 31, 'n_a': 43, 'n_steps': 3, 'gamma': 1.6110334121943448, 'lambda_sparse': 0.0032020318542691426}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 119.12384| val_0_rmse: 16.07275|  0:00:00s
epoch 1  | loss: 99.84648| val_0_rmse: 21.50646|  0:00:00s
epoch 2  | loss: 80.91894| val_0_rmse: 8.96108 |  0:00:00s
epoch 3  | loss: 64.22393| val_0_rmse: 11.46665|  0:00:00s
epoch 4  | loss: 46.67377| val_0_rmse: 9.56865 |  0:00:00s
epoch 5  | loss: 34.11749| val_0_rmse: 11.30102|  0:00:00s
epoch 6  | loss: 24.099  | val_0_rmse: 11.02007|  0:00:00s
epoch 7  | loss: 16.30578| val_0_rmse: 9.29024 |  0:00:00s
epoch 8  | loss: 11.11011| val_0_rmse: 13.55715|  0:00:00s
epoch 9  | loss: 9.22152 | val_0_rmse: 15.82604|  0:00:00s
epoch 10 | loss: 9.77113 | val_0_rmse: 15.95239|  0:00:00s
epoch 11 | loss: 8.89518 | val_0_rmse: 15.90992|  0:00:01s


[I 2025-08-17 19:45:16,838] Trial 94 finished with value: 8.96107571335446 and parameters: {'n_d': 34, 'n_a': 42, 'n_steps': 3, 'gamma': 1.6484261316892412, 'lambda_sparse': 0.002453954884586287}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 6.89127 | val_0_rmse: 12.70232|  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 8.96108




epoch 0  | loss: 156.64729| val_0_rmse: 11.61848|  0:00:00s
epoch 1  | loss: 125.05363| val_0_rmse: 9.53432 |  0:00:00s
epoch 2  | loss: 102.70132| val_0_rmse: 8.67481 |  0:00:00s
epoch 3  | loss: 81.55572| val_0_rmse: 7.96158 |  0:00:00s
epoch 4  | loss: 64.71581| val_0_rmse: 7.49661 |  0:00:00s
epoch 5  | loss: 50.65486| val_0_rmse: 7.15058 |  0:00:00s
epoch 6  | loss: 38.54187| val_0_rmse: 7.51902 |  0:00:00s
epoch 7  | loss: 29.31696| val_0_rmse: 8.88491 |  0:00:00s
epoch 8  | loss: 20.41882| val_0_rmse: 9.28406 |  0:00:00s
epoch 9  | loss: 16.12643| val_0_rmse: 10.67985|  0:00:00s
epoch 10 | loss: 11.16609| val_0_rmse: 11.17513|  0:00:01s
epoch 11 | loss: 8.96264 | val_0_rmse: 11.88544|  0:00:01s
epoch 12 | loss: 9.8433  | val_0_rmse: 13.86201|  0:00:01s
epoch 13 | loss: 7.92213 | val_0_rmse: 13.87031|  0:00:01s
epoch 14 | loss: 5.94401 | val_0_rmse: 12.2047 |  0:00:01s


[I 2025-08-17 19:45:18,417] Trial 95 finished with value: 7.150575696927902 and parameters: {'n_d': 31, 'n_a': 39, 'n_steps': 3, 'gamma': 1.5728077874045387, 'lambda_sparse': 0.009747681612458087}. Best is trial 40 with value: 0.2639664310728177.


epoch 15 | loss: 4.33559 | val_0_rmse: 10.48524|  0:00:01s

Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_rmse = 7.15058
epoch 0  | loss: 129.65575| val_0_rmse: 10.10991|  0:00:00s




epoch 1  | loss: 104.30206| val_0_rmse: 9.0004  |  0:00:00s
epoch 2  | loss: 83.80273| val_0_rmse: 7.42062 |  0:00:00s
epoch 3  | loss: 64.75527| val_0_rmse: 7.10827 |  0:00:00s
epoch 4  | loss: 51.20811| val_0_rmse: 7.08413 |  0:00:00s
epoch 5  | loss: 35.52636| val_0_rmse: 9.20115 |  0:00:00s
epoch 6  | loss: 22.90825| val_0_rmse: 11.41811|  0:00:00s
epoch 7  | loss: 16.23037| val_0_rmse: 10.26895|  0:00:00s
epoch 8  | loss: 10.1438 | val_0_rmse: 10.5535 |  0:00:00s
epoch 9  | loss: 7.94766 | val_0_rmse: 10.48543|  0:00:00s
epoch 10 | loss: 6.55393 | val_0_rmse: 11.0617 |  0:00:01s
epoch 11 | loss: 6.49898 | val_0_rmse: 8.53328 |  0:00:01s
epoch 12 | loss: 5.43475 | val_0_rmse: 6.34694 |  0:00:01s
epoch 13 | loss: 3.76511 | val_0_rmse: 5.50853 |  0:00:01s
epoch 14 | loss: 2.20737 | val_0_rmse: 4.4394  |  0:00:01s
epoch 15 | loss: 2.20937 | val_0_rmse: 3.84152 |  0:00:01s
epoch 16 | loss: 1.89164 | val_0_rmse: 3.47036 |  0:00:01s
epoch 17 | loss: 1.56078 | val_0_rmse: 3.25728 |  0:00:

[I 2025-08-17 19:45:25,262] Trial 96 finished with value: 0.5937459261773886 and parameters: {'n_d': 27, 'n_a': 61, 'n_steps': 3, 'gamma': 1.474035229291845, 'lambda_sparse': 0.0031337450921322357}. Best is trial 40 with value: 0.2639664310728177.


epoch 70 | loss: 0.08766 | val_0_rmse: 0.64824 |  0:00:06s
epoch 71 | loss: 0.11554 | val_0_rmse: 0.67559 |  0:00:06s

Early stopping occurred at epoch 71 with best_epoch = 61 and best_val_0_rmse = 0.59375




epoch 0  | loss: 126.12437| val_0_rmse: 9.7821  |  0:00:00s
epoch 1  | loss: 101.95779| val_0_rmse: 8.77189 |  0:00:00s
epoch 2  | loss: 82.76072| val_0_rmse: 8.31093 |  0:00:00s
epoch 3  | loss: 65.16192| val_0_rmse: 11.56496|  0:00:00s
epoch 4  | loss: 50.94838| val_0_rmse: 10.57506|  0:00:00s
epoch 5  | loss: 37.04783| val_0_rmse: 9.66054 |  0:00:00s
epoch 6  | loss: 25.13017| val_0_rmse: 13.40246|  0:00:00s
epoch 7  | loss: 16.47781| val_0_rmse: 12.49856|  0:00:00s
epoch 8  | loss: 11.69958| val_0_rmse: 11.43168|  0:00:00s
epoch 9  | loss: 8.98211 | val_0_rmse: 10.97376|  0:00:00s
epoch 10 | loss: 8.88422 | val_0_rmse: 10.63175|  0:00:00s
epoch 11 | loss: 8.83051 | val_0_rmse: 9.39995 |  0:00:01s


[I 2025-08-17 19:45:26,562] Trial 97 finished with value: 8.310934742321 and parameters: {'n_d': 33, 'n_a': 48, 'n_steps': 3, 'gamma': 1.4959035426447718, 'lambda_sparse': 0.005007015277371566}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 6.95626 | val_0_rmse: 8.8612  |  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 8.31093




epoch 0  | loss: 157.41046| val_0_rmse: 10.41832|  0:00:00s
epoch 1  | loss: 118.82742| val_0_rmse: 9.50896 |  0:00:00s
epoch 2  | loss: 88.77991| val_0_rmse: 7.81288 |  0:00:00s
epoch 3  | loss: 64.73195| val_0_rmse: 6.91339 |  0:00:00s
epoch 4  | loss: 46.45475| val_0_rmse: 6.3771  |  0:00:00s
epoch 5  | loss: 35.22025| val_0_rmse: 6.48098 |  0:00:00s
epoch 6  | loss: 26.60439| val_0_rmse: 5.96997 |  0:00:00s
epoch 7  | loss: 20.61796| val_0_rmse: 5.56811 |  0:00:00s
epoch 8  | loss: 20.5813 | val_0_rmse: 7.20513 |  0:00:01s
epoch 9  | loss: 18.09195| val_0_rmse: 8.49457 |  0:00:01s
epoch 10 | loss: 17.90335| val_0_rmse: 8.62377 |  0:00:01s
epoch 11 | loss: 15.2926 | val_0_rmse: 9.14136 |  0:00:01s
epoch 12 | loss: 11.88184| val_0_rmse: 8.7326  |  0:00:01s
epoch 13 | loss: 7.27422 | val_0_rmse: 8.25519 |  0:00:01s
epoch 14 | loss: 4.86144 | val_0_rmse: 6.19343 |  0:00:01s
epoch 15 | loss: 4.13996 | val_0_rmse: 5.42096 |  0:00:01s
epoch 16 | loss: 3.63754 | val_0_rmse: 4.93719 |  0:00

[I 2025-08-17 19:45:35,847] Trial 98 finished with value: 0.4796636710892022 and parameters: {'n_d': 35, 'n_a': 32, 'n_steps': 4, 'gamma': 1.6076396872055596, 'lambda_sparse': 0.0070120234776034265}. Best is trial 40 with value: 0.2639664310728177.


epoch 79 | loss: 0.12111 | val_0_rmse: 0.52748 |  0:00:09s

Early stopping occurred at epoch 79 with best_epoch = 69 and best_val_0_rmse = 0.47966




epoch 0  | loss: 165.83331| val_0_rmse: 16.10142|  0:00:00s
epoch 1  | loss: 140.04424| val_0_rmse: 10.63473|  0:00:00s
epoch 2  | loss: 118.34063| val_0_rmse: 9.65968 |  0:00:00s
epoch 3  | loss: 98.99709| val_0_rmse: 8.55179 |  0:00:00s
epoch 4  | loss: 82.17847| val_0_rmse: 7.7652  |  0:00:00s
epoch 5  | loss: 67.48679| val_0_rmse: 7.31652 |  0:00:00s
epoch 6  | loss: 52.48619| val_0_rmse: 6.85583 |  0:00:00s
epoch 7  | loss: 39.99509| val_0_rmse: 6.69915 |  0:00:00s
epoch 8  | loss: 28.41899| val_0_rmse: 7.22544 |  0:00:00s
epoch 9  | loss: 20.25446| val_0_rmse: 7.69866 |  0:00:00s
epoch 10 | loss: 14.36896| val_0_rmse: 7.36886 |  0:00:01s
epoch 11 | loss: 10.59318| val_0_rmse: 8.88724 |  0:00:01s
epoch 12 | loss: 7.46833 | val_0_rmse: 9.42101 |  0:00:01s
epoch 13 | loss: 7.32863 | val_0_rmse: 9.01138 |  0:00:01s
epoch 14 | loss: 6.32826 | val_0_rmse: 8.58314 |  0:00:01s
epoch 15 | loss: 5.75667 | val_0_rmse: 7.49595 |  0:00:01s
epoch 16 | loss: 4.14366 | val_0_rmse: 6.04271 |  0:0

[I 2025-08-17 19:45:42,228] Trial 99 finished with value: 0.4608474219523032 and parameters: {'n_d': 30, 'n_a': 46, 'n_steps': 3, 'gamma': 1.6987611760788268, 'lambda_sparse': 0.0020735600236147416}. Best is trial 40 with value: 0.2639664310728177.


epoch 66 | loss: 0.12519 | val_0_rmse: 0.52484 |  0:00:06s
epoch 67 | loss: 0.12652 | val_0_rmse: 0.55138 |  0:00:06s

Early stopping occurred at epoch 67 with best_epoch = 57 and best_val_0_rmse = 0.46085




epoch 0  | loss: 107.10595| val_0_rmse: 10.13962|  0:00:00s
epoch 1  | loss: 81.47892| val_0_rmse: 9.48112 |  0:00:00s
epoch 2  | loss: 58.38862| val_0_rmse: 8.68255 |  0:00:00s
epoch 3  | loss: 40.16377| val_0_rmse: 11.81158|  0:00:00s
epoch 4  | loss: 28.26181| val_0_rmse: 12.14413|  0:00:01s
epoch 5  | loss: 20.93298| val_0_rmse: 12.28332|  0:00:01s
epoch 6  | loss: 20.78527| val_0_rmse: 12.98948|  0:00:01s
epoch 7  | loss: 20.76692| val_0_rmse: 13.45706|  0:00:01s
epoch 8  | loss: 21.24028| val_0_rmse: 13.92879|  0:00:02s
epoch 9  | loss: 20.77742| val_0_rmse: 13.26271|  0:00:02s
epoch 10 | loss: 18.75412| val_0_rmse: 13.18772|  0:00:02s
epoch 11 | loss: 16.33465| val_0_rmse: 11.86925|  0:00:02s


[I 2025-08-17 19:45:45,545] Trial 100 finished with value: 8.682554256101119 and parameters: {'n_d': 29, 'n_a': 54, 'n_steps': 9, 'gamma': 1.6626544588786014, 'lambda_sparse': 0.001555457350522371}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 12.43694| val_0_rmse: 9.99138 |  0:00:03s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 8.68255




epoch 0  | loss: 106.95834| val_0_rmse: 21.69334|  0:00:00s
epoch 1  | loss: 85.30077| val_0_rmse: 11.1697 |  0:00:00s
epoch 2  | loss: 67.38562| val_0_rmse: 8.65507 |  0:00:00s
epoch 3  | loss: 49.05122| val_0_rmse: 8.27476 |  0:00:00s
epoch 4  | loss: 32.52716| val_0_rmse: 6.98711 |  0:00:00s
epoch 5  | loss: 25.02976| val_0_rmse: 6.88446 |  0:00:00s
epoch 6  | loss: 15.3146 | val_0_rmse: 7.01888 |  0:00:00s
epoch 7  | loss: 11.14814| val_0_rmse: 5.96388 |  0:00:00s
epoch 8  | loss: 13.2019 | val_0_rmse: 4.62348 |  0:00:00s
epoch 9  | loss: 10.82867| val_0_rmse: 3.91714 |  0:00:00s
epoch 10 | loss: 7.80132 | val_0_rmse: 3.9741  |  0:00:01s
epoch 11 | loss: 5.88659 | val_0_rmse: 4.46895 |  0:00:01s
epoch 12 | loss: 3.63308 | val_0_rmse: 3.67372 |  0:00:01s
epoch 13 | loss: 3.17236 | val_0_rmse: 3.55161 |  0:00:01s
epoch 14 | loss: 2.89571 | val_0_rmse: 3.09977 |  0:00:01s
epoch 15 | loss: 2.15016 | val_0_rmse: 3.2646  |  0:00:01s
epoch 16 | loss: 1.31912 | val_0_rmse: 3.35991 |  0:00:

[I 2025-08-17 19:45:50,572] Trial 101 finished with value: 0.6858513385985564 and parameters: {'n_d': 32, 'n_a': 51, 'n_steps': 3, 'gamma': 1.6093092568344045, 'lambda_sparse': 0.004144733580681424}. Best is trial 40 with value: 0.2639664310728177.


epoch 50 | loss: 0.15389 | val_0_rmse: 0.70574 |  0:00:04s

Early stopping occurred at epoch 50 with best_epoch = 40 and best_val_0_rmse = 0.68585




epoch 0  | loss: 106.26431| val_0_rmse: 8.35902 |  0:00:00s
epoch 1  | loss: 86.30022| val_0_rmse: 7.97771 |  0:00:00s
epoch 2  | loss: 68.58223| val_0_rmse: 7.86014 |  0:00:00s
epoch 3  | loss: 54.04552| val_0_rmse: 8.39107 |  0:00:00s
epoch 4  | loss: 42.14543| val_0_rmse: 8.24378 |  0:00:00s
epoch 5  | loss: 28.72306| val_0_rmse: 9.02386 |  0:00:00s
epoch 6  | loss: 19.70187| val_0_rmse: 8.72667 |  0:00:00s
epoch 7  | loss: 14.20665| val_0_rmse: 9.26786 |  0:00:00s
epoch 8  | loss: 11.38482| val_0_rmse: 9.34927 |  0:00:00s
epoch 9  | loss: 9.98951 | val_0_rmse: 9.47828 |  0:00:00s
epoch 10 | loss: 8.0754  | val_0_rmse: 9.55302 |  0:00:01s
epoch 11 | loss: 5.95725 | val_0_rmse: 8.29936 |  0:00:01s
epoch 12 | loss: 3.57922 | val_0_rmse: 7.81288 |  0:00:01s
epoch 13 | loss: 2.67507 | val_0_rmse: 6.9933  |  0:00:01s
epoch 14 | loss: 2.39769 | val_0_rmse: 6.29285 |  0:00:01s
epoch 15 | loss: 1.90376 | val_0_rmse: 5.59155 |  0:00:01s
epoch 16 | loss: 1.11571 | val_0_rmse: 4.92216 |  0:00:

[I 2025-08-17 19:46:00,308] Trial 102 finished with value: 0.2998476682623165 and parameters: {'n_d': 31, 'n_a': 44, 'n_steps': 3, 'gamma': 1.5448451756365473, 'lambda_sparse': 0.003418482259260065}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 172.24474| val_0_rmse: 15.19492|  0:00:00s
epoch 1  | loss: 135.01495| val_0_rmse: 10.94452|  0:00:00s
epoch 2  | loss: 107.3882| val_0_rmse: 8.86596 |  0:00:00s
epoch 3  | loss: 84.17097| val_0_rmse: 7.18231 |  0:00:00s
epoch 4  | loss: 65.04195| val_0_rmse: 6.56261 |  0:00:00s
epoch 5  | loss: 48.48987| val_0_rmse: 6.01192 |  0:00:00s
epoch 6  | loss: 33.47099| val_0_rmse: 6.13034 |  0:00:00s
epoch 7  | loss: 21.40911| val_0_rmse: 6.37898 |  0:00:00s
epoch 8  | loss: 13.07242| val_0_rmse: 8.63597 |  0:00:01s
epoch 9  | loss: 8.78219 | val_0_rmse: 10.75569|  0:00:01s
epoch 10 | loss: 6.50702 | val_0_rmse: 11.62477|  0:00:01s
epoch 11 | loss: 6.01974 | val_0_rmse: 11.69555|  0:00:01s
epoch 12 | loss: 6.41024 | val_0_rmse: 10.81951|  0:00:01s
epoch 13 | loss: 5.71429 | val_0_rmse: 10.51781|  0:00:01s
epoch 14 | loss: 3.66284 | val_0_rmse: 11.02487|  0:00:01s


[I 2025-08-17 19:46:02,124] Trial 103 finished with value: 6.011915173125729 and parameters: {'n_d': 35, 'n_a': 59, 'n_steps': 3, 'gamma': 1.5446818406797576, 'lambda_sparse': 0.0025685299319904006}. Best is trial 40 with value: 0.2639664310728177.


epoch 15 | loss: 2.18539 | val_0_rmse: 14.11401|  0:00:01s

Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_rmse = 6.01192




epoch 0  | loss: 134.72673| val_0_rmse: 12.76759|  0:00:00s
epoch 1  | loss: 114.34944| val_0_rmse: 9.02437 |  0:00:00s
epoch 2  | loss: 96.5106 | val_0_rmse: 12.92471|  0:00:00s
epoch 3  | loss: 81.36492| val_0_rmse: 10.54607|  0:00:00s
epoch 4  | loss: 68.10445| val_0_rmse: 7.35793 |  0:00:00s
epoch 5  | loss: 54.30448| val_0_rmse: 6.78069 |  0:00:00s
epoch 6  | loss: 41.06035| val_0_rmse: 7.14451 |  0:00:00s
epoch 7  | loss: 31.32113| val_0_rmse: 6.24844 |  0:00:00s
epoch 8  | loss: 23.85954| val_0_rmse: 6.51628 |  0:00:00s
epoch 9  | loss: 14.54684| val_0_rmse: 7.92477 |  0:00:00s
epoch 10 | loss: 9.55123 | val_0_rmse: 7.85636 |  0:00:01s
epoch 11 | loss: 7.90782 | val_0_rmse: 7.23023 |  0:00:01s
epoch 12 | loss: 7.60834 | val_0_rmse: 7.69043 |  0:00:01s
epoch 13 | loss: 7.65652 | val_0_rmse: 7.32117 |  0:00:01s
epoch 14 | loss: 6.48278 | val_0_rmse: 8.24105 |  0:00:01s
epoch 15 | loss: 4.83071 | val_0_rmse: 6.63207 |  0:00:01s
epoch 16 | loss: 3.23337 | val_0_rmse: 5.73724 |  0:00

[I 2025-08-17 19:46:07,093] Trial 104 finished with value: 0.7233174103229056 and parameters: {'n_d': 25, 'n_a': 44, 'n_steps': 3, 'gamma': 1.5762380811763885, 'lambda_sparse': 0.0016899268569888965}. Best is trial 40 with value: 0.2639664310728177.


epoch 51 | loss: 0.12281 | val_0_rmse: 0.77943 |  0:00:04s
epoch 52 | loss: 0.11767 | val_0_rmse: 0.8137  |  0:00:04s

Early stopping occurred at epoch 52 with best_epoch = 42 and best_val_0_rmse = 0.72332




epoch 0  | loss: 128.99879| val_0_rmse: 9.10292 |  0:00:00s
epoch 1  | loss: 99.28419| val_0_rmse: 8.64324 |  0:00:00s
epoch 2  | loss: 75.34394| val_0_rmse: 7.72037 |  0:00:00s
epoch 3  | loss: 54.85408| val_0_rmse: 7.29367 |  0:00:00s
epoch 4  | loss: 36.55044| val_0_rmse: 7.42576 |  0:00:00s
epoch 5  | loss: 24.10927| val_0_rmse: 7.62966 |  0:00:00s
epoch 6  | loss: 16.46303| val_0_rmse: 8.41078 |  0:00:00s
epoch 7  | loss: 12.00995| val_0_rmse: 9.59221 |  0:00:00s
epoch 8  | loss: 10.32459| val_0_rmse: 11.07228|  0:00:00s
epoch 9  | loss: 10.29117| val_0_rmse: 11.14699|  0:00:00s
epoch 10 | loss: 9.00614 | val_0_rmse: 11.99179|  0:00:01s
epoch 11 | loss: 6.80018 | val_0_rmse: 10.50893|  0:00:01s
epoch 12 | loss: 4.14461 | val_0_rmse: 8.37436 |  0:00:01s
epoch 13 | loss: 2.8068  | val_0_rmse: 6.07433 |  0:00:01s
epoch 14 | loss: 2.35409 | val_0_rmse: 4.78589 |  0:00:01s
epoch 15 | loss: 1.73462 | val_0_rmse: 4.2267  |  0:00:01s
epoch 16 | loss: 1.14159 | val_0_rmse: 3.82978 |  0:00:

[I 2025-08-17 19:46:12,393] Trial 105 finished with value: 0.6424764958098733 and parameters: {'n_d': 38, 'n_a': 41, 'n_steps': 3, 'gamma': 1.6337433591868358, 'lambda_sparse': 0.005069507003176822}. Best is trial 40 with value: 0.2639664310728177.


epoch 51 | loss: 0.19601 | val_0_rmse: 0.65701 |  0:00:05s
epoch 52 | loss: 0.13864 | val_0_rmse: 0.69012 |  0:00:05s

Early stopping occurred at epoch 52 with best_epoch = 42 and best_val_0_rmse = 0.64248




epoch 0  | loss: 204.67503| val_0_rmse: 15.2672 |  0:00:00s
epoch 1  | loss: 152.10561| val_0_rmse: 10.65798|  0:00:00s
epoch 2  | loss: 108.44897| val_0_rmse: 8.67671 |  0:00:00s
epoch 3  | loss: 78.72215| val_0_rmse: 7.17427 |  0:00:00s
epoch 4  | loss: 54.75698| val_0_rmse: 5.56028 |  0:00:00s
epoch 5  | loss: 37.47242| val_0_rmse: 6.0446  |  0:00:00s
epoch 6  | loss: 22.84349| val_0_rmse: 6.31015 |  0:00:00s
epoch 7  | loss: 17.7112 | val_0_rmse: 7.31272 |  0:00:01s
epoch 8  | loss: 13.34175| val_0_rmse: 8.50857 |  0:00:01s
epoch 9  | loss: 14.59507| val_0_rmse: 8.92259 |  0:00:01s
epoch 10 | loss: 14.5427 | val_0_rmse: 8.1281  |  0:00:01s
epoch 11 | loss: 14.43472| val_0_rmse: 7.76806 |  0:00:01s
epoch 12 | loss: 10.36738| val_0_rmse: 5.64129 |  0:00:01s
epoch 13 | loss: 6.97612 | val_0_rmse: 5.18813 |  0:00:01s
epoch 14 | loss: 3.88684 | val_0_rmse: 4.45961 |  0:00:01s
epoch 15 | loss: 3.91221 | val_0_rmse: 5.20659 |  0:00:01s
epoch 16 | loss: 4.72546 | val_0_rmse: 4.79344 |  0:0

[I 2025-08-17 19:46:18,528] Trial 106 finished with value: 0.9867422366346991 and parameters: {'n_d': 56, 'n_a': 36, 'n_steps': 4, 'gamma': 1.729963354836628, 'lambda_sparse': 0.0013609923671747482}. Best is trial 40 with value: 0.2639664310728177.


epoch 48 | loss: 0.22275 | val_0_rmse: 1.00585 |  0:00:05s

Early stopping occurred at epoch 48 with best_epoch = 38 and best_val_0_rmse = 0.98674




epoch 0  | loss: 142.81882| val_0_rmse: 12.97818|  0:00:00s
epoch 1  | loss: 118.35074| val_0_rmse: 9.87181 |  0:00:00s
epoch 2  | loss: 96.27534| val_0_rmse: 8.53211 |  0:00:00s
epoch 3  | loss: 76.3738 | val_0_rmse: 7.73643 |  0:00:00s
epoch 4  | loss: 59.07222| val_0_rmse: 8.27294 |  0:00:00s
epoch 5  | loss: 43.49097| val_0_rmse: 9.00313 |  0:00:00s
epoch 6  | loss: 30.07962| val_0_rmse: 11.38518|  0:00:00s
epoch 7  | loss: 21.24025| val_0_rmse: 12.0436 |  0:00:00s
epoch 8  | loss: 14.06825| val_0_rmse: 12.11914|  0:00:00s
epoch 9  | loss: 8.64686 | val_0_rmse: 11.35709|  0:00:00s
epoch 10 | loss: 7.85654 | val_0_rmse: 10.54845|  0:00:01s
epoch 11 | loss: 7.48698 | val_0_rmse: 10.87473|  0:00:01s
epoch 12 | loss: 6.81805 | val_0_rmse: 9.05625 |  0:00:01s
epoch 13 | loss: 4.87396 | val_0_rmse: 6.51326 |  0:00:01s
epoch 14 | loss: 2.67514 | val_0_rmse: 5.30411 |  0:00:01s
epoch 15 | loss: 1.78013 | val_0_rmse: 4.28712 |  0:00:01s
epoch 16 | loss: 1.64411 | val_0_rmse: 3.90537 |  0:00

[I 2025-08-17 19:46:22,571] Trial 107 finished with value: 1.195880763927036 and parameters: {'n_d': 31, 'n_a': 57, 'n_steps': 3, 'gamma': 1.5113484404944886, 'lambda_sparse': 0.0035271980938762627}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 150.23825| val_0_rmse: 11.61744|  0:00:00s
epoch 1  | loss: 125.86171| val_0_rmse: 10.19696|  0:00:00s
epoch 2  | loss: 105.84102| val_0_rmse: 9.88865 |  0:00:00s
epoch 3  | loss: 87.16316| val_0_rmse: 11.42955|  0:00:00s
epoch 4  | loss: 71.50816| val_0_rmse: 14.41479|  0:00:00s
epoch 5  | loss: 57.21299| val_0_rmse: 16.69275|  0:00:00s
epoch 6  | loss: 43.32052| val_0_rmse: 17.66323|  0:00:00s
epoch 7  | loss: 31.166  | val_0_rmse: 18.34583|  0:00:00s
epoch 8  | loss: 20.49867| val_0_rmse: 17.26802|  0:00:00s
epoch 9  | loss: 13.21353| val_0_rmse: 12.5228 |  0:00:00s
epoch 10 | loss: 7.98334 | val_0_rmse: 9.2546  |  0:00:01s
epoch 11 | loss: 6.06511 | val_0_rmse: 10.31508|  0:00:01s
epoch 12 | loss: 6.75974 | val_0_rmse: 12.51352|  0:00:01s
epoch 13 | loss: 6.92446 | val_0_rmse: 14.34179|  0:00:01s
epoch 14 | loss: 5.50925 | val_0_rmse: 13.85085|  0:00:01s
epoch 15 | loss: 3.79195 | val_0_rmse: 13.19803|  0:00:01s
epoch 16 | loss: 2.21678 | val_0_rmse: 10.86535|  0:0

[I 2025-08-17 19:46:30,405] Trial 108 finished with value: 0.40566964416682766 and parameters: {'n_d': 29, 'n_a': 53, 'n_steps': 3, 'gamma': 1.7608775401314658, 'lambda_sparse': 0.0021245349306742924}. Best is trial 40 with value: 0.2639664310728177.


epoch 78 | loss: 0.0878  | val_0_rmse: 0.50748 |  0:00:07s

Early stopping occurred at epoch 78 with best_epoch = 68 and best_val_0_rmse = 0.40567




epoch 0  | loss: 194.0627| val_0_rmse: 14.88257|  0:00:00s
epoch 1  | loss: 157.91447| val_0_rmse: 11.34651|  0:00:00s
epoch 2  | loss: 132.0179| val_0_rmse: 10.83937|  0:00:00s
epoch 3  | loss: 108.94131| val_0_rmse: 13.31364|  0:00:00s
epoch 4  | loss: 89.22426| val_0_rmse: 14.75226|  0:00:00s
epoch 5  | loss: 73.10844| val_0_rmse: 14.04726|  0:00:00s
epoch 6  | loss: 57.78735| val_0_rmse: 16.10378|  0:00:00s
epoch 7  | loss: 44.73546| val_0_rmse: 16.66153|  0:00:00s
epoch 8  | loss: 33.38227| val_0_rmse: 19.31999|  0:00:01s
epoch 9  | loss: 25.21476| val_0_rmse: 21.93583|  0:00:01s
epoch 10 | loss: 18.57924| val_0_rmse: 22.79162|  0:00:01s
epoch 11 | loss: 14.48634| val_0_rmse: 21.29155|  0:00:01s


[I 2025-08-17 19:46:32,027] Trial 109 finished with value: 10.839368874125427 and parameters: {'n_d': 27, 'n_a': 47, 'n_steps': 4, 'gamma': 1.6820578921170812, 'lambda_sparse': 0.0011107959599644846}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 11.61848| val_0_rmse: 20.40609|  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 10.83937




epoch 0  | loss: 147.02303| val_0_rmse: 10.68947|  0:00:00s
epoch 1  | loss: 114.65413| val_0_rmse: 9.08838 |  0:00:00s
epoch 2  | loss: 92.80059| val_0_rmse: 8.64224 |  0:00:00s
epoch 3  | loss: 73.25002| val_0_rmse: 8.17749 |  0:00:00s
epoch 4  | loss: 54.53476| val_0_rmse: 7.12385 |  0:00:00s
epoch 5  | loss: 39.70241| val_0_rmse: 6.11952 |  0:00:00s
epoch 6  | loss: 26.18687| val_0_rmse: 5.68096 |  0:00:00s
epoch 7  | loss: 16.68676| val_0_rmse: 5.53006 |  0:00:00s
epoch 8  | loss: 10.03325| val_0_rmse: 5.76219 |  0:00:00s
epoch 9  | loss: 6.74651 | val_0_rmse: 6.34966 |  0:00:01s
epoch 10 | loss: 6.0206  | val_0_rmse: 6.39749 |  0:00:01s
epoch 11 | loss: 5.0939  | val_0_rmse: 6.13763 |  0:00:01s
epoch 12 | loss: 3.58608 | val_0_rmse: 6.3356  |  0:00:01s
epoch 13 | loss: 2.1267  | val_0_rmse: 4.91061 |  0:00:01s
epoch 14 | loss: 1.60539 | val_0_rmse: 4.10909 |  0:00:01s
epoch 15 | loss: 1.52928 | val_0_rmse: 3.61056 |  0:00:01s
epoch 16 | loss: 1.30272 | val_0_rmse: 3.44471 |  0:00

[I 2025-08-17 19:46:42,101] Trial 110 finished with value: 0.2849245620043306 and parameters: {'n_d': 33, 'n_a': 60, 'n_steps': 3, 'gamma': 1.4342131820625985, 'lambda_sparse': 0.0030315485071352372}. Best is trial 40 with value: 0.2639664310728177.


epoch 98 | loss: 0.08145 | val_0_rmse: 0.29581 |  0:00:09s
epoch 99 | loss: 0.05869 | val_0_rmse: 0.28492 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.28492




epoch 0  | loss: 141.70471| val_0_rmse: 11.58233|  0:00:00s
epoch 1  | loss: 120.74991| val_0_rmse: 9.93783 |  0:00:00s
epoch 2  | loss: 100.23096| val_0_rmse: 8.80345 |  0:00:00s
epoch 3  | loss: 81.16787| val_0_rmse: 7.86451 |  0:00:00s
epoch 4  | loss: 63.43136| val_0_rmse: 7.56839 |  0:00:00s
epoch 5  | loss: 48.65827| val_0_rmse: 6.86297 |  0:00:00s
epoch 6  | loss: 37.78002| val_0_rmse: 6.17578 |  0:00:00s
epoch 7  | loss: 26.47567| val_0_rmse: 5.69618 |  0:00:00s
epoch 8  | loss: 16.42386| val_0_rmse: 5.48097 |  0:00:00s
epoch 9  | loss: 10.74776| val_0_rmse: 5.70033 |  0:00:00s
epoch 10 | loss: 6.6894  | val_0_rmse: 6.07578 |  0:00:01s
epoch 11 | loss: 6.3838  | val_0_rmse: 6.29659 |  0:00:01s
epoch 12 | loss: 6.24497 | val_0_rmse: 6.69322 |  0:00:01s
epoch 13 | loss: 6.3382  | val_0_rmse: 6.69701 |  0:00:01s
epoch 14 | loss: 5.23173 | val_0_rmse: 6.47188 |  0:00:01s
epoch 15 | loss: 3.56235 | val_0_rmse: 5.78289 |  0:00:01s
epoch 16 | loss: 2.49014 | val_0_rmse: 5.09298 |  0:0

[I 2025-08-17 19:46:48,141] Trial 111 finished with value: 0.41077587608582405 and parameters: {'n_d': 22, 'n_a': 60, 'n_steps': 3, 'gamma': 1.3468875701484215, 'lambda_sparse': 0.0028448284604161754}. Best is trial 40 with value: 0.2639664310728177.


epoch 62 | loss: 0.08597 | val_0_rmse: 0.44989 |  0:00:05s
epoch 63 | loss: 0.09469 | val_0_rmse: 0.42137 |  0:00:05s

Early stopping occurred at epoch 63 with best_epoch = 53 and best_val_0_rmse = 0.41078




epoch 0  | loss: 100.22435| val_0_rmse: 9.41537 |  0:00:00s
epoch 1  | loss: 75.17268| val_0_rmse: 8.63885 |  0:00:00s
epoch 2  | loss: 55.3292 | val_0_rmse: 7.92298 |  0:00:00s
epoch 3  | loss: 38.95339| val_0_rmse: 7.9611  |  0:00:00s
epoch 4  | loss: 27.52448| val_0_rmse: 7.75356 |  0:00:00s
epoch 5  | loss: 19.49723| val_0_rmse: 8.71604 |  0:00:00s
epoch 6  | loss: 11.65114| val_0_rmse: 11.12847|  0:00:00s
epoch 7  | loss: 9.74356 | val_0_rmse: 10.12264|  0:00:00s
epoch 8  | loss: 7.37445 | val_0_rmse: 7.38904 |  0:00:00s
epoch 9  | loss: 6.07744 | val_0_rmse: 6.51955 |  0:00:01s
epoch 10 | loss: 3.89792 | val_0_rmse: 6.80808 |  0:00:01s
epoch 11 | loss: 2.53371 | val_0_rmse: 5.61603 |  0:00:01s
epoch 12 | loss: 2.18636 | val_0_rmse: 4.75642 |  0:00:01s
epoch 13 | loss: 1.62837 | val_0_rmse: 4.53481 |  0:00:01s
epoch 14 | loss: 1.09041 | val_0_rmse: 4.18325 |  0:00:01s
epoch 15 | loss: 1.06244 | val_0_rmse: 2.96209 |  0:00:01s
epoch 16 | loss: 0.7727  | val_0_rmse: 2.2812  |  0:00:

[I 2025-08-17 19:46:54,516] Trial 112 finished with value: 0.5660680276133204 and parameters: {'n_d': 33, 'n_a': 62, 'n_steps': 3, 'gamma': 1.4338999062116995, 'lambda_sparse': 0.0023481554172484266}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 132.85017| val_0_rmse: 12.1436 |  0:00:00s
epoch 1  | loss: 107.65118| val_0_rmse: 8.44597 |  0:00:00s
epoch 2  | loss: 85.51849| val_0_rmse: 7.5246  |  0:00:00s
epoch 3  | loss: 65.51591| val_0_rmse: 8.69211 |  0:00:00s
epoch 4  | loss: 49.28026| val_0_rmse: 9.9002  |  0:00:00s
epoch 5  | loss: 34.25661| val_0_rmse: 11.0857 |  0:00:00s
epoch 6  | loss: 23.08163| val_0_rmse: 13.42454|  0:00:00s
epoch 7  | loss: 13.61432| val_0_rmse: 15.55966|  0:00:00s
epoch 8  | loss: 9.53484 | val_0_rmse: 16.67345|  0:00:00s
epoch 9  | loss: 7.33956 | val_0_rmse: 13.59105|  0:00:00s
epoch 10 | loss: 7.20543 | val_0_rmse: 11.23897|  0:00:01s
epoch 11 | loss: 5.69761 | val_0_rmse: 9.30257 |  0:00:01s
epoch 12 | loss: 4.24611 | val_0_rmse: 6.7857  |  0:00:01s
epoch 13 | loss: 2.26905 | val_0_rmse: 5.68173 |  0:00:01s
epoch 14 | loss: 1.59495 | val_0_rmse: 4.31836 |  0:00:01s
epoch 15 | loss: 1.87456 | val_0_rmse: 3.51267 |  0:00:01s
epoch 16 | loss: 1.77952 | val_0_rmse: 3.38052 |  0:00

[I 2025-08-17 19:47:04,557] Trial 113 finished with value: 0.29086042428624703 and parameters: {'n_d': 35, 'n_a': 57, 'n_steps': 3, 'gamma': 1.4902773539028467, 'lambda_sparse': 0.004175097978357825}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 120.13731| val_0_rmse: 9.04738 |  0:00:00s
epoch 1  | loss: 95.41899| val_0_rmse: 8.48292 |  0:00:00s
epoch 2  | loss: 75.33   | val_0_rmse: 7.9304  |  0:00:00s
epoch 3  | loss: 55.97908| val_0_rmse: 6.63144 |  0:00:00s
epoch 4  | loss: 39.57623| val_0_rmse: 12.89517|  0:00:00s
epoch 5  | loss: 25.72484| val_0_rmse: 22.19662|  0:00:00s
epoch 6  | loss: 15.56917| val_0_rmse: 29.41518|  0:00:00s
epoch 7  | loss: 9.67888 | val_0_rmse: 33.63643|  0:00:00s
epoch 8  | loss: 8.28384 | val_0_rmse: 36.82755|  0:00:00s
epoch 9  | loss: 7.60343 | val_0_rmse: 38.81925|  0:00:00s
epoch 10 | loss: 5.8825  | val_0_rmse: 42.11675|  0:00:01s
epoch 11 | loss: 5.03034 | val_0_rmse: 40.29133|  0:00:01s
epoch 12 | loss: 3.44201 | val_0_rmse: 33.17403|  0:00:01s


[I 2025-08-17 19:47:06,001] Trial 114 finished with value: 6.631438753784171 and parameters: {'n_d': 37, 'n_a': 55, 'n_steps': 3, 'gamma': 1.4386049953596887, 'lambda_sparse': 0.00406668523290313}. Best is trial 40 with value: 0.2639664310728177.


epoch 13 | loss: 1.96217 | val_0_rmse: 27.84164|  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 6.63144
epoch 0  | loss: 195.77379| val_0_rmse: 15.27708|  0:00:00s




epoch 1  | loss: 156.13919| val_0_rmse: 10.7995 |  0:00:00s
epoch 2  | loss: 119.64756| val_0_rmse: 9.53588 |  0:00:00s
epoch 3  | loss: 93.65823| val_0_rmse: 8.65369 |  0:00:00s
epoch 4  | loss: 71.33123| val_0_rmse: 8.51704 |  0:00:00s
epoch 5  | loss: 52.517  | val_0_rmse: 8.04743 |  0:00:00s
epoch 6  | loss: 36.22827| val_0_rmse: 8.35766 |  0:00:00s
epoch 7  | loss: 24.01045| val_0_rmse: 7.8863  |  0:00:00s
epoch 8  | loss: 14.00863| val_0_rmse: 8.44325 |  0:00:00s
epoch 9  | loss: 7.6921  | val_0_rmse: 8.96643 |  0:00:01s
epoch 10 | loss: 6.76492 | val_0_rmse: 8.03959 |  0:00:01s
epoch 11 | loss: 6.26999 | val_0_rmse: 7.37643 |  0:00:01s
epoch 12 | loss: 5.73424 | val_0_rmse: 5.41568 |  0:00:01s
epoch 13 | loss: 4.81829 | val_0_rmse: 3.95297 |  0:00:01s
epoch 14 | loss: 2.37544 | val_0_rmse: 3.28814 |  0:00:01s
epoch 15 | loss: 1.7142  | val_0_rmse: 2.98293 |  0:00:01s
epoch 16 | loss: 1.49747 | val_0_rmse: 2.81725 |  0:00:01s
epoch 17 | loss: 1.32318 | val_0_rmse: 2.3903  |  0:00

[I 2025-08-17 19:47:13,057] Trial 115 finished with value: 0.4245125898090095 and parameters: {'n_d': 39, 'n_a': 58, 'n_steps': 3, 'gamma': 1.3710867943088747, 'lambda_sparse': 0.0033325651244681454}. Best is trial 40 with value: 0.2639664310728177.



Early stopping occurred at epoch 65 with best_epoch = 55 and best_val_0_rmse = 0.42451




epoch 0  | loss: 117.93831| val_0_rmse: 12.92034|  0:00:00s
epoch 1  | loss: 87.50351| val_0_rmse: 10.90902|  0:00:00s
epoch 2  | loss: 62.52711| val_0_rmse: 10.22354|  0:00:00s
epoch 3  | loss: 44.50682| val_0_rmse: 11.9111 |  0:00:00s
epoch 4  | loss: 27.06764| val_0_rmse: 11.61734|  0:00:00s
epoch 5  | loss: 16.32181| val_0_rmse: 10.66474|  0:00:00s
epoch 6  | loss: 9.71105 | val_0_rmse: 11.24928|  0:00:00s
epoch 7  | loss: 8.85947 | val_0_rmse: 8.26777 |  0:00:00s
epoch 8  | loss: 7.54857 | val_0_rmse: 6.79592 |  0:00:00s
epoch 9  | loss: 6.37451 | val_0_rmse: 5.43448 |  0:00:00s
epoch 10 | loss: 4.63212 | val_0_rmse: 4.99982 |  0:00:01s
epoch 11 | loss: 2.79014 | val_0_rmse: 5.03402 |  0:00:01s
epoch 12 | loss: 1.7331  | val_0_rmse: 4.59318 |  0:00:01s
epoch 13 | loss: 1.36058 | val_0_rmse: 4.27998 |  0:00:01s
epoch 14 | loss: 1.28641 | val_0_rmse: 3.67427 |  0:00:01s
epoch 15 | loss: 0.90287 | val_0_rmse: 3.07629 |  0:00:01s
epoch 16 | loss: 0.92443 | val_0_rmse: 3.0575  |  0:00:

[I 2025-08-17 19:47:19,738] Trial 116 finished with value: 0.4107432817081414 and parameters: {'n_d': 33, 'n_a': 56, 'n_steps': 3, 'gamma': 1.4741672619103743, 'lambda_sparse': 0.0028334107386524684}. Best is trial 40 with value: 0.2639664310728177.


epoch 66 | loss: 0.10565 | val_0_rmse: 0.41584 |  0:00:06s
epoch 67 | loss: 0.11909 | val_0_rmse: 0.42224 |  0:00:06s

Early stopping occurred at epoch 67 with best_epoch = 57 and best_val_0_rmse = 0.41074




epoch 0  | loss: 161.24696| val_0_rmse: 17.52369|  0:00:00s
epoch 1  | loss: 131.03912| val_0_rmse: 13.79739|  0:00:00s
epoch 2  | loss: 103.83389| val_0_rmse: 9.59461 |  0:00:00s
epoch 3  | loss: 83.49831| val_0_rmse: 8.00588 |  0:00:00s
epoch 4  | loss: 64.50515| val_0_rmse: 7.75687 |  0:00:00s
epoch 5  | loss: 49.22378| val_0_rmse: 7.33025 |  0:00:00s
epoch 6  | loss: 36.7737 | val_0_rmse: 8.32331 |  0:00:00s
epoch 7  | loss: 24.83973| val_0_rmse: 9.10456 |  0:00:00s
epoch 8  | loss: 15.14998| val_0_rmse: 9.6327  |  0:00:00s
epoch 9  | loss: 9.25565 | val_0_rmse: 8.80363 |  0:00:01s
epoch 10 | loss: 5.72818 | val_0_rmse: 7.85364 |  0:00:01s
epoch 11 | loss: 6.48083 | val_0_rmse: 6.77816 |  0:00:01s
epoch 12 | loss: 5.8967  | val_0_rmse: 6.52699 |  0:00:01s
epoch 13 | loss: 4.35705 | val_0_rmse: 5.95154 |  0:00:01s
epoch 14 | loss: 2.52891 | val_0_rmse: 5.412   |  0:00:01s
epoch 15 | loss: 1.47109 | val_0_rmse: 4.85032 |  0:00:01s
epoch 16 | loss: 1.53188 | val_0_rmse: 4.44595 |  0:0

[I 2025-08-17 19:47:30,091] Trial 117 finished with value: 0.32956262482796156 and parameters: {'n_d': 31, 'n_a': 64, 'n_steps': 3, 'gamma': 1.7930995852705784, 'lambda_sparse': 0.0019044000163558326}. Best is trial 40 with value: 0.2639664310728177.


epoch 98 | loss: 0.06974 | val_0_rmse: 0.34699 |  0:00:10s
epoch 99 | loss: 0.07766 | val_0_rmse: 0.3407  |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 97 and best_val_0_rmse = 0.32956




epoch 0  | loss: 122.15791| val_0_rmse: 10.62839|  0:00:00s
epoch 1  | loss: 91.3927 | val_0_rmse: 8.45113 |  0:00:00s
epoch 2  | loss: 68.32434| val_0_rmse: 8.33675 |  0:00:00s
epoch 3  | loss: 48.20682| val_0_rmse: 8.6775  |  0:00:00s
epoch 4  | loss: 33.38988| val_0_rmse: 7.46265 |  0:00:00s
epoch 5  | loss: 23.83397| val_0_rmse: 6.74805 |  0:00:00s
epoch 6  | loss: 20.54789| val_0_rmse: 7.7286  |  0:00:00s
epoch 7  | loss: 17.7622 | val_0_rmse: 7.41923 |  0:00:00s
epoch 8  | loss: 15.1259 | val_0_rmse: 7.43058 |  0:00:01s
epoch 9  | loss: 12.34462| val_0_rmse: 7.9214  |  0:00:01s
epoch 10 | loss: 12.59755| val_0_rmse: 6.73105 |  0:00:01s
epoch 11 | loss: 8.74844 | val_0_rmse: 6.56406 |  0:00:01s
epoch 12 | loss: 6.81476 | val_0_rmse: 6.22165 |  0:00:01s
epoch 13 | loss: 4.72661 | val_0_rmse: 5.9596  |  0:00:01s
epoch 14 | loss: 4.88913 | val_0_rmse: 5.16277 |  0:00:01s
epoch 15 | loss: 3.81809 | val_0_rmse: 5.20724 |  0:00:01s
epoch 16 | loss: 2.56193 | val_0_rmse: 5.00483 |  0:00:

[I 2025-08-17 19:47:34,542] Trial 118 finished with value: 2.6042830617546375 and parameters: {'n_d': 34, 'n_a': 43, 'n_steps': 4, 'gamma': 1.5859482073224727, 'lambda_sparse': 0.006525601736707683}. Best is trial 40 with value: 0.2639664310728177.


epoch 34 | loss: 0.36389 | val_0_rmse: 3.37278 |  0:00:04s

Early stopping occurred at epoch 34 with best_epoch = 24 and best_val_0_rmse = 2.60428




epoch 0  | loss: 135.60185| val_0_rmse: 9.19416 |  0:00:00s
epoch 1  | loss: 107.88937| val_0_rmse: 8.3763  |  0:00:00s
epoch 2  | loss: 84.68682| val_0_rmse: 7.51841 |  0:00:00s
epoch 3  | loss: 65.4366 | val_0_rmse: 7.0135  |  0:00:00s
epoch 4  | loss: 48.13438| val_0_rmse: 6.04551 |  0:00:00s
epoch 5  | loss: 34.56451| val_0_rmse: 6.06276 |  0:00:00s
epoch 6  | loss: 23.85688| val_0_rmse: 5.33928 |  0:00:00s
epoch 7  | loss: 14.1806 | val_0_rmse: 6.13358 |  0:00:00s
epoch 8  | loss: 9.07598 | val_0_rmse: 6.74784 |  0:00:01s
epoch 9  | loss: 7.27925 | val_0_rmse: 6.69104 |  0:00:01s
epoch 10 | loss: 7.42742 | val_0_rmse: 6.31587 |  0:00:01s
epoch 11 | loss: 6.74448 | val_0_rmse: 5.60123 |  0:00:01s
epoch 12 | loss: 5.11465 | val_0_rmse: 5.62186 |  0:00:01s
epoch 13 | loss: 3.70899 | val_0_rmse: 5.40867 |  0:00:01s
epoch 14 | loss: 2.07911 | val_0_rmse: 5.32264 |  0:00:01s
epoch 15 | loss: 1.52848 | val_0_rmse: 4.76072 |  0:00:01s
epoch 16 | loss: 1.29932 | val_0_rmse: 4.89546 |  0:00

[I 2025-08-17 19:47:41,525] Trial 119 finished with value: 0.5297064405192031 and parameters: {'n_d': 30, 'n_a': 61, 'n_steps': 3, 'gamma': 1.5422930931749241, 'lambda_sparse': 0.004614033757867384}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 122.0652| val_0_rmse: 10.04181|  0:00:00s
epoch 1  | loss: 101.82466| val_0_rmse: 10.47896|  0:00:00s
epoch 2  | loss: 82.29736| val_0_rmse: 9.17661 |  0:00:00s
epoch 3  | loss: 65.96831| val_0_rmse: 8.42335 |  0:00:00s
epoch 4  | loss: 50.1025 | val_0_rmse: 7.43003 |  0:00:00s
epoch 5  | loss: 40.00116| val_0_rmse: 6.2427  |  0:00:00s
epoch 6  | loss: 27.42145| val_0_rmse: 6.40946 |  0:00:00s
epoch 7  | loss: 18.87237| val_0_rmse: 6.78456 |  0:00:00s
epoch 8  | loss: 10.91093| val_0_rmse: 6.23755 |  0:00:00s
epoch 9  | loss: 7.19775 | val_0_rmse: 6.28427 |  0:00:01s
epoch 10 | loss: 5.49374 | val_0_rmse: 5.90094 |  0:00:01s
epoch 11 | loss: 5.88437 | val_0_rmse: 5.59133 |  0:00:01s
epoch 12 | loss: 4.65144 | val_0_rmse: 5.326   |  0:00:01s
epoch 13 | loss: 2.91414 | val_0_rmse: 4.57273 |  0:00:01s
epoch 14 | loss: 1.42089 | val_0_rmse: 3.72919 |  0:00:01s
epoch 15 | loss: 1.49401 | val_0_rmse: 3.20389 |  0:00:01s
epoch 16 | loss: 1.24082 | val_0_rmse: 2.9882  |  0:00:

[I 2025-08-17 19:47:49,668] Trial 120 finished with value: 0.287677276951161 and parameters: {'n_d': 27, 'n_a': 57, 'n_steps': 3, 'gamma': 1.6471401849551706, 'lambda_sparse': 0.0022477708825786005}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 122.06396| val_0_rmse: 10.08799|  0:00:00s
epoch 1  | loss: 101.98878| val_0_rmse: 9.15743 |  0:00:00s
epoch 2  | loss: 83.48256| val_0_rmse: 9.21057 |  0:00:00s
epoch 3  | loss: 64.15379| val_0_rmse: 8.02968 |  0:00:00s
epoch 4  | loss: 49.18472| val_0_rmse: 7.85617 |  0:00:00s
epoch 5  | loss: 35.90409| val_0_rmse: 9.75395 |  0:00:00s
epoch 6  | loss: 24.41485| val_0_rmse: 11.47929|  0:00:00s
epoch 7  | loss: 13.75658| val_0_rmse: 14.51412|  0:00:00s
epoch 8  | loss: 9.78266 | val_0_rmse: 15.00122|  0:00:00s
epoch 9  | loss: 7.0934  | val_0_rmse: 13.42959|  0:00:01s
epoch 10 | loss: 6.9575  | val_0_rmse: 10.39475|  0:00:01s
epoch 11 | loss: 6.53353 | val_0_rmse: 6.97558 |  0:00:01s
epoch 12 | loss: 4.64243 | val_0_rmse: 4.62411 |  0:00:01s
epoch 13 | loss: 2.86534 | val_0_rmse: 2.90444 |  0:00:01s
epoch 14 | loss: 1.58508 | val_0_rmse: 2.78957 |  0:00:01s
epoch 15 | loss: 1.43987 | val_0_rmse: 2.70669 |  0:00:01s
epoch 16 | loss: 1.8583  | val_0_rmse: 2.55428 |  0:00

[I 2025-08-17 19:48:00,063] Trial 121 finished with value: 0.411297430516546 and parameters: {'n_d': 27, 'n_a': 57, 'n_steps': 3, 'gamma': 1.6456191302281828, 'lambda_sparse': 0.00226701641353157}. Best is trial 40 with value: 0.2639664310728177.


epoch 98 | loss: 0.05636 | val_0_rmse: 0.42635 |  0:00:10s
epoch 99 | loss: 0.05446 | val_0_rmse: 0.4113  |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.4113




epoch 0  | loss: 149.15852| val_0_rmse: 10.40444|  0:00:00s
epoch 1  | loss: 112.92605| val_0_rmse: 9.21415 |  0:00:00s
epoch 2  | loss: 86.02887| val_0_rmse: 8.88286 |  0:00:00s
epoch 3  | loss: 64.28912| val_0_rmse: 14.81949|  0:00:00s
epoch 4  | loss: 46.8966 | val_0_rmse: 18.49487|  0:00:00s
epoch 5  | loss: 34.95116| val_0_rmse: 18.10995|  0:00:00s
epoch 6  | loss: 20.49463| val_0_rmse: 15.77696|  0:00:00s
epoch 7  | loss: 13.28352| val_0_rmse: 13.30409|  0:00:00s
epoch 8  | loss: 8.34526 | val_0_rmse: 9.20325 |  0:00:00s
epoch 9  | loss: 7.373   | val_0_rmse: 5.99596 |  0:00:00s
epoch 10 | loss: 7.94534 | val_0_rmse: 4.89028 |  0:00:01s
epoch 11 | loss: 6.4968  | val_0_rmse: 4.42742 |  0:00:01s
epoch 12 | loss: 3.84893 | val_0_rmse: 4.66908 |  0:00:01s
epoch 13 | loss: 2.46586 | val_0_rmse: 4.28566 |  0:00:01s
epoch 14 | loss: 1.78895 | val_0_rmse: 3.93035 |  0:00:01s
epoch 15 | loss: 1.81579 | val_0_rmse: 3.60521 |  0:00:01s
epoch 16 | loss: 1.26764 | val_0_rmse: 3.11574 |  0:00

[I 2025-08-17 19:48:08,280] Trial 122 finished with value: 0.4385275952298454 and parameters: {'n_d': 29, 'n_a': 58, 'n_steps': 3, 'gamma': 1.621048443715518, 'lambda_sparse': 0.01257750748337098}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 150.01399| val_0_rmse: 10.78188|  0:00:00s
epoch 1  | loss: 121.88497| val_0_rmse: 8.95537 |  0:00:00s
epoch 2  | loss: 96.11836| val_0_rmse: 8.57431 |  0:00:00s
epoch 3  | loss: 76.44305| val_0_rmse: 7.86983 |  0:00:00s
epoch 4  | loss: 57.26341| val_0_rmse: 6.69668 |  0:00:00s
epoch 5  | loss: 41.65442| val_0_rmse: 6.26371 |  0:00:00s
epoch 6  | loss: 28.80507| val_0_rmse: 7.2357  |  0:00:00s
epoch 7  | loss: 18.97693| val_0_rmse: 9.80681 |  0:00:00s
epoch 8  | loss: 11.56102| val_0_rmse: 16.48828|  0:00:00s
epoch 9  | loss: 8.3499  | val_0_rmse: 22.20035|  0:00:01s
epoch 10 | loss: 7.02761 | val_0_rmse: 26.68438|  0:00:01s
epoch 11 | loss: 7.19249 | val_0_rmse: 25.65531|  0:00:01s
epoch 12 | loss: 7.33738 | val_0_rmse: 21.90833|  0:00:01s
epoch 13 | loss: 4.88538 | val_0_rmse: 16.75021|  0:00:01s
epoch 14 | loss: 3.14663 | val_0_rmse: 12.93552|  0:00:01s


[I 2025-08-17 19:48:10,047] Trial 123 finished with value: 6.263707657653406 and parameters: {'n_d': 35, 'n_a': 60, 'n_steps': 3, 'gamma': 1.705450394815359, 'lambda_sparse': 0.0007758083429532216}. Best is trial 40 with value: 0.2639664310728177.


epoch 15 | loss: 1.75229 | val_0_rmse: 9.60881 |  0:00:01s

Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_rmse = 6.26371




epoch 0  | loss: 144.02208| val_0_rmse: 9.45148 |  0:00:00s
epoch 1  | loss: 96.77456| val_0_rmse: 8.35013 |  0:00:00s
epoch 2  | loss: 64.74246| val_0_rmse: 7.98356 |  0:00:00s
epoch 3  | loss: 45.0215 | val_0_rmse: 11.10807|  0:00:00s
epoch 4  | loss: 31.3128 | val_0_rmse: 13.6316 |  0:00:00s
epoch 5  | loss: 25.09155| val_0_rmse: 16.03144|  0:00:00s
epoch 6  | loss: 17.95515| val_0_rmse: 16.53442|  0:00:00s
epoch 7  | loss: 15.99778| val_0_rmse: 13.96887|  0:00:00s
epoch 8  | loss: 10.5453 | val_0_rmse: 10.99538|  0:00:00s
epoch 9  | loss: 7.67952 | val_0_rmse: 9.37732 |  0:00:01s
epoch 10 | loss: 4.60636 | val_0_rmse: 8.21609 |  0:00:01s
epoch 11 | loss: 3.08227 | val_0_rmse: 7.19337 |  0:00:01s
epoch 12 | loss: 3.00596 | val_0_rmse: 6.54741 |  0:00:01s
epoch 13 | loss: 2.02484 | val_0_rmse: 5.23354 |  0:00:01s
epoch 14 | loss: 1.55051 | val_0_rmse: 4.16721 |  0:00:01s
epoch 15 | loss: 1.19797 | val_0_rmse: 3.43339 |  0:00:01s
epoch 16 | loss: 0.91527 | val_0_rmse: 3.1307  |  0:00:

[I 2025-08-17 19:48:19,925] Trial 124 finished with value: 0.3070496864918863 and parameters: {'n_d': 60, 'n_a': 54, 'n_steps': 3, 'gamma': 1.55959452623553, 'lambda_sparse': 0.0016601260174063713}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 136.31235| val_0_rmse: 9.94474 |  0:00:00s
epoch 1  | loss: 98.10796| val_0_rmse: 10.67739|  0:00:00s
epoch 2  | loss: 70.34682| val_0_rmse: 12.01929|  0:00:00s
epoch 3  | loss: 49.65948| val_0_rmse: 12.18651|  0:00:00s
epoch 4  | loss: 36.44108| val_0_rmse: 11.73079|  0:00:00s
epoch 5  | loss: 28.46268| val_0_rmse: 9.43762 |  0:00:00s
epoch 6  | loss: 19.82272| val_0_rmse: 9.52943 |  0:00:00s
epoch 7  | loss: 15.44867| val_0_rmse: 9.82143 |  0:00:00s
epoch 8  | loss: 11.1919 | val_0_rmse: 10.96553|  0:00:00s
epoch 9  | loss: 10.44856| val_0_rmse: 11.63671|  0:00:00s
epoch 10 | loss: 7.36339 | val_0_rmse: 12.79383|  0:00:01s
epoch 11 | loss: 5.59773 | val_0_rmse: 10.98541|  0:00:01s
epoch 12 | loss: 3.82549 | val_0_rmse: 8.23141 |  0:00:01s
epoch 13 | loss: 2.70972 | val_0_rmse: 6.79153 |  0:00:01s
epoch 14 | loss: 2.40114 | val_0_rmse: 6.23161 |  0:00:01s
epoch 15 | loss: 1.69194 | val_0_rmse: 5.88063 |  0:00:01s
epoch 16 | loss: 1.15826 | val_0_rmse: 5.65612 |  0:00:

[I 2025-08-17 19:48:29,868] Trial 125 finished with value: 0.2920101082750822 and parameters: {'n_d': 51, 'n_a': 45, 'n_steps': 3, 'gamma': 1.6706252945013311, 'lambda_sparse': 0.0030487020808835064}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 148.43922| val_0_rmse: 14.04769|  0:00:00s
epoch 1  | loss: 105.35107| val_0_rmse: 8.89942 |  0:00:00s
epoch 2  | loss: 73.48329| val_0_rmse: 7.96923 |  0:00:00s
epoch 3  | loss: 49.9119 | val_0_rmse: 8.54302 |  0:00:00s
epoch 4  | loss: 31.23933| val_0_rmse: 9.10656 |  0:00:00s
epoch 5  | loss: 21.86106| val_0_rmse: 11.11766|  0:00:00s
epoch 6  | loss: 14.66595| val_0_rmse: 12.31541|  0:00:00s
epoch 7  | loss: 12.89148| val_0_rmse: 12.30227|  0:00:00s
epoch 8  | loss: 11.6687 | val_0_rmse: 11.8955 |  0:00:00s
epoch 9  | loss: 8.56445 | val_0_rmse: 10.39303|  0:00:00s
epoch 10 | loss: 6.49991 | val_0_rmse: 8.00456 |  0:00:01s
epoch 11 | loss: 4.08954 | val_0_rmse: 5.99862 |  0:00:01s
epoch 12 | loss: 2.94622 | val_0_rmse: 4.62163 |  0:00:01s
epoch 13 | loss: 2.4004  | val_0_rmse: 3.84929 |  0:00:01s
epoch 14 | loss: 1.94245 | val_0_rmse: 3.7189  |  0:00:01s
epoch 15 | loss: 1.54421 | val_0_rmse: 3.85554 |  0:00:01s
epoch 16 | loss: 1.20843 | val_0_rmse: 3.95264 |  0:00

[I 2025-08-17 19:48:36,180] Trial 126 finished with value: 0.5932298721839582 and parameters: {'n_d': 52, 'n_a': 45, 'n_steps': 3, 'gamma': 1.6628780265224492, 'lambda_sparse': 0.0030744654935283867}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 123.56294| val_0_rmse: 8.98782 |  0:00:00s
epoch 1  | loss: 85.66255| val_0_rmse: 7.51914 |  0:00:00s
epoch 2  | loss: 59.37442| val_0_rmse: 6.98117 |  0:00:00s
epoch 3  | loss: 38.40799| val_0_rmse: 7.47827 |  0:00:00s
epoch 4  | loss: 22.80649| val_0_rmse: 8.39249 |  0:00:00s
epoch 5  | loss: 16.36857| val_0_rmse: 9.31967 |  0:00:00s
epoch 6  | loss: 15.45892| val_0_rmse: 11.57943|  0:00:00s
epoch 7  | loss: 13.83018| val_0_rmse: 19.86359|  0:00:00s
epoch 8  | loss: 11.6462 | val_0_rmse: 20.10059|  0:00:00s
epoch 9  | loss: 6.38919 | val_0_rmse: 14.38905|  0:00:01s
epoch 10 | loss: 3.92006 | val_0_rmse: 10.78702|  0:00:01s


[I 2025-08-17 19:48:37,646] Trial 127 finished with value: 6.981168259913109 and parameters: {'n_d': 55, 'n_a': 42, 'n_steps': 3, 'gamma': 1.6016727190549633, 'lambda_sparse': 0.0037272633322288353}. Best is trial 40 with value: 0.2639664310728177.


epoch 11 | loss: 3.41364 | val_0_rmse: 9.75166 |  0:00:01s
epoch 12 | loss: 3.28974 | val_0_rmse: 9.75973 |  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 6.98117




epoch 0  | loss: 105.72083| val_0_rmse: 9.23457 |  0:00:00s
epoch 1  | loss: 72.13537| val_0_rmse: 8.49708 |  0:00:00s
epoch 2  | loss: 44.66126| val_0_rmse: 13.73175|  0:00:00s
epoch 3  | loss: 28.11966| val_0_rmse: 17.53199|  0:00:00s
epoch 4  | loss: 21.23295| val_0_rmse: 20.31233|  0:00:00s
epoch 5  | loss: 17.44771| val_0_rmse: 19.98581|  0:00:00s
epoch 6  | loss: 15.61265| val_0_rmse: 18.35912|  0:00:00s
epoch 7  | loss: 13.92333| val_0_rmse: 17.36386|  0:00:00s
epoch 8  | loss: 9.97427 | val_0_rmse: 14.20442|  0:00:01s
epoch 9  | loss: 7.75651 | val_0_rmse: 10.30401|  0:00:01s
epoch 10 | loss: 4.20975 | val_0_rmse: 8.66    |  0:00:01s
epoch 11 | loss: 3.81428 | val_0_rmse: 7.6232  |  0:00:01s
epoch 12 | loss: 3.21959 | val_0_rmse: 7.31653 |  0:00:01s
epoch 13 | loss: 2.41886 | val_0_rmse: 7.27703 |  0:00:01s
epoch 14 | loss: 1.49439 | val_0_rmse: 7.39812 |  0:00:01s
epoch 15 | loss: 1.88835 | val_0_rmse: 6.96515 |  0:00:01s
epoch 16 | loss: 1.53325 | val_0_rmse: 5.85589 |  0:00:

[I 2025-08-17 19:48:45,339] Trial 128 finished with value: 0.6855749044565306 and parameters: {'n_d': 50, 'n_a': 40, 'n_steps': 4, 'gamma': 1.6359549021328934, 'lambda_sparse': 0.0025051455785173875}. Best is trial 40 with value: 0.2639664310728177.


epoch 60 | loss: 0.27733 | val_0_rmse: 0.72067 |  0:00:07s
epoch 61 | loss: 0.30215 | val_0_rmse: 0.75242 |  0:00:07s

Early stopping occurred at epoch 61 with best_epoch = 51 and best_val_0_rmse = 0.68557




epoch 0  | loss: 186.85527| val_0_rmse: 15.64186|  0:00:00s
epoch 1  | loss: 137.27119| val_0_rmse: 10.33633|  0:00:00s
epoch 2  | loss: 101.91725| val_0_rmse: 9.61548 |  0:00:00s
epoch 3  | loss: 75.04209| val_0_rmse: 11.13674|  0:00:00s
epoch 4  | loss: 53.04955| val_0_rmse: 12.13543|  0:00:00s
epoch 5  | loss: 36.01164| val_0_rmse: 11.4407 |  0:00:00s
epoch 6  | loss: 22.68809| val_0_rmse: 8.74525 |  0:00:00s
epoch 7  | loss: 13.90018| val_0_rmse: 10.33056|  0:00:00s
epoch 8  | loss: 10.63039| val_0_rmse: 12.08046|  0:00:00s
epoch 9  | loss: 9.23196 | val_0_rmse: 12.67701|  0:00:00s
epoch 10 | loss: 8.81612 | val_0_rmse: 12.72297|  0:00:01s
epoch 11 | loss: 5.73692 | val_0_rmse: 9.13394 |  0:00:01s
epoch 12 | loss: 3.92175 | val_0_rmse: 6.64764 |  0:00:01s
epoch 13 | loss: 2.58059 | val_0_rmse: 4.1184  |  0:00:01s
epoch 14 | loss: 2.17309 | val_0_rmse: 3.32043 |  0:00:01s
epoch 15 | loss: 2.12779 | val_0_rmse: 3.90516 |  0:00:01s
epoch 16 | loss: 1.49229 | val_0_rmse: 3.28875 |  0:0

[I 2025-08-17 19:48:49,331] Trial 129 finished with value: 1.0783348987892192 and parameters: {'n_d': 48, 'n_a': 48, 'n_steps': 3, 'gamma': 1.4921312132998827, 'lambda_sparse': 0.0043567225181152785}. Best is trial 40 with value: 0.2639664310728177.


epoch 38 | loss: 0.26153 | val_0_rmse: 1.16512 |  0:00:03s
epoch 39 | loss: 0.17439 | val_0_rmse: 1.1229  |  0:00:03s

Early stopping occurred at epoch 39 with best_epoch = 29 and best_val_0_rmse = 1.07833




epoch 0  | loss: 174.10524| val_0_rmse: 10.69859|  0:00:00s
epoch 1  | loss: 127.8814| val_0_rmse: 9.09874 |  0:00:00s
epoch 2  | loss: 92.79778| val_0_rmse: 8.6429  |  0:00:00s
epoch 3  | loss: 65.41234| val_0_rmse: 8.26979 |  0:00:00s
epoch 4  | loss: 42.69963| val_0_rmse: 11.64266|  0:00:00s
epoch 5  | loss: 26.61101| val_0_rmse: 11.57042|  0:00:00s
epoch 6  | loss: 18.77837| val_0_rmse: 14.81809|  0:00:00s
epoch 7  | loss: 14.10755| val_0_rmse: 15.86023|  0:00:00s
epoch 8  | loss: 11.39785| val_0_rmse: 18.14632|  0:00:00s
epoch 9  | loss: 8.94709 | val_0_rmse: 18.30267|  0:00:01s
epoch 10 | loss: 7.10607 | val_0_rmse: 19.86235|  0:00:01s
epoch 11 | loss: 4.5079  | val_0_rmse: 17.66093|  0:00:01s
epoch 12 | loss: 2.32874 | val_0_rmse: 17.36938|  0:00:01s


[I 2025-08-17 19:48:50,839] Trial 130 finished with value: 8.26979247151676 and parameters: {'n_d': 44, 'n_a': 46, 'n_steps': 3, 'gamma': 1.5821887021987104, 'lambda_sparse': 0.009280822497785375}. Best is trial 40 with value: 0.2639664310728177.


epoch 13 | loss: 1.9518  | val_0_rmse: 15.04404|  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 8.26979




epoch 0  | loss: 142.94405| val_0_rmse: 13.72339|  0:00:00s
epoch 1  | loss: 118.3073| val_0_rmse: 10.42544|  0:00:00s
epoch 2  | loss: 95.94291| val_0_rmse: 8.70214 |  0:00:00s
epoch 3  | loss: 77.68661| val_0_rmse: 8.09191 |  0:00:00s
epoch 4  | loss: 62.50424| val_0_rmse: 10.16302|  0:00:00s
epoch 5  | loss: 46.57201| val_0_rmse: 15.32711|  0:00:00s
epoch 6  | loss: 34.68135| val_0_rmse: 19.89349|  0:00:00s
epoch 7  | loss: 24.42089| val_0_rmse: 24.19248|  0:00:00s
epoch 8  | loss: 16.48008| val_0_rmse: 24.01339|  0:00:00s
epoch 9  | loss: 9.67575 | val_0_rmse: 22.27882|  0:00:00s
epoch 10 | loss: 8.24829 | val_0_rmse: 18.39939|  0:00:01s
epoch 11 | loss: 8.1238  | val_0_rmse: 15.43565|  0:00:01s


[I 2025-08-17 19:48:52,304] Trial 131 finished with value: 8.091912073905544 and parameters: {'n_d': 31, 'n_a': 57, 'n_steps': 3, 'gamma': 1.6847360085895953, 'lambda_sparse': 0.0019080667577552294}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 7.97788 | val_0_rmse: 13.98617|  0:00:01s
epoch 13 | loss: 6.69336 | val_0_rmse: 12.15538|  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 8.09191




epoch 0  | loss: 110.14651| val_0_rmse: 10.95882|  0:00:00s
epoch 1  | loss: 85.50452| val_0_rmse: 11.65182|  0:00:00s
epoch 2  | loss: 66.22829| val_0_rmse: 9.47636 |  0:00:00s
epoch 3  | loss: 47.51572| val_0_rmse: 8.91485 |  0:00:00s
epoch 4  | loss: 29.95952| val_0_rmse: 8.43551 |  0:00:00s
epoch 5  | loss: 21.76608| val_0_rmse: 10.30086|  0:00:00s
epoch 6  | loss: 15.18737| val_0_rmse: 11.69671|  0:00:00s
epoch 7  | loss: 9.76527 | val_0_rmse: 12.8058 |  0:00:00s
epoch 8  | loss: 10.93393| val_0_rmse: 15.22964|  0:00:00s
epoch 9  | loss: 8.73141 | val_0_rmse: 16.9625 |  0:00:00s
epoch 10 | loss: 6.98835 | val_0_rmse: 16.58843|  0:00:01s
epoch 11 | loss: 4.71256 | val_0_rmse: 14.28995|  0:00:01s
epoch 12 | loss: 3.35923 | val_0_rmse: 11.18364|  0:00:01s
epoch 13 | loss: 2.19404 | val_0_rmse: 9.64731 |  0:00:01s
epoch 14 | loss: 2.23812 | val_0_rmse: 7.32509 |  0:00:01s
epoch 15 | loss: 2.10121 | val_0_rmse: 7.12699 |  0:00:01s
epoch 16 | loss: 1.74351 | val_0_rmse: 7.12666 |  0:00:

[I 2025-08-17 19:49:01,851] Trial 132 finished with value: 0.37294626701068545 and parameters: {'n_d': 34, 'n_a': 44, 'n_steps': 3, 'gamma': 1.7170332523228, 'lambda_sparse': 0.02031384490142319}. Best is trial 40 with value: 0.2639664310728177.


epoch 98 | loss: 0.10667 | val_0_rmse: 0.42483 |  0:00:09s
epoch 99 | loss: 0.0894  | val_0_rmse: 0.37295 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.37295




epoch 0  | loss: 164.83391| val_0_rmse: 12.51104|  0:00:00s
epoch 1  | loss: 127.14268| val_0_rmse: 9.28389 |  0:00:00s
epoch 2  | loss: 99.95326| val_0_rmse: 7.81425 |  0:00:00s
epoch 3  | loss: 73.89594| val_0_rmse: 7.54613 |  0:00:00s
epoch 4  | loss: 53.91378| val_0_rmse: 7.55804 |  0:00:00s
epoch 5  | loss: 40.46105| val_0_rmse: 7.83755 |  0:00:00s
epoch 6  | loss: 30.11935| val_0_rmse: 9.9797  |  0:00:00s
epoch 7  | loss: 21.05816| val_0_rmse: 11.33765|  0:00:00s
epoch 8  | loss: 13.52929| val_0_rmse: 13.33373|  0:00:00s
epoch 9  | loss: 9.00421 | val_0_rmse: 13.91982|  0:00:00s
epoch 10 | loss: 6.54023 | val_0_rmse: 13.0545 |  0:00:01s
epoch 11 | loss: 5.04626 | val_0_rmse: 10.96783|  0:00:01s
epoch 12 | loss: 5.27177 | val_0_rmse: 9.66241 |  0:00:01s
epoch 13 | loss: 4.11045 | val_0_rmse: 8.50071 |  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 7.54613


[I 2025-08-17 19:49:03,293] Trial 133 finished with value: 7.546128465030668 and parameters: {'n_d': 33, 'n_a': 55, 'n_steps': 3, 'gamma': 1.6184031839802382, 'lambda_sparse': 0.0031142340355795155}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 132.78111| val_0_rmse: 11.53591|  0:00:00s
epoch 1  | loss: 101.58134| val_0_rmse: 9.46458 |  0:00:00s
epoch 2  | loss: 75.16674| val_0_rmse: 8.07447 |  0:00:00s
epoch 3  | loss: 53.01017| val_0_rmse: 9.38731 |  0:00:00s
epoch 4  | loss: 36.75168| val_0_rmse: 8.34956 |  0:00:00s
epoch 5  | loss: 23.42022| val_0_rmse: 9.59911 |  0:00:00s
epoch 6  | loss: 14.2499 | val_0_rmse: 11.13184|  0:00:00s
epoch 7  | loss: 9.67192 | val_0_rmse: 12.44093|  0:00:00s
epoch 8  | loss: 8.15063 | val_0_rmse: 11.59629|  0:00:00s
epoch 9  | loss: 7.39352 | val_0_rmse: 9.71893 |  0:00:01s
epoch 10 | loss: 6.80834 | val_0_rmse: 6.427   |  0:00:01s
epoch 11 | loss: 4.61071 | val_0_rmse: 5.16426 |  0:00:01s
epoch 12 | loss: 2.68927 | val_0_rmse: 4.43055 |  0:00:01s
epoch 13 | loss: 1.99515 | val_0_rmse: 4.06981 |  0:00:01s
epoch 14 | loss: 1.75841 | val_0_rmse: 3.85245 |  0:00:01s
epoch 15 | loss: 1.38679 | val_0_rmse: 3.51776 |  0:00:01s
epoch 16 | loss: 0.93249 | val_0_rmse: 3.04848 |  0:00

[I 2025-08-17 19:49:11,404] Trial 134 finished with value: 0.37754354580137656 and parameters: {'n_d': 38, 'n_a': 59, 'n_steps': 3, 'gamma': 1.6595687202206522, 'lambda_sparse': 0.0021150355889957}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 165.27193| val_0_rmse: 13.51166|  0:00:00s
epoch 1  | loss: 126.20945| val_0_rmse: 11.49739|  0:00:00s
epoch 2  | loss: 98.56877| val_0_rmse: 12.4745 |  0:00:00s
epoch 3  | loss: 75.13819| val_0_rmse: 6.71991 |  0:00:00s
epoch 4  | loss: 56.69085| val_0_rmse: 7.65283 |  0:00:00s
epoch 5  | loss: 38.82929| val_0_rmse: 10.57084|  0:00:00s
epoch 6  | loss: 27.24089| val_0_rmse: 11.89398|  0:00:00s
epoch 7  | loss: 16.07997| val_0_rmse: 15.79631|  0:00:00s
epoch 8  | loss: 9.54292 | val_0_rmse: 17.41038|  0:00:00s
epoch 9  | loss: 6.92822 | val_0_rmse: 17.10417|  0:00:00s
epoch 10 | loss: 5.43495 | val_0_rmse: 15.06132|  0:00:01s
epoch 11 | loss: 6.04795 | val_0_rmse: 13.50167|  0:00:01s


[I 2025-08-17 19:49:12,838] Trial 135 finished with value: 6.7199058363301765 and parameters: {'n_d': 37, 'n_a': 51, 'n_steps': 3, 'gamma': 1.45590942637787, 'lambda_sparse': 0.0025834201688029416}. Best is trial 40 with value: 0.2639664310728177.


epoch 12 | loss: 3.69301 | val_0_rmse: 11.87752|  0:00:01s
epoch 13 | loss: 2.6967  | val_0_rmse: 9.16741 |  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 6.71991




epoch 0  | loss: 156.88098| val_0_rmse: 22.13436|  0:00:00s
epoch 1  | loss: 130.99335| val_0_rmse: 11.25591|  0:00:00s
epoch 2  | loss: 106.47104| val_0_rmse: 9.17781 |  0:00:00s
epoch 3  | loss: 87.22807| val_0_rmse: 8.18565 |  0:00:00s
epoch 4  | loss: 70.13359| val_0_rmse: 7.3661  |  0:00:00s
epoch 5  | loss: 54.55819| val_0_rmse: 6.86566 |  0:00:00s
epoch 6  | loss: 39.97697| val_0_rmse: 6.49362 |  0:00:00s
epoch 7  | loss: 29.2676 | val_0_rmse: 6.42047 |  0:00:00s
epoch 8  | loss: 19.23984| val_0_rmse: 6.6598  |  0:00:00s
epoch 9  | loss: 11.41762| val_0_rmse: 7.64755 |  0:00:00s
epoch 10 | loss: 7.89799 | val_0_rmse: 8.47581 |  0:00:01s
epoch 11 | loss: 6.53095 | val_0_rmse: 9.20338 |  0:00:01s
epoch 12 | loss: 6.63563 | val_0_rmse: 9.00166 |  0:00:01s
epoch 13 | loss: 5.6497  | val_0_rmse: 8.9037  |  0:00:01s
epoch 14 | loss: 4.33412 | val_0_rmse: 8.82384 |  0:00:01s
epoch 15 | loss: 2.51222 | val_0_rmse: 8.07028 |  0:00:01s
epoch 16 | loss: 1.74553 | val_0_rmse: 6.96004 |  0:0

[I 2025-08-17 19:49:14,685] Trial 136 finished with value: 6.42046706933697 and parameters: {'n_d': 28, 'n_a': 62, 'n_steps': 3, 'gamma': 1.7441809852159595, 'lambda_sparse': 0.003668620336564245}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 175.50294| val_0_rmse: 17.69761|  0:00:00s
epoch 1  | loss: 141.08952| val_0_rmse: 12.22287|  0:00:00s
epoch 2  | loss: 114.87172| val_0_rmse: 9.89904 |  0:00:00s
epoch 3  | loss: 92.93217| val_0_rmse: 10.1739 |  0:00:00s
epoch 4  | loss: 74.93057| val_0_rmse: 9.75293 |  0:00:00s
epoch 5  | loss: 56.92878| val_0_rmse: 8.66952 |  0:00:00s
epoch 6  | loss: 42.33541| val_0_rmse: 7.72741 |  0:00:00s
epoch 7  | loss: 29.73349| val_0_rmse: 9.91833 |  0:00:00s
epoch 8  | loss: 19.1352 | val_0_rmse: 14.05286|  0:00:00s
epoch 9  | loss: 12.53254| val_0_rmse: 17.49881|  0:00:00s
epoch 10 | loss: 7.98526 | val_0_rmse: 19.69919|  0:00:01s
epoch 11 | loss: 6.3842  | val_0_rmse: 19.07495|  0:00:01s
epoch 12 | loss: 5.47735 | val_0_rmse: 19.23874|  0:00:01s
epoch 13 | loss: 4.78981 | val_0_rmse: 17.26283|  0:00:01s
epoch 14 | loss: 3.6008  | val_0_rmse: 11.10357|  0:00:01s
epoch 15 | loss: 2.49399 | val_0_rmse: 7.66345 |  0:00:01s
epoch 16 | loss: 1.47901 | val_0_rmse: 5.80693 |  0:0

[I 2025-08-17 19:49:24,581] Trial 137 finished with value: 0.4319328985851184 and parameters: {'n_d': 30, 'n_a': 57, 'n_steps': 3, 'gamma': 1.417144684852084, 'lambda_sparse': 0.0028240656166655716}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 139.22141| val_0_rmse: 11.5314 |  0:00:00s
epoch 1  | loss: 97.84793| val_0_rmse: 9.1973  |  0:00:00s
epoch 2  | loss: 68.75944| val_0_rmse: 9.2505  |  0:00:00s
epoch 3  | loss: 45.76926| val_0_rmse: 8.90378 |  0:00:00s
epoch 4  | loss: 31.31299| val_0_rmse: 7.59772 |  0:00:00s
epoch 5  | loss: 21.98835| val_0_rmse: 9.0176  |  0:00:00s
epoch 6  | loss: 17.03862| val_0_rmse: 8.92065 |  0:00:00s
epoch 7  | loss: 14.04648| val_0_rmse: 9.44403 |  0:00:01s
epoch 8  | loss: 13.94457| val_0_rmse: 9.00141 |  0:00:01s
epoch 9  | loss: 10.64617| val_0_rmse: 8.24373 |  0:00:01s
epoch 10 | loss: 7.72452 | val_0_rmse: 7.55163 |  0:00:01s
epoch 11 | loss: 4.45666 | val_0_rmse: 7.26335 |  0:00:01s
epoch 12 | loss: 3.837   | val_0_rmse: 6.79617 |  0:00:01s
epoch 13 | loss: 3.9823  | val_0_rmse: 6.20874 |  0:00:01s
epoch 14 | loss: 3.24821 | val_0_rmse: 5.88253 |  0:00:01s
epoch 15 | loss: 1.657   | val_0_rmse: 5.85708 |  0:00:02s
epoch 16 | loss: 1.76997 | val_0_rmse: 4.94909 |  0:00:

[I 2025-08-17 19:49:37,550] Trial 138 finished with value: 0.5122478533984858 and parameters: {'n_d': 32, 'n_a': 61, 'n_steps': 4, 'gamma': 1.3885196261797974, 'lambda_sparse': 0.007916769011625401}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 130.13736| val_0_rmse: 10.31741|  0:00:00s
epoch 1  | loss: 97.45548| val_0_rmse: 8.05968 |  0:00:00s
epoch 2  | loss: 72.95202| val_0_rmse: 7.54395 |  0:00:00s
epoch 3  | loss: 52.57598| val_0_rmse: 7.49684 |  0:00:00s
epoch 4  | loss: 37.89956| val_0_rmse: 8.01344 |  0:00:00s
epoch 5  | loss: 23.04908| val_0_rmse: 10.33401|  0:00:00s
epoch 6  | loss: 15.69259| val_0_rmse: 13.32667|  0:00:00s
epoch 7  | loss: 10.41182| val_0_rmse: 17.68594|  0:00:00s
epoch 8  | loss: 8.90478 | val_0_rmse: 19.26994|  0:00:00s
epoch 9  | loss: 9.43172 | val_0_rmse: 15.96662|  0:00:00s
epoch 10 | loss: 7.40746 | val_0_rmse: 12.76366|  0:00:01s
epoch 11 | loss: 4.93035 | val_0_rmse: 11.63532|  0:00:01s
epoch 12 | loss: 2.67271 | val_0_rmse: 10.16127|  0:00:01s


[I 2025-08-17 19:49:39,028] Trial 139 finished with value: 7.496843811866637 and parameters: {'n_d': 36, 'n_a': 53, 'n_steps': 3, 'gamma': 1.565923753716685, 'lambda_sparse': 0.0015734416336170348}. Best is trial 40 with value: 0.2639664310728177.


epoch 13 | loss: 1.8425  | val_0_rmse: 8.99038 |  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 7.49684
epoch 0  | loss: 194.62854| val_0_rmse: 14.43524|  0:00:00s




epoch 1  | loss: 155.79161| val_0_rmse: 11.55531|  0:00:00s
epoch 2  | loss: 127.17573| val_0_rmse: 10.03945|  0:00:00s
epoch 3  | loss: 102.30585| val_0_rmse: 8.97752 |  0:00:00s
epoch 4  | loss: 82.24728| val_0_rmse: 7.96045 |  0:00:00s
epoch 5  | loss: 64.48696| val_0_rmse: 7.06501 |  0:00:00s
epoch 6  | loss: 50.16762| val_0_rmse: 6.75377 |  0:00:00s
epoch 7  | loss: 36.42614| val_0_rmse: 7.57643 |  0:00:00s
epoch 8  | loss: 27.30441| val_0_rmse: 7.62417 |  0:00:00s
epoch 9  | loss: 17.34826| val_0_rmse: 7.75378 |  0:00:00s
epoch 10 | loss: 11.49242| val_0_rmse: 7.8843  |  0:00:01s
epoch 11 | loss: 9.10119 | val_0_rmse: 8.38537 |  0:00:01s
epoch 12 | loss: 7.97751 | val_0_rmse: 8.58814 |  0:00:01s
epoch 13 | loss: 7.57872 | val_0_rmse: 7.75205 |  0:00:01s
epoch 14 | loss: 7.05292 | val_0_rmse: 6.73527 |  0:00:01s
epoch 15 | loss: 4.82148 | val_0_rmse: 5.67912 |  0:00:01s
epoch 16 | loss: 3.35443 | val_0_rmse: 4.69303 |  0:00:01s
epoch 17 | loss: 2.10336 | val_0_rmse: 3.81852 |  0:0

[I 2025-08-17 19:49:43,061] Trial 140 finished with value: 1.5791703183866959 and parameters: {'n_d': 35, 'n_a': 30, 'n_steps': 3, 'gamma': 1.6422106428671643, 'lambda_sparse': 0.0014188866206880028}. Best is trial 40 with value: 0.2639664310728177.


epoch 40 | loss: 0.20574 | val_0_rmse: 2.09375 |  0:00:03s

Early stopping occurred at epoch 40 with best_epoch = 30 and best_val_0_rmse = 1.57917




epoch 0  | loss: 231.15544| val_0_rmse: 21.69519|  0:00:00s
epoch 1  | loss: 125.41768| val_0_rmse: 16.88965|  0:00:00s
epoch 2  | loss: 59.958  | val_0_rmse: 12.39881|  0:00:00s
epoch 3  | loss: 30.46788| val_0_rmse: 12.92537|  0:00:01s
epoch 4  | loss: 25.67889| val_0_rmse: 14.60656|  0:00:01s
epoch 5  | loss: 37.33859| val_0_rmse: 18.29444|  0:00:01s
epoch 6  | loss: 46.62724| val_0_rmse: 16.35356|  0:00:01s
epoch 7  | loss: 31.04562| val_0_rmse: 14.67098|  0:00:02s
epoch 8  | loss: 14.63456| val_0_rmse: 12.51777|  0:00:02s
epoch 9  | loss: 10.41938| val_0_rmse: 10.9545 |  0:00:02s
epoch 10 | loss: 13.11473| val_0_rmse: 9.89949 |  0:00:02s
epoch 11 | loss: 8.90822 | val_0_rmse: 10.0194 |  0:00:03s
epoch 12 | loss: 5.82898 | val_0_rmse: 10.22923|  0:00:03s
epoch 13 | loss: 8.06012 | val_0_rmse: 9.51596 |  0:00:03s
epoch 14 | loss: 6.53358 | val_0_rmse: 7.64722 |  0:00:03s
epoch 15 | loss: 4.90172 | val_0_rmse: 6.43815 |  0:00:03s
epoch 16 | loss: 4.33178 | val_0_rmse: 6.23396 |  0:00

[I 2025-08-17 19:50:00,798] Trial 141 finished with value: 0.9073463503986613 and parameters: {'n_d': 64, 'n_a': 54, 'n_steps': 8, 'gamma': 1.5554588049707643, 'lambda_sparse': 0.0017766522709978404}. Best is trial 40 with value: 0.2639664310728177.


epoch 67 | loss: 0.49209 | val_0_rmse: 1.18708 |  0:00:17s

Early stopping occurred at epoch 67 with best_epoch = 57 and best_val_0_rmse = 0.90735




epoch 0  | loss: 135.22093| val_0_rmse: 10.76543|  0:00:00s
epoch 1  | loss: 89.28491| val_0_rmse: 10.73772|  0:00:00s
epoch 2  | loss: 58.40688| val_0_rmse: 10.55277|  0:00:00s
epoch 3  | loss: 37.11254| val_0_rmse: 13.2889 |  0:00:00s
epoch 4  | loss: 21.56236| val_0_rmse: 15.95152|  0:00:00s
epoch 5  | loss: 15.51589| val_0_rmse: 14.0374 |  0:00:00s
epoch 6  | loss: 14.02296| val_0_rmse: 8.34465 |  0:00:00s
epoch 7  | loss: 9.47412 | val_0_rmse: 6.94252 |  0:00:00s
epoch 8  | loss: 6.34292 | val_0_rmse: 6.4571  |  0:00:00s
epoch 9  | loss: 3.76576 | val_0_rmse: 5.68622 |  0:00:01s
epoch 10 | loss: 3.36631 | val_0_rmse: 5.36417 |  0:00:01s
epoch 11 | loss: 2.1667  | val_0_rmse: 5.21902 |  0:00:01s
epoch 12 | loss: 1.4564  | val_0_rmse: 4.98988 |  0:00:01s
epoch 13 | loss: 1.25427 | val_0_rmse: 4.41028 |  0:00:01s
epoch 14 | loss: 1.26913 | val_0_rmse: 4.10655 |  0:00:01s
epoch 15 | loss: 0.88434 | val_0_rmse: 3.50684 |  0:00:01s
epoch 16 | loss: 0.90642 | val_0_rmse: 3.03297 |  0:00:

[I 2025-08-17 19:50:11,634] Trial 142 finished with value: 0.3140156327838993 and parameters: {'n_d': 60, 'n_a': 52, 'n_steps': 3, 'gamma': 1.59308216576119, 'lambda_sparse': 0.0012315278966851754}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.08295 | val_0_rmse: 0.33937 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 98 and best_val_0_rmse = 0.31402




epoch 0  | loss: 131.23238| val_0_rmse: 9.89248 |  0:00:00s
epoch 1  | loss: 88.74314| val_0_rmse: 7.79847 |  0:00:00s
epoch 2  | loss: 60.3112 | val_0_rmse: 7.30928 |  0:00:00s
epoch 3  | loss: 40.14531| val_0_rmse: 6.89011 |  0:00:00s
epoch 4  | loss: 24.33425| val_0_rmse: 7.17676 |  0:00:00s
epoch 5  | loss: 17.14765| val_0_rmse: 8.25174 |  0:00:00s
epoch 6  | loss: 12.79162| val_0_rmse: 9.16014 |  0:00:00s
epoch 7  | loss: 10.63601| val_0_rmse: 8.72154 |  0:00:00s
epoch 8  | loss: 7.26083 | val_0_rmse: 7.88743 |  0:00:01s
epoch 9  | loss: 4.59651 | val_0_rmse: 6.66359 |  0:00:01s
epoch 10 | loss: 3.04625 | val_0_rmse: 5.73552 |  0:00:01s
epoch 11 | loss: 2.39599 | val_0_rmse: 5.44722 |  0:00:01s
epoch 12 | loss: 1.80798 | val_0_rmse: 5.13441 |  0:00:01s
epoch 13 | loss: 1.52258 | val_0_rmse: 5.02842 |  0:00:01s
epoch 14 | loss: 1.06859 | val_0_rmse: 4.69238 |  0:00:01s
epoch 15 | loss: 1.02608 | val_0_rmse: 4.16897 |  0:00:01s
epoch 16 | loss: 0.7479  | val_0_rmse: 4.28379 |  0:00:

[I 2025-08-17 19:50:18,055] Trial 143 finished with value: 0.6951769283028552 and parameters: {'n_d': 62, 'n_a': 55, 'n_steps': 3, 'gamma': 1.540457707887096, 'lambda_sparse': 0.0017022940021595829}. Best is trial 40 with value: 0.2639664310728177.


epoch 54 | loss: 0.12853 | val_0_rmse: 0.79531 |  0:00:06s
epoch 55 | loss: 0.22587 | val_0_rmse: 0.80268 |  0:00:06s

Early stopping occurred at epoch 55 with best_epoch = 45 and best_val_0_rmse = 0.69518




epoch 0  | loss: 101.35857| val_0_rmse: 10.82442|  0:00:00s
epoch 1  | loss: 67.78821| val_0_rmse: 12.43247|  0:00:00s
epoch 2  | loss: 45.13941| val_0_rmse: 16.15423|  0:00:00s
epoch 3  | loss: 32.40008| val_0_rmse: 17.65863|  0:00:00s
epoch 4  | loss: 20.6042 | val_0_rmse: 18.4482 |  0:00:00s
epoch 5  | loss: 14.53142| val_0_rmse: 17.09325|  0:00:00s
epoch 6  | loss: 12.64624| val_0_rmse: 14.79908|  0:00:00s
epoch 7  | loss: 10.47359| val_0_rmse: 12.94478|  0:00:00s
epoch 8  | loss: 5.04832 | val_0_rmse: 9.97143 |  0:00:00s
epoch 9  | loss: 3.40111 | val_0_rmse: 7.29091 |  0:00:01s
epoch 10 | loss: 2.70102 | val_0_rmse: 5.34196 |  0:00:01s
epoch 11 | loss: 2.98822 | val_0_rmse: 4.84025 |  0:00:01s
epoch 12 | loss: 2.08067 | val_0_rmse: 4.48647 |  0:00:01s
epoch 13 | loss: 1.82896 | val_0_rmse: 4.87236 |  0:00:01s
epoch 14 | loss: 1.83228 | val_0_rmse: 4.60623 |  0:00:01s
epoch 15 | loss: 1.00987 | val_0_rmse: 4.24947 |  0:00:01s
epoch 16 | loss: 1.09252 | val_0_rmse: 4.13742 |  0:00:

[I 2025-08-17 19:50:24,866] Trial 144 finished with value: 0.48303598467615655 and parameters: {'n_d': 57, 'n_a': 59, 'n_steps': 3, 'gamma': 1.614847390023064, 'lambda_sparse': 0.0022126613801191435}. Best is trial 40 with value: 0.2639664310728177.


epoch 57 | loss: 0.1462  | val_0_rmse: 0.58774 |  0:00:06s
epoch 58 | loss: 0.14433 | val_0_rmse: 0.55169 |  0:00:06s

Early stopping occurred at epoch 58 with best_epoch = 48 and best_val_0_rmse = 0.48304




epoch 0  | loss: 263.07666| val_0_rmse: 24.92044|  0:00:00s
epoch 1  | loss: 204.35684| val_0_rmse: 16.60408|  0:00:00s
epoch 2  | loss: 158.23218| val_0_rmse: 12.93222|  0:00:00s
epoch 3  | loss: 123.96618| val_0_rmse: 10.2898 |  0:00:00s
epoch 4  | loss: 99.54527| val_0_rmse: 8.73606 |  0:00:00s
epoch 5  | loss: 76.88995| val_0_rmse: 7.92893 |  0:00:00s
epoch 6  | loss: 59.59099| val_0_rmse: 7.07217 |  0:00:00s
epoch 7  | loss: 44.45942| val_0_rmse: 6.23317 |  0:00:00s
epoch 8  | loss: 31.62188| val_0_rmse: 5.73257 |  0:00:01s
epoch 9  | loss: 22.02088| val_0_rmse: 4.91343 |  0:00:01s
epoch 10 | loss: 13.8079 | val_0_rmse: 4.28338 |  0:00:01s
epoch 11 | loss: 7.00544 | val_0_rmse: 5.06181 |  0:00:01s
epoch 12 | loss: 5.54067 | val_0_rmse: 4.95067 |  0:00:01s
epoch 13 | loss: 5.47799 | val_0_rmse: 4.44864 |  0:00:01s
epoch 14 | loss: 5.55912 | val_0_rmse: 3.96456 |  0:00:01s
epoch 15 | loss: 4.07982 | val_0_rmse: 3.44878 |  0:00:01s
epoch 16 | loss: 2.55494 | val_0_rmse: 3.39744 |  0:

[I 2025-08-17 19:50:33,509] Trial 145 finished with value: 0.39233585165032275 and parameters: {'n_d': 46, 'n_a': 56, 'n_steps': 3, 'gamma': 1.6747242741287849, 'lambda_sparse': 0.0032641595191338743}. Best is trial 40 with value: 0.2639664310728177.


epoch 76 | loss: 0.07996 | val_0_rmse: 0.43038 |  0:00:08s

Early stopping occurred at epoch 76 with best_epoch = 66 and best_val_0_rmse = 0.39234




epoch 0  | loss: 166.31915| val_0_rmse: 11.80784|  0:00:00s
epoch 1  | loss: 115.85858| val_0_rmse: 8.54257 |  0:00:00s
epoch 2  | loss: 79.82034| val_0_rmse: 7.83541 |  0:00:00s
epoch 3  | loss: 51.34914| val_0_rmse: 7.41124 |  0:00:00s
epoch 4  | loss: 33.71629| val_0_rmse: 7.50159 |  0:00:00s
epoch 5  | loss: 20.93169| val_0_rmse: 7.26848 |  0:00:00s
epoch 6  | loss: 14.14616| val_0_rmse: 5.95819 |  0:00:00s
epoch 7  | loss: 11.08389| val_0_rmse: 5.99157 |  0:00:00s
epoch 8  | loss: 9.93526 | val_0_rmse: 6.60063 |  0:00:00s
epoch 9  | loss: 11.02707| val_0_rmse: 6.60224 |  0:00:01s
epoch 10 | loss: 11.31691| val_0_rmse: 5.51016 |  0:00:01s
epoch 11 | loss: 6.21658 | val_0_rmse: 4.59662 |  0:00:01s
epoch 12 | loss: 3.15512 | val_0_rmse: 3.83917 |  0:00:01s
epoch 13 | loss: 1.88107 | val_0_rmse: 3.33911 |  0:00:01s
epoch 14 | loss: 2.20605 | val_0_rmse: 2.9404  |  0:00:01s
epoch 15 | loss: 1.55953 | val_0_rmse: 2.55644 |  0:00:01s
epoch 16 | loss: 0.9959  | val_0_rmse: 2.31351 |  0:00

[I 2025-08-17 19:50:44,473] Trial 146 finished with value: 0.26651451085404804 and parameters: {'n_d': 59, 'n_a': 54, 'n_steps': 3, 'gamma': 1.0492920130545706, 'lambda_sparse': 0.0024057630032325635}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.07359 | val_0_rmse: 0.28825 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 98 and best_val_0_rmse = 0.26651




epoch 0  | loss: 122.92855| val_0_rmse: 8.57098 |  0:00:00s
epoch 1  | loss: 101.93786| val_0_rmse: 7.95283 |  0:00:00s
epoch 2  | loss: 79.81853| val_0_rmse: 7.49798 |  0:00:00s
epoch 3  | loss: 63.0341 | val_0_rmse: 6.87226 |  0:00:00s
epoch 4  | loss: 45.26269| val_0_rmse: 6.27663 |  0:00:00s
epoch 5  | loss: 33.83147| val_0_rmse: 5.53991 |  0:00:00s
epoch 6  | loss: 22.25414| val_0_rmse: 4.69241 |  0:00:00s
epoch 7  | loss: 15.59626| val_0_rmse: 4.6789  |  0:00:00s
epoch 8  | loss: 10.80399| val_0_rmse: 6.38813 |  0:00:01s
epoch 9  | loss: 8.58206 | val_0_rmse: 5.68029 |  0:00:01s
epoch 10 | loss: 6.90737 | val_0_rmse: 4.73749 |  0:00:01s
epoch 11 | loss: 5.94122 | val_0_rmse: 3.9479  |  0:00:01s
epoch 12 | loss: 4.05965 | val_0_rmse: 3.87586 |  0:00:01s
epoch 13 | loss: 2.6599  | val_0_rmse: 3.77536 |  0:00:01s
epoch 14 | loss: 2.17659 | val_0_rmse: 3.91933 |  0:00:01s
epoch 15 | loss: 1.95166 | val_0_rmse: 3.95464 |  0:00:01s
epoch 16 | loss: 1.60101 | val_0_rmse: 4.74149 |  0:00

[I 2025-08-17 19:50:49,106] Trial 147 finished with value: 0.8281409025081146 and parameters: {'n_d': 33, 'n_a': 58, 'n_steps': 3, 'gamma': 1.1708847021410702, 'lambda_sparse': 0.0020784062123728043}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 111.43624| val_0_rmse: 9.66629 |  0:00:00s
epoch 1  | loss: 81.56074| val_0_rmse: 11.69268|  0:00:00s
epoch 2  | loss: 58.39351| val_0_rmse: 8.34258 |  0:00:00s
epoch 3  | loss: 40.78539| val_0_rmse: 7.042   |  0:00:00s
epoch 4  | loss: 26.59551| val_0_rmse: 7.94036 |  0:00:00s
epoch 5  | loss: 18.07895| val_0_rmse: 7.53463 |  0:00:00s
epoch 6  | loss: 12.66675| val_0_rmse: 6.87392 |  0:00:00s
epoch 7  | loss: 12.22709| val_0_rmse: 6.03619 |  0:00:00s
epoch 8  | loss: 9.40115 | val_0_rmse: 5.20799 |  0:00:01s
epoch 9  | loss: 5.9215  | val_0_rmse: 5.1898  |  0:00:01s
epoch 10 | loss: 2.90776 | val_0_rmse: 4.97473 |  0:00:01s
epoch 11 | loss: 2.67171 | val_0_rmse: 3.95026 |  0:00:01s
epoch 12 | loss: 2.07193 | val_0_rmse: 3.43035 |  0:00:01s
epoch 13 | loss: 1.82449 | val_0_rmse: 2.92239 |  0:00:01s
epoch 14 | loss: 1.36978 | val_0_rmse: 2.55519 |  0:00:01s
epoch 15 | loss: 1.08693 | val_0_rmse: 2.75596 |  0:00:01s
epoch 16 | loss: 0.73211 | val_0_rmse: 2.77616 |  0:00:

[I 2025-08-17 19:50:54,987] Trial 148 finished with value: 0.424654220290803 and parameters: {'n_d': 51, 'n_a': 63, 'n_steps': 3, 'gamma': 1.2849486335673719, 'lambda_sparse': 0.0026074078237210366}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 210.29015| val_0_rmse: 15.46242|  0:00:00s
epoch 1  | loss: 184.64523| val_0_rmse: 13.23733|  0:00:00s
epoch 2  | loss: 165.87041| val_0_rmse: 12.2086 |  0:00:00s
epoch 3  | loss: 149.35027| val_0_rmse: 11.60859|  0:00:00s
epoch 4  | loss: 137.05779| val_0_rmse: 11.10805|  0:00:00s
epoch 5  | loss: 125.93639| val_0_rmse: 10.61809|  0:00:00s
epoch 6  | loss: 115.29619| val_0_rmse: 9.90345 |  0:00:00s
epoch 7  | loss: 106.53168| val_0_rmse: 9.15405 |  0:00:00s
epoch 8  | loss: 97.46584| val_0_rmse: 8.22524 |  0:00:00s
epoch 9  | loss: 88.43651| val_0_rmse: 7.52292 |  0:00:01s
epoch 10 | loss: 77.9641 | val_0_rmse: 7.64253 |  0:00:01s
epoch 11 | loss: 68.02056| val_0_rmse: 8.40197 |  0:00:01s
epoch 12 | loss: 58.09889| val_0_rmse: 8.92837 |  0:00:01s
epoch 13 | loss: 48.68311| val_0_rmse: 9.81735 |  0:00:01s
epoch 14 | loss: 38.07506| val_0_rmse: 10.62593|  0:00:01s
epoch 15 | loss: 29.88399| val_0_rmse: 10.73901|  0:00:01s
epoch 16 | loss: 21.52081| val_0_rmse: 9.73704 |

[I 2025-08-17 19:50:57,052] Trial 149 finished with value: 7.522920967174386 and parameters: {'n_d': 22, 'n_a': 45, 'n_steps': 3, 'gamma': 1.0772981568297753, 'lambda_sparse': 0.003908549007421411}. Best is trial 40 with value: 0.2639664310728177.


epoch 18 | loss: 8.61539 | val_0_rmse: 11.98989|  0:00:01s
epoch 19 | loss: 5.05641 | val_0_rmse: 13.86159|  0:00:01s

Early stopping occurred at epoch 19 with best_epoch = 9 and best_val_0_rmse = 7.52292




epoch 0  | loss: 201.36748| val_0_rmse: 12.18316|  0:00:00s
epoch 1  | loss: 151.89139| val_0_rmse: 9.0868  |  0:00:00s
epoch 2  | loss: 115.5063| val_0_rmse: 7.69394 |  0:00:00s
epoch 3  | loss: 86.51229| val_0_rmse: 7.63343 |  0:00:00s
epoch 4  | loss: 63.45348| val_0_rmse: 8.04701 |  0:00:00s
epoch 5  | loss: 44.47025| val_0_rmse: 9.21019 |  0:00:00s
epoch 6  | loss: 28.20795| val_0_rmse: 9.68056 |  0:00:00s
epoch 7  | loss: 19.15824| val_0_rmse: 9.17652 |  0:00:00s
epoch 8  | loss: 14.83261| val_0_rmse: 9.73757 |  0:00:00s
epoch 9  | loss: 12.10442| val_0_rmse: 11.1233 |  0:00:00s
epoch 10 | loss: 12.56281| val_0_rmse: 10.5533 |  0:00:01s
epoch 11 | loss: 11.92383| val_0_rmse: 9.69532 |  0:00:01s
epoch 12 | loss: 9.6044  | val_0_rmse: 8.91678 |  0:00:01s
epoch 13 | loss: 6.58235 | val_0_rmse: 7.27888 |  0:00:01s
epoch 14 | loss: 3.54753 | val_0_rmse: 5.9465  |  0:00:01s
epoch 15 | loss: 2.2176  | val_0_rmse: 5.04976 |  0:00:01s
epoch 16 | loss: 2.49132 | val_0_rmse: 4.32333 |  0:00

[I 2025-08-17 19:51:01,807] Trial 150 finished with value: 0.7400907707321817 and parameters: {'n_d': 54, 'n_a': 25, 'n_steps': 3, 'gamma': 1.0128057664890422, 'lambda_sparse': 0.02856033275024972}. Best is trial 40 with value: 0.2639664310728177.


epoch 46 | loss: 0.22374 | val_0_rmse: 0.93616 |  0:00:04s

Early stopping occurred at epoch 46 with best_epoch = 36 and best_val_0_rmse = 0.74009




epoch 0  | loss: 165.41983| val_0_rmse: 12.08625|  0:00:00s
epoch 1  | loss: 117.66168| val_0_rmse: 10.36287|  0:00:00s
epoch 2  | loss: 80.3946 | val_0_rmse: 10.82914|  0:00:00s
epoch 3  | loss: 50.58516| val_0_rmse: 7.14945 |  0:00:00s
epoch 4  | loss: 30.89571| val_0_rmse: 6.06907 |  0:00:00s
epoch 5  | loss: 19.85089| val_0_rmse: 5.86323 |  0:00:00s
epoch 6  | loss: 12.8886 | val_0_rmse: 6.04481 |  0:00:00s
epoch 7  | loss: 11.59042| val_0_rmse: 5.90484 |  0:00:00s
epoch 8  | loss: 11.12344| val_0_rmse: 4.886   |  0:00:01s
epoch 9  | loss: 8.46204 | val_0_rmse: 4.75582 |  0:00:01s
epoch 10 | loss: 4.94708 | val_0_rmse: 4.24832 |  0:00:01s
epoch 11 | loss: 3.41828 | val_0_rmse: 4.66711 |  0:00:01s
epoch 12 | loss: 2.46644 | val_0_rmse: 4.46647 |  0:00:01s
epoch 13 | loss: 1.87103 | val_0_rmse: 5.11255 |  0:00:01s
epoch 14 | loss: 1.59596 | val_0_rmse: 4.38587 |  0:00:01s
epoch 15 | loss: 1.05056 | val_0_rmse: 3.39977 |  0:00:01s
epoch 16 | loss: 0.96903 | val_0_rmse: 2.7291  |  0:00

[I 2025-08-17 19:51:08,855] Trial 151 finished with value: 0.34467733613859547 and parameters: {'n_d': 59, 'n_a': 54, 'n_steps': 3, 'gamma': 1.5774385360688765, 'lambda_sparse': 0.0019023549114829676}. Best is trial 40 with value: 0.2639664310728177.


epoch 63 | loss: 0.20644 | val_0_rmse: 0.6255  |  0:00:06s

Early stopping occurred at epoch 63 with best_epoch = 53 and best_val_0_rmse = 0.34468




epoch 0  | loss: 152.25119| val_0_rmse: 10.76793|  0:00:00s
epoch 1  | loss: 118.60244| val_0_rmse: 9.1554  |  0:00:00s
epoch 2  | loss: 89.50146| val_0_rmse: 8.6251  |  0:00:00s
epoch 3  | loss: 63.01203| val_0_rmse: 9.92236 |  0:00:00s
epoch 4  | loss: 44.39518| val_0_rmse: 11.15177|  0:00:00s
epoch 5  | loss: 29.83253| val_0_rmse: 12.22418|  0:00:00s
epoch 6  | loss: 17.94155| val_0_rmse: 13.43858|  0:00:00s
epoch 7  | loss: 10.82814| val_0_rmse: 13.74924|  0:00:00s
epoch 8  | loss: 8.75693 | val_0_rmse: 11.87506|  0:00:00s
epoch 9  | loss: 7.60192 | val_0_rmse: 11.16804|  0:00:01s
epoch 10 | loss: 7.27937 | val_0_rmse: 10.2272 |  0:00:01s
epoch 11 | loss: 5.5001  | val_0_rmse: 9.28021 |  0:00:01s
epoch 12 | loss: 4.75591 | val_0_rmse: 8.20666 |  0:00:01s
epoch 13 | loss: 2.2986  | val_0_rmse: 7.0919  |  0:00:01s
epoch 14 | loss: 1.97743 | val_0_rmse: 6.17469 |  0:00:01s
epoch 15 | loss: 1.61669 | val_0_rmse: 5.55525 |  0:00:01s
epoch 16 | loss: 1.50353 | val_0_rmse: 5.30317 |  0:00

[I 2025-08-17 19:51:14,724] Trial 152 finished with value: 0.7751005324886612 and parameters: {'n_d': 42, 'n_a': 53, 'n_steps': 3, 'gamma': 1.6972830344825263, 'lambda_sparse': 0.0023279904592588925}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 171.70303| val_0_rmse: 11.14081|  0:00:00s
epoch 1  | loss: 121.47954| val_0_rmse: 9.00327 |  0:00:00s
epoch 2  | loss: 84.88877| val_0_rmse: 11.73277|  0:00:00s
epoch 3  | loss: 55.50417| val_0_rmse: 11.05832|  0:00:00s
epoch 4  | loss: 33.32765| val_0_rmse: 9.67829 |  0:00:00s
epoch 5  | loss: 17.65285| val_0_rmse: 8.90259 |  0:00:00s
epoch 6  | loss: 10.3113 | val_0_rmse: 10.67426|  0:00:00s
epoch 7  | loss: 10.99276| val_0_rmse: 11.33143|  0:00:00s
epoch 8  | loss: 11.79733| val_0_rmse: 7.82506 |  0:00:00s
epoch 9  | loss: 7.54427 | val_0_rmse: 6.82528 |  0:00:01s
epoch 10 | loss: 4.6432  | val_0_rmse: 6.23175 |  0:00:01s
epoch 11 | loss: 3.22765 | val_0_rmse: 5.69701 |  0:00:01s
epoch 12 | loss: 2.20663 | val_0_rmse: 5.73853 |  0:00:01s
epoch 13 | loss: 2.51655 | val_0_rmse: 5.16723 |  0:00:01s
epoch 14 | loss: 1.75355 | val_0_rmse: 5.44397 |  0:00:01s
epoch 15 | loss: 1.20282 | val_0_rmse: 5.34399 |  0:00:01s
epoch 16 | loss: 1.16294 | val_0_rmse: 4.49908 |  0:00

[I 2025-08-17 19:51:22,758] Trial 153 finished with value: 0.4201844379295759 and parameters: {'n_d': 61, 'n_a': 56, 'n_steps': 3, 'gamma': 1.2302467689950558, 'lambda_sparse': 0.016753174741882097}. Best is trial 40 with value: 0.2639664310728177.


epoch 72 | loss: 0.1213  | val_0_rmse: 0.55454 |  0:00:07s
epoch 73 | loss: 0.19429 | val_0_rmse: 0.5801  |  0:00:07s

Early stopping occurred at epoch 73 with best_epoch = 63 and best_val_0_rmse = 0.42018




epoch 0  | loss: 157.12994| val_0_rmse: 13.73436|  0:00:00s
epoch 1  | loss: 114.04633| val_0_rmse: 8.45327 |  0:00:00s
epoch 2  | loss: 84.54068| val_0_rmse: 8.21085 |  0:00:00s
epoch 3  | loss: 60.74088| val_0_rmse: 7.02924 |  0:00:00s
epoch 4  | loss: 40.8144 | val_0_rmse: 6.31431 |  0:00:00s
epoch 5  | loss: 26.97223| val_0_rmse: 6.78556 |  0:00:00s
epoch 6  | loss: 16.37065| val_0_rmse: 9.57959 |  0:00:00s
epoch 7  | loss: 10.42297| val_0_rmse: 13.17011|  0:00:00s
epoch 8  | loss: 8.70419 | val_0_rmse: 13.56843|  0:00:01s
epoch 9  | loss: 7.84818 | val_0_rmse: 9.90428 |  0:00:01s
epoch 10 | loss: 5.37161 | val_0_rmse: 10.17165|  0:00:01s
epoch 11 | loss: 3.10757 | val_0_rmse: 7.98914 |  0:00:01s
epoch 12 | loss: 1.9821  | val_0_rmse: 7.39708 |  0:00:01s
epoch 13 | loss: 1.38719 | val_0_rmse: 6.36493 |  0:00:01s
epoch 14 | loss: 1.5524  | val_0_rmse: 5.96615 |  0:00:01s
epoch 15 | loss: 1.02012 | val_0_rmse: 5.48118 |  0:00:01s
epoch 16 | loss: 0.71088 | val_0_rmse: 4.43833 |  0:00

[I 2025-08-17 19:51:26,986] Trial 154 finished with value: 2.0279967494004585 and parameters: {'n_d': 58, 'n_a': 60, 'n_steps': 3, 'gamma': 1.6263674778219601, 'lambda_sparse': 0.0014696483834104202}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 112.41891| val_0_rmse: 14.2644 |  0:00:00s
epoch 1  | loss: 80.48064| val_0_rmse: 8.34967 |  0:00:00s
epoch 2  | loss: 57.5198 | val_0_rmse: 8.62332 |  0:00:00s
epoch 3  | loss: 40.58241| val_0_rmse: 7.93825 |  0:00:00s
epoch 4  | loss: 29.39822| val_0_rmse: 9.59064 |  0:00:00s
epoch 5  | loss: 22.07486| val_0_rmse: 9.67531 |  0:00:01s
epoch 6  | loss: 18.87558| val_0_rmse: 9.64488 |  0:00:01s
epoch 7  | loss: 19.51982| val_0_rmse: 11.26284|  0:00:01s
epoch 8  | loss: 18.82491| val_0_rmse: 9.91665 |  0:00:01s
epoch 9  | loss: 17.20632| val_0_rmse: 9.92938 |  0:00:01s
epoch 10 | loss: 12.18764| val_0_rmse: 10.06227|  0:00:01s
epoch 11 | loss: 9.94784 | val_0_rmse: 7.35441 |  0:00:02s
epoch 12 | loss: 7.36078 | val_0_rmse: 5.17029 |  0:00:02s
epoch 13 | loss: 5.52858 | val_0_rmse: 4.83781 |  0:00:02s
epoch 14 | loss: 5.31416 | val_0_rmse: 4.32346 |  0:00:02s
epoch 15 | loss: 4.52599 | val_0_rmse: 3.94167 |  0:00:02s
epoch 16 | loss: 3.98328 | val_0_rmse: 4.45415 |  0:00:

[I 2025-08-17 19:51:43,069] Trial 155 finished with value: 0.5346564804014007 and parameters: {'n_d': 31, 'n_a': 49, 'n_steps': 6, 'gamma': 1.5655761115813347, 'lambda_sparse': 0.002925764332280809}. Best is trial 40 with value: 0.2639664310728177.


epoch 93 | loss: 0.22547 | val_0_rmse: 0.77279 |  0:00:15s

Early stopping occurred at epoch 93 with best_epoch = 83 and best_val_0_rmse = 0.53466




epoch 0  | loss: 142.70506| val_0_rmse: 19.3651 |  0:00:00s
epoch 1  | loss: 100.45587| val_0_rmse: 9.93521 |  0:00:00s
epoch 2  | loss: 72.51239| val_0_rmse: 10.37443|  0:00:00s
epoch 3  | loss: 51.19805| val_0_rmse: 11.16091|  0:00:00s
epoch 4  | loss: 31.52345| val_0_rmse: 11.28672|  0:00:00s
epoch 5  | loss: 19.6092 | val_0_rmse: 10.28301|  0:00:00s
epoch 6  | loss: 12.13051| val_0_rmse: 10.08862|  0:00:00s
epoch 7  | loss: 10.56507| val_0_rmse: 10.36911|  0:00:00s
epoch 8  | loss: 9.88503 | val_0_rmse: 8.56842 |  0:00:00s
epoch 9  | loss: 6.39431 | val_0_rmse: 6.40609 |  0:00:01s
epoch 10 | loss: 4.38184 | val_0_rmse: 5.27196 |  0:00:01s
epoch 11 | loss: 2.39623 | val_0_rmse: 4.30146 |  0:00:01s
epoch 12 | loss: 2.20139 | val_0_rmse: 3.79505 |  0:00:01s
epoch 13 | loss: 1.71183 | val_0_rmse: 3.24476 |  0:00:01s
epoch 14 | loss: 1.09489 | val_0_rmse: 3.1061  |  0:00:01s
epoch 15 | loss: 1.15362 | val_0_rmse: 2.88737 |  0:00:01s
epoch 16 | loss: 1.15568 | val_0_rmse: 2.31661 |  0:00

[I 2025-08-17 19:51:53,914] Trial 156 finished with value: 0.3494979437232629 and parameters: {'n_d': 62, 'n_a': 51, 'n_steps': 3, 'gamma': 1.5166895342078575, 'lambda_sparse': 0.002513702183468837}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.06731 | val_0_rmse: 0.3495  |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.3495




epoch 0  | loss: 210.9288| val_0_rmse: 16.00385|  0:00:00s
epoch 1  | loss: 131.94345| val_0_rmse: 12.26987|  0:00:00s
epoch 2  | loss: 78.42647| val_0_rmse: 11.45423|  0:00:00s
epoch 3  | loss: 43.17589| val_0_rmse: 12.19856|  0:00:01s
epoch 4  | loss: 24.1339 | val_0_rmse: 12.42892|  0:00:01s
epoch 5  | loss: 21.31794| val_0_rmse: 12.12302|  0:00:01s
epoch 6  | loss: 18.93826| val_0_rmse: 9.54667 |  0:00:01s
epoch 7  | loss: 12.11906| val_0_rmse: 8.39392 |  0:00:02s
epoch 8  | loss: 10.17585| val_0_rmse: 6.90383 |  0:00:02s
epoch 9  | loss: 8.72158 | val_0_rmse: 6.046   |  0:00:02s
epoch 10 | loss: 7.92602 | val_0_rmse: 6.26257 |  0:00:03s
epoch 11 | loss: 7.62139 | val_0_rmse: 7.11384 |  0:00:03s
epoch 12 | loss: 4.7604  | val_0_rmse: 8.41154 |  0:00:03s
epoch 13 | loss: 5.96639 | val_0_rmse: 8.87329 |  0:00:03s
epoch 14 | loss: 7.0081  | val_0_rmse: 7.95258 |  0:00:04s
epoch 15 | loss: 5.07943 | val_0_rmse: 6.42207 |  0:00:04s
epoch 16 | loss: 3.52472 | val_0_rmse: 6.00866 |  0:00:

[I 2025-08-17 19:52:09,870] Trial 157 finished with value: 1.4255820856040329 and parameters: {'n_d': 60, 'n_a': 55, 'n_steps': 9, 'gamma': 1.6528635486515741, 'lambda_sparse': 0.003395997370382941}. Best is trial 40 with value: 0.2639664310728177.


epoch 56 | loss: 20.67957| val_0_rmse: 4.36065 |  0:00:15s

Early stopping occurred at epoch 56 with best_epoch = 46 and best_val_0_rmse = 1.42558




epoch 0  | loss: 95.55051| val_0_rmse: 11.24847|  0:00:00s
epoch 1  | loss: 75.21247| val_0_rmse: 10.86835|  0:00:00s
epoch 2  | loss: 55.44778| val_0_rmse: 11.45246|  0:00:00s
epoch 3  | loss: 38.44634| val_0_rmse: 11.46219|  0:00:00s
epoch 4  | loss: 26.85535| val_0_rmse: 10.31673|  0:00:00s
epoch 5  | loss: 19.45553| val_0_rmse: 9.10696 |  0:00:00s
epoch 6  | loss: 16.1012 | val_0_rmse: 6.98869 |  0:00:00s
epoch 7  | loss: 15.11738| val_0_rmse: 8.13203 |  0:00:00s
epoch 8  | loss: 15.56568| val_0_rmse: 10.11806|  0:00:01s
epoch 9  | loss: 12.65204| val_0_rmse: 8.23299 |  0:00:01s
epoch 10 | loss: 9.76074 | val_0_rmse: 7.86204 |  0:00:01s
epoch 11 | loss: 6.63789 | val_0_rmse: 5.69826 |  0:00:01s
epoch 12 | loss: 5.34576 | val_0_rmse: 6.2228  |  0:00:01s
epoch 13 | loss: 4.96674 | val_0_rmse: 5.47014 |  0:00:01s
epoch 14 | loss: 4.48405 | val_0_rmse: 5.54753 |  0:00:01s
epoch 15 | loss: 4.62104 | val_0_rmse: 5.76039 |  0:00:01s
epoch 16 | loss: 3.11387 | val_0_rmse: 5.41511 |  0:00:0

[I 2025-08-17 19:52:17,307] Trial 158 finished with value: 0.6144277766204919 and parameters: {'n_d': 32, 'n_a': 36, 'n_steps': 4, 'gamma': 1.7211202522897115, 'lambda_sparse': 0.0017196180520929634}. Best is trial 40 with value: 0.2639664310728177.


epoch 60 | loss: 0.3901  | val_0_rmse: 0.70108 |  0:00:07s
epoch 61 | loss: 0.23516 | val_0_rmse: 0.85073 |  0:00:07s

Early stopping occurred at epoch 61 with best_epoch = 51 and best_val_0_rmse = 0.61443




epoch 0  | loss: 149.11002| val_0_rmse: 14.5093 |  0:00:00s
epoch 1  | loss: 113.23199| val_0_rmse: 9.28805 |  0:00:00s
epoch 2  | loss: 90.00428| val_0_rmse: 8.11694 |  0:00:00s
epoch 3  | loss: 69.9361 | val_0_rmse: 9.17812 |  0:00:00s
epoch 4  | loss: 51.63251| val_0_rmse: 11.45555|  0:00:00s
epoch 5  | loss: 36.07619| val_0_rmse: 11.13614|  0:00:00s
epoch 6  | loss: 24.92754| val_0_rmse: 10.01657|  0:00:00s
epoch 7  | loss: 16.26632| val_0_rmse: 9.26306 |  0:00:00s
epoch 8  | loss: 10.57356| val_0_rmse: 8.16408 |  0:00:00s
epoch 9  | loss: 9.11587 | val_0_rmse: 7.43178 |  0:00:00s
epoch 10 | loss: 6.85833 | val_0_rmse: 6.32368 |  0:00:01s
epoch 11 | loss: 6.68517 | val_0_rmse: 5.32648 |  0:00:01s
epoch 12 | loss: 4.81218 | val_0_rmse: 4.67357 |  0:00:01s
epoch 13 | loss: 2.8093  | val_0_rmse: 3.6565  |  0:00:01s
epoch 14 | loss: 1.56675 | val_0_rmse: 3.63267 |  0:00:01s
epoch 15 | loss: 1.74031 | val_0_rmse: 2.81291 |  0:00:01s
epoch 16 | loss: 1.58269 | val_0_rmse: 2.45141 |  0:00

[I 2025-08-17 19:52:27,546] Trial 159 finished with value: 0.3387712939047927 and parameters: {'n_d': 29, 'n_a': 58, 'n_steps': 3, 'gamma': 1.8214979279921222, 'lambda_sparse': 0.0010053191214155995}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.06215 | val_0_rmse: 0.36772 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 91 and best_val_0_rmse = 0.33877




epoch 0  | loss: 194.2923| val_0_rmse: 12.81625|  0:00:00s
epoch 1  | loss: 128.56239| val_0_rmse: 10.19838|  0:00:00s
epoch 2  | loss: 79.13825| val_0_rmse: 9.3571  |  0:00:00s
epoch 3  | loss: 48.93877| val_0_rmse: 8.36284 |  0:00:00s
epoch 4  | loss: 27.20407| val_0_rmse: 10.72195|  0:00:00s
epoch 5  | loss: 21.57174| val_0_rmse: 9.54461 |  0:00:01s
epoch 6  | loss: 18.30118| val_0_rmse: 7.73874 |  0:00:01s
epoch 7  | loss: 17.78226| val_0_rmse: 7.46688 |  0:00:01s
epoch 8  | loss: 20.4482 | val_0_rmse: 7.77957 |  0:00:01s
epoch 9  | loss: 19.64269| val_0_rmse: 8.53994 |  0:00:01s
epoch 10 | loss: 14.78631| val_0_rmse: 7.79181 |  0:00:02s
epoch 11 | loss: 10.17074| val_0_rmse: 7.4354  |  0:00:02s
epoch 12 | loss: 8.0073  | val_0_rmse: 6.61341 |  0:00:02s
epoch 13 | loss: 5.92473 | val_0_rmse: 5.84389 |  0:00:02s
epoch 14 | loss: 5.8179  | val_0_rmse: 4.93586 |  0:00:02s
epoch 15 | loss: 6.58736 | val_0_rmse: 4.55492 |  0:00:03s
epoch 16 | loss: 4.51185 | val_0_rmse: 4.57525 |  0:00:

[I 2025-08-17 19:52:39,533] Trial 160 finished with value: 1.0096507007985625 and parameters: {'n_d': 34, 'n_a': 47, 'n_steps': 7, 'gamma': 1.603148405960135, 'lambda_sparse': 0.00467566264047997}. Best is trial 40 with value: 0.2639664310728177.


epoch 0  | loss: 104.03316| val_0_rmse: 13.43811|  0:00:00s
epoch 1  | loss: 83.45214| val_0_rmse: 10.65229|  0:00:00s
epoch 2  | loss: 61.8591 | val_0_rmse: 9.16373 |  0:00:00s
epoch 3  | loss: 46.68847| val_0_rmse: 7.2365  |  0:00:00s
epoch 4  | loss: 32.74183| val_0_rmse: 5.41116 |  0:00:00s
epoch 5  | loss: 23.1307 | val_0_rmse: 7.11736 |  0:00:00s
epoch 6  | loss: 15.10392| val_0_rmse: 8.31625 |  0:00:00s
epoch 7  | loss: 11.86205| val_0_rmse: 9.00794 |  0:00:00s
epoch 8  | loss: 9.18668 | val_0_rmse: 10.9936 |  0:00:01s
epoch 9  | loss: 8.75862 | val_0_rmse: 11.31706|  0:00:01s
epoch 10 | loss: 6.91101 | val_0_rmse: 10.50119|  0:00:01s
epoch 11 | loss: 4.26289 | val_0_rmse: 8.86843 |  0:00:01s
epoch 12 | loss: 3.16955 | val_0_rmse: 6.97232 |  0:00:01s
epoch 13 | loss: 2.39558 | val_0_rmse: 7.13651 |  0:00:01s


[I 2025-08-17 19:52:41,317] Trial 161 finished with value: 5.411156370387191 and parameters: {'n_d': 36, 'n_a': 59, 'n_steps': 3, 'gamma': 1.4926385480933795, 'lambda_sparse': 0.0052433127931552186}. Best is trial 40 with value: 0.2639664310728177.


epoch 14 | loss: 2.24574 | val_0_rmse: 6.67202 |  0:00:01s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_rmse = 5.41116




epoch 0  | loss: 150.11011| val_0_rmse: 10.4185 |  0:00:00s
epoch 1  | loss: 121.08698| val_0_rmse: 8.91307 |  0:00:00s
epoch 2  | loss: 97.7679 | val_0_rmse: 8.633   |  0:00:00s
epoch 3  | loss: 77.35546| val_0_rmse: 8.27217 |  0:00:00s
epoch 4  | loss: 61.07402| val_0_rmse: 7.98111 |  0:00:00s
epoch 5  | loss: 47.38921| val_0_rmse: 7.1905  |  0:00:00s
epoch 6  | loss: 34.72808| val_0_rmse: 7.39449 |  0:00:00s
epoch 7  | loss: 22.37332| val_0_rmse: 8.04748 |  0:00:00s
epoch 8  | loss: 15.04787| val_0_rmse: 8.5499  |  0:00:00s
epoch 9  | loss: 9.67754 | val_0_rmse: 8.65543 |  0:00:01s
epoch 10 | loss: 8.23108 | val_0_rmse: 8.2796  |  0:00:01s
epoch 11 | loss: 6.96608 | val_0_rmse: 8.39856 |  0:00:01s
epoch 12 | loss: 5.4119  | val_0_rmse: 5.65747 |  0:00:01s
epoch 13 | loss: 3.95341 | val_0_rmse: 5.31693 |  0:00:01s
epoch 14 | loss: 2.66334 | val_0_rmse: 5.16366 |  0:00:01s
epoch 15 | loss: 2.56016 | val_0_rmse: 3.26362 |  0:00:01s
epoch 16 | loss: 1.50538 | val_0_rmse: 3.17961 |  0:00

[I 2025-08-17 19:52:51,870] Trial 162 finished with value: 0.2730055174071543 and parameters: {'n_d': 35, 'n_a': 60, 'n_steps': 3, 'gamma': 1.5280057116213248, 'lambda_sparse': 0.005740122282288143}. Best is trial 40 with value: 0.2639664310728177.


epoch 99 | loss: 0.05959 | val_0_rmse: 0.27793 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 91 and best_val_0_rmse = 0.27301




epoch 0  | loss: 207.52904| val_0_rmse: 16.709  |  0:00:00s
epoch 1  | loss: 163.26042| val_0_rmse: 12.5529 |  0:00:00s
epoch 2  | loss: 131.57022| val_0_rmse: 10.7921 |  0:00:00s
epoch 3  | loss: 107.45393| val_0_rmse: 9.58088 |  0:00:00s
epoch 4  | loss: 86.54867| val_0_rmse: 8.95766 |  0:00:00s
epoch 5  | loss: 68.47581| val_0_rmse: 8.35169 |  0:00:00s
epoch 6  | loss: 53.32837| val_0_rmse: 7.73243 |  0:00:00s
epoch 7  | loss: 37.12438| val_0_rmse: 7.2653  |  0:00:00s
epoch 8  | loss: 25.71351| val_0_rmse: 6.36269 |  0:00:00s
epoch 9  | loss: 15.74526| val_0_rmse: 6.57215 |  0:00:01s
epoch 10 | loss: 9.25364 | val_0_rmse: 9.78087 |  0:00:01s
epoch 11 | loss: 5.42163 | val_0_rmse: 10.73247|  0:00:01s
epoch 12 | loss: 4.87476 | val_0_rmse: 8.46662 |  0:00:01s
epoch 13 | loss: 4.75264 | val_0_rmse: 7.00895 |  0:00:01s
epoch 14 | loss: 4.43995 | val_0_rmse: 6.75142 |  0:00:01s
epoch 15 | loss: 3.13917 | val_0_rmse: 6.30315 |  0:00:01s
epoch 16 | loss: 1.93881 | val_0_rmse: 6.47245 |  0:

[I 2025-08-17 19:53:01,947] Trial 163 finished with value: 0.19407104242042633 and parameters: {'n_d': 39, 'n_a': 57, 'n_steps': 3, 'gamma': 1.521073730613384, 'lambda_sparse': 0.007316938863543923}. Best is trial 163 with value: 0.19407104242042633.


epoch 99 | loss: 0.05487 | val_0_rmse: 0.20833 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 90 and best_val_0_rmse = 0.19407




epoch 0  | loss: 143.21146| val_0_rmse: 10.37296|  0:00:00s
epoch 1  | loss: 113.33737| val_0_rmse: 8.64097 |  0:00:00s
epoch 2  | loss: 88.17597| val_0_rmse: 8.25084 |  0:00:00s
epoch 3  | loss: 68.1896 | val_0_rmse: 7.32596 |  0:00:00s
epoch 4  | loss: 49.63738| val_0_rmse: 7.20169 |  0:00:00s
epoch 5  | loss: 34.06521| val_0_rmse: 7.57182 |  0:00:00s
epoch 6  | loss: 23.101  | val_0_rmse: 8.90074 |  0:00:00s
epoch 7  | loss: 12.43144| val_0_rmse: 9.09107 |  0:00:00s
epoch 8  | loss: 8.88085 | val_0_rmse: 10.05995|  0:00:00s
epoch 9  | loss: 6.56638 | val_0_rmse: 8.90521 |  0:00:01s
epoch 10 | loss: 6.17379 | val_0_rmse: 7.45967 |  0:00:01s
epoch 11 | loss: 4.78244 | val_0_rmse: 6.38658 |  0:00:01s
epoch 12 | loss: 3.51503 | val_0_rmse: 5.46489 |  0:00:01s
epoch 13 | loss: 1.93477 | val_0_rmse: 4.62408 |  0:00:01s
epoch 14 | loss: 1.73054 | val_0_rmse: 4.29162 |  0:00:01s
epoch 15 | loss: 1.32698 | val_0_rmse: 3.89974 |  0:00:01s
epoch 16 | loss: 1.00989 | val_0_rmse: 3.82065 |  0:00

[I 2025-08-17 19:53:12,601] Trial 164 finished with value: 0.2468453145921003 and parameters: {'n_d': 39, 'n_a': 61, 'n_steps': 3, 'gamma': 1.5452717109198517, 'lambda_sparse': 0.006756249394420088}. Best is trial 163 with value: 0.19407104242042633.


epoch 98 | loss: 0.09219 | val_0_rmse: 0.26359 |  0:00:10s
epoch 99 | loss: 0.07352 | val_0_rmse: 0.24685 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.24685




epoch 0  | loss: 145.26596| val_0_rmse: 30.67391|  0:00:00s
epoch 1  | loss: 111.81228| val_0_rmse: 14.69103|  0:00:00s
epoch 2  | loss: 81.28529| val_0_rmse: 11.95867|  0:00:00s
epoch 3  | loss: 58.3425 | val_0_rmse: 10.49719|  0:00:00s
epoch 4  | loss: 37.75565| val_0_rmse: 10.03955|  0:00:00s
epoch 5  | loss: 24.66813| val_0_rmse: 11.11195|  0:00:00s
epoch 6  | loss: 14.44927| val_0_rmse: 12.53932|  0:00:00s
epoch 7  | loss: 9.32505 | val_0_rmse: 14.45212|  0:00:00s
epoch 8  | loss: 7.81902 | val_0_rmse: 16.15671|  0:00:00s
epoch 9  | loss: 7.18252 | val_0_rmse: 17.19465|  0:00:01s
epoch 10 | loss: 5.51669 | val_0_rmse: 14.68745|  0:00:01s
epoch 11 | loss: 3.00122 | val_0_rmse: 10.57404|  0:00:01s
epoch 12 | loss: 1.98008 | val_0_rmse: 8.08294 |  0:00:01s
epoch 13 | loss: 2.04428 | val_0_rmse: 6.06439 |  0:00:01s
epoch 14 | loss: 1.95225 | val_0_rmse: 5.96744 |  0:00:01s
epoch 15 | loss: 1.32944 | val_0_rmse: 6.43084 |  0:00:01s
epoch 16 | loss: 0.76645 | val_0_rmse: 5.7073  |  0:00

[I 2025-08-17 19:53:16,106] Trial 165 finished with value: 2.3457186714438523 and parameters: {'n_d': 39, 'n_a': 62, 'n_steps': 3, 'gamma': 1.5441029948629714, 'lambda_sparse': 0.0070942429471292845}. Best is trial 163 with value: 0.19407104242042633.


epoch 31 | loss: 0.20067 | val_0_rmse: 2.67139 |  0:00:03s

Early stopping occurred at epoch 31 with best_epoch = 21 and best_val_0_rmse = 2.34572




epoch 0  | loss: 112.5828| val_0_rmse: 11.97192|  0:00:00s
epoch 1  | loss: 88.16469| val_0_rmse: 9.60255 |  0:00:00s
epoch 2  | loss: 66.86758| val_0_rmse: 9.02992 |  0:00:00s
epoch 3  | loss: 44.93238| val_0_rmse: 10.33881|  0:00:00s
epoch 4  | loss: 33.20493| val_0_rmse: 8.73726 |  0:00:00s
epoch 5  | loss: 20.24666| val_0_rmse: 9.19339 |  0:00:00s
epoch 6  | loss: 13.0529 | val_0_rmse: 12.64022|  0:00:00s
epoch 7  | loss: 11.30969| val_0_rmse: 13.52206|  0:00:00s
epoch 8  | loss: 9.66215 | val_0_rmse: 12.7492 |  0:00:00s
epoch 9  | loss: 7.8141  | val_0_rmse: 11.05649|  0:00:01s
epoch 10 | loss: 5.74847 | val_0_rmse: 8.77537 |  0:00:01s
epoch 11 | loss: 3.41063 | val_0_rmse: 6.67705 |  0:00:01s
epoch 12 | loss: 2.62532 | val_0_rmse: 5.3303  |  0:00:01s
epoch 13 | loss: 2.12088 | val_0_rmse: 4.6509  |  0:00:01s
epoch 14 | loss: 1.31711 | val_0_rmse: 4.5628  |  0:00:01s
epoch 15 | loss: 1.05308 | val_0_rmse: 4.0202  |  0:00:01s
epoch 16 | loss: 0.99186 | val_0_rmse: 3.5563  |  0:00:0

[I 2025-08-17 19:53:23,877] Trial 166 finished with value: 0.5129753578326649 and parameters: {'n_d': 41, 'n_a': 60, 'n_steps': 3, 'gamma': 1.50660693537068, 'lambda_sparse': 0.0059820654139042355}. Best is trial 163 with value: 0.19407104242042633.


epoch 69 | loss: 0.11017 | val_0_rmse: 0.76595 |  0:00:07s
epoch 70 | loss: 0.11117 | val_0_rmse: 0.75108 |  0:00:07s

Early stopping occurred at epoch 70 with best_epoch = 60 and best_val_0_rmse = 0.51298




epoch 0  | loss: 191.78073| val_0_rmse: 16.52121|  0:00:00s
epoch 1  | loss: 145.53021| val_0_rmse: 11.01811|  0:00:00s
epoch 2  | loss: 112.5869| val_0_rmse: 9.57914 |  0:00:00s
epoch 3  | loss: 86.15493| val_0_rmse: 9.17467 |  0:00:00s
epoch 4  | loss: 64.53966| val_0_rmse: 9.38829 |  0:00:00s
epoch 5  | loss: 45.75941| val_0_rmse: 10.08987|  0:00:00s
epoch 6  | loss: 33.1774 | val_0_rmse: 15.77583|  0:00:00s
epoch 7  | loss: 19.67314| val_0_rmse: 18.41258|  0:00:00s
epoch 8  | loss: 11.36739| val_0_rmse: 20.7076 |  0:00:00s
epoch 9  | loss: 7.86795 | val_0_rmse: 21.06474|  0:00:01s
epoch 10 | loss: 6.78443 | val_0_rmse: 22.41517|  0:00:01s
epoch 11 | loss: 6.53637 | val_0_rmse: 21.83004|  0:00:01s
epoch 12 | loss: 4.67393 | val_0_rmse: 18.40749|  0:00:01s


[I 2025-08-17 19:53:25,504] Trial 167 finished with value: 9.174672901051615 and parameters: {'n_d': 42, 'n_a': 61, 'n_steps': 3, 'gamma': 1.5269439985753583, 'lambda_sparse': 0.0095236686685457}. Best is trial 163 with value: 0.19407104242042633.


epoch 13 | loss: 2.72585 | val_0_rmse: 13.97994|  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 9.17467




epoch 0  | loss: 170.70236| val_0_rmse: 17.40243|  0:00:00s
epoch 1  | loss: 130.92023| val_0_rmse: 13.8213 |  0:00:00s
epoch 2  | loss: 99.97104| val_0_rmse: 10.5716 |  0:00:00s
epoch 3  | loss: 76.72399| val_0_rmse: 7.66667 |  0:00:00s
epoch 4  | loss: 54.50246| val_0_rmse: 8.61721 |  0:00:00s
epoch 5  | loss: 39.22795| val_0_rmse: 9.98208 |  0:00:00s
epoch 6  | loss: 25.3425 | val_0_rmse: 11.05733|  0:00:00s
epoch 7  | loss: 14.53776| val_0_rmse: 12.14005|  0:00:00s
epoch 8  | loss: 7.39014 | val_0_rmse: 14.21051|  0:00:00s
epoch 9  | loss: 5.7765  | val_0_rmse: 14.78829|  0:00:01s
epoch 10 | loss: 7.16824 | val_0_rmse: 14.98669|  0:00:01s
epoch 11 | loss: 6.90329 | val_0_rmse: 14.06034|  0:00:01s
epoch 12 | loss: 4.75301 | val_0_rmse: 12.29356|  0:00:01s


[I 2025-08-17 19:53:27,073] Trial 168 finished with value: 7.666674971323242 and parameters: {'n_d': 40, 'n_a': 64, 'n_steps': 3, 'gamma': 1.47769780224639, 'lambda_sparse': 0.007744693549971041}. Best is trial 163 with value: 0.19407104242042633.


epoch 13 | loss: 2.48452 | val_0_rmse: 10.29872|  0:00:01s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_rmse = 7.66667




epoch 0  | loss: 99.47707| val_0_rmse: 12.09033|  0:00:00s
epoch 1  | loss: 78.46265| val_0_rmse: 9.43082 |  0:00:00s
epoch 2  | loss: 55.48477| val_0_rmse: 7.76516 |  0:00:00s
epoch 3  | loss: 40.29814| val_0_rmse: 12.66371|  0:00:00s
epoch 4  | loss: 25.57932| val_0_rmse: 15.22019|  0:00:00s
epoch 5  | loss: 16.65077| val_0_rmse: 6.58129 |  0:00:00s
epoch 6  | loss: 12.80708| val_0_rmse: 6.50213 |  0:00:00s
epoch 7  | loss: 10.94987| val_0_rmse: 7.91463 |  0:00:00s
epoch 8  | loss: 9.83121 | val_0_rmse: 8.7534  |  0:00:00s
epoch 9  | loss: 7.43741 | val_0_rmse: 9.97081 |  0:00:01s
epoch 10 | loss: 5.20379 | val_0_rmse: 9.62169 |  0:00:01s
epoch 11 | loss: 3.11437 | val_0_rmse: 8.1693  |  0:00:01s
epoch 12 | loss: 2.73007 | val_0_rmse: 7.23639 |  0:00:01s
epoch 13 | loss: 1.85565 | val_0_rmse: 5.86006 |  0:00:01s
epoch 14 | loss: 1.51485 | val_0_rmse: 3.90818 |  0:00:01s
epoch 15 | loss: 1.19449 | val_0_rmse: 3.19609 |  0:00:01s
epoch 16 | loss: 1.1893  | val_0_rmse: 3.3483  |  0:00:0

[I 2025-08-17 19:53:37,650] Trial 169 finished with value: 0.5365058805692816 and parameters: {'n_d': 38, 'n_a': 57, 'n_steps': 3, 'gamma': 1.5939410341600202, 'lambda_sparse': 0.0051179355881567435}. Best is trial 163 with value: 0.19407104242042633.


epoch 99 | loss: 0.13392 | val_0_rmse: 0.74723 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 96 and best_val_0_rmse = 0.53651




epoch 0  | loss: 127.65848| val_0_rmse: 25.38753|  0:00:00s
epoch 1  | loss: 103.29499| val_0_rmse: 18.73456|  0:00:00s
epoch 2  | loss: 81.39696| val_0_rmse: 15.60687|  0:00:00s
epoch 3  | loss: 63.1286 | val_0_rmse: 11.50026|  0:00:00s
epoch 4  | loss: 49.87653| val_0_rmse: 10.50135|  0:00:00s
epoch 5  | loss: 35.27377| val_0_rmse: 9.02354 |  0:00:00s
epoch 6  | loss: 24.26563| val_0_rmse: 8.97701 |  0:00:00s
epoch 7  | loss: 18.37429| val_0_rmse: 8.72983 |  0:00:00s
epoch 8  | loss: 14.7352 | val_0_rmse: 9.63432 |  0:00:00s
epoch 9  | loss: 12.02301| val_0_rmse: 10.69581|  0:00:01s
epoch 10 | loss: 11.16012| val_0_rmse: 9.69602 |  0:00:01s
epoch 11 | loss: 11.51846| val_0_rmse: 9.10514 |  0:00:01s
epoch 12 | loss: 9.66618 | val_0_rmse: 7.9262  |  0:00:01s
epoch 13 | loss: 6.25621 | val_0_rmse: 7.7491  |  0:00:01s
epoch 14 | loss: 3.93577 | val_0_rmse: 6.03429 |  0:00:01s
epoch 15 | loss: 2.67099 | val_0_rmse: 4.93085 |  0:00:01s
epoch 16 | loss: 2.13762 | val_0_rmse: 4.19146 |  0:00

[I 2025-08-17 19:53:45,831] Trial 170 finished with value: 0.5319784241323408 and parameters: {'n_d': 36, 'n_a': 43, 'n_steps': 3, 'gamma': 1.5328762857114593, 'lambda_sparse': 0.006630516219420911}. Best is trial 163 with value: 0.19407104242042633.


epoch 78 | loss: 0.08235 | val_0_rmse: 0.59635 |  0:00:07s
epoch 79 | loss: 0.10522 | val_0_rmse: 0.60292 |  0:00:08s

Early stopping occurred at epoch 79 with best_epoch = 69 and best_val_0_rmse = 0.53198




epoch 0  | loss: 126.60757| val_0_rmse: 16.84662|  0:00:00s
epoch 1  | loss: 96.78802| val_0_rmse: 15.35156|  0:00:00s
epoch 2  | loss: 71.0701 | val_0_rmse: 16.31554|  0:00:00s
epoch 3  | loss: 52.52517| val_0_rmse: 13.80445|  0:00:00s
epoch 4  | loss: 33.76307| val_0_rmse: 14.52154|  0:00:00s
epoch 5  | loss: 21.1743 | val_0_rmse: 11.04951|  0:00:00s
epoch 6  | loss: 14.73937| val_0_rmse: 10.31232|  0:00:00s
epoch 7  | loss: 11.91881| val_0_rmse: 8.05424 |  0:00:00s
epoch 8  | loss: 10.43051| val_0_rmse: 10.4085 |  0:00:00s
epoch 9  | loss: 9.81654 | val_0_rmse: 8.45421 |  0:00:01s
epoch 10 | loss: 6.58966 | val_0_rmse: 6.54226 |  0:00:01s
epoch 11 | loss: 4.91797 | val_0_rmse: 4.54483 |  0:00:01s
epoch 12 | loss: 2.90633 | val_0_rmse: 4.09788 |  0:00:01s
epoch 13 | loss: 2.76573 | val_0_rmse: 3.90026 |  0:00:01s
epoch 14 | loss: 2.44196 | val_0_rmse: 2.90813 |  0:00:01s
epoch 15 | loss: 1.49819 | val_0_rmse: 2.48793 |  0:00:01s
epoch 16 | loss: 1.15379 | val_0_rmse: 2.01604 |  0:00:

[I 2025-08-17 19:53:54,661] Trial 171 finished with value: 0.3730928375646091 and parameters: {'n_d': 38, 'n_a': 60, 'n_steps': 3, 'gamma': 1.5756098971569816, 'lambda_sparse': 0.012175846898347407}. Best is trial 163 with value: 0.19407104242042633.


epoch 81 | loss: 0.09484 | val_0_rmse: 0.44857 |  0:00:08s
epoch 82 | loss: 0.07923 | val_0_rmse: 0.44485 |  0:00:08s

Early stopping occurred at epoch 82 with best_epoch = 72 and best_val_0_rmse = 0.37309




epoch 0  | loss: 125.18971| val_0_rmse: 9.09178 |  0:00:00s
epoch 1  | loss: 99.98815| val_0_rmse: 9.23889 |  0:00:00s
epoch 2  | loss: 81.2298 | val_0_rmse: 8.08902 |  0:00:00s
epoch 3  | loss: 63.17062| val_0_rmse: 6.93178 |  0:00:00s
epoch 4  | loss: 47.26597| val_0_rmse: 6.59778 |  0:00:00s
epoch 5  | loss: 35.87265| val_0_rmse: 8.10955 |  0:00:00s
epoch 6  | loss: 24.26425| val_0_rmse: 8.3252  |  0:00:00s
epoch 7  | loss: 16.57926| val_0_rmse: 9.67138 |  0:00:00s
epoch 8  | loss: 12.49038| val_0_rmse: 10.90333|  0:00:00s
epoch 9  | loss: 8.61481 | val_0_rmse: 9.78216 |  0:00:01s
epoch 10 | loss: 7.88466 | val_0_rmse: 7.85399 |  0:00:01s
epoch 11 | loss: 7.02811 | val_0_rmse: 7.70991 |  0:00:01s
epoch 12 | loss: 5.13607 | val_0_rmse: 6.21224 |  0:00:01s
epoch 13 | loss: 3.37007 | val_0_rmse: 5.31281 |  0:00:01s
epoch 14 | loss: 2.10954 | val_0_rmse: 5.07082 |  0:00:01s
epoch 15 | loss: 1.91601 | val_0_rmse: 4.46832 |  0:00:01s
epoch 16 | loss: 2.03664 | val_0_rmse: 4.1863  |  0:00:

[I 2025-08-17 19:54:01,441] Trial 172 finished with value: 0.5418436838329351 and parameters: {'n_d': 34, 'n_a': 58, 'n_steps': 3, 'gamma': 1.6247705456413941, 'lambda_sparse': 0.00426249107527843}. Best is trial 163 with value: 0.19407104242042633.


epoch 62 | loss: 0.08958 | val_0_rmse: 0.63775 |  0:00:06s

Early stopping occurred at epoch 62 with best_epoch = 52 and best_val_0_rmse = 0.54184




epoch 0  | loss: 122.92481| val_0_rmse: 9.2494  |  0:00:00s
epoch 1  | loss: 94.80803| val_0_rmse: 9.45054 |  0:00:00s
epoch 2  | loss: 72.57216| val_0_rmse: 9.62707 |  0:00:00s
epoch 3  | loss: 54.02597| val_0_rmse: 9.485   |  0:00:00s
epoch 4  | loss: 36.50568| val_0_rmse: 9.95894 |  0:00:00s
epoch 5  | loss: 24.86571| val_0_rmse: 10.63355|  0:00:00s
epoch 6  | loss: 16.67745| val_0_rmse: 12.27335|  0:00:00s
epoch 7  | loss: 10.16656| val_0_rmse: 12.10979|  0:00:00s
epoch 8  | loss: 9.58103 | val_0_rmse: 11.25585|  0:00:00s
epoch 9  | loss: 8.32154 | val_0_rmse: 9.01777 |  0:00:01s
epoch 10 | loss: 5.90501 | val_0_rmse: 7.85163 |  0:00:01s
epoch 11 | loss: 3.61239 | val_0_rmse: 6.08643 |  0:00:01s
epoch 12 | loss: 2.392   | val_0_rmse: 5.0492  |  0:00:01s
epoch 13 | loss: 2.3344  | val_0_rmse: 4.55886 |  0:00:01s
epoch 14 | loss: 3.53863 | val_0_rmse: 3.99282 |  0:00:01s
epoch 15 | loss: 3.57621 | val_0_rmse: 3.65445 |  0:00:01s
epoch 16 | loss: 1.54369 | val_0_rmse: 3.50823 |  0:00:

[I 2025-08-17 19:54:12,442] Trial 173 finished with value: 0.3892716272710465 and parameters: {'n_d': 37, 'n_a': 57, 'n_steps': 3, 'gamma': 1.670439961218935, 'lambda_sparse': 0.005817631801105855}. Best is trial 163 with value: 0.19407104242042633.


epoch 99 | loss: 0.06745 | val_0_rmse: 0.40795 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 94 and best_val_0_rmse = 0.38927




epoch 0  | loss: 138.23318| val_0_rmse: 12.64064|  0:00:00s
epoch 1  | loss: 109.96201| val_0_rmse: 10.71833|  0:00:00s
epoch 2  | loss: 86.94922| val_0_rmse: 11.63152|  0:00:00s
epoch 3  | loss: 65.68523| val_0_rmse: 12.41697|  0:00:00s
epoch 4  | loss: 51.23837| val_0_rmse: 13.45998|  0:00:00s
epoch 5  | loss: 33.38829| val_0_rmse: 16.48362|  0:00:00s
epoch 6  | loss: 23.1531 | val_0_rmse: 20.82676|  0:00:00s
epoch 7  | loss: 15.22503| val_0_rmse: 20.48856|  0:00:00s
epoch 8  | loss: 9.61039 | val_0_rmse: 19.76177|  0:00:00s
epoch 9  | loss: 7.95983 | val_0_rmse: 17.7422 |  0:00:01s
epoch 10 | loss: 7.47623 | val_0_rmse: 15.62061|  0:00:01s


[I 2025-08-17 19:54:13,776] Trial 174 finished with value: 10.718326198158463 and parameters: {'n_d': 35, 'n_a': 62, 'n_steps': 3, 'gamma': 1.554341700520705, 'lambda_sparse': 0.00864261919063369}. Best is trial 163 with value: 0.19407104242042633.


epoch 11 | loss: 7.04641 | val_0_rmse: 14.01034|  0:00:01s

Early stopping occurred at epoch 11 with best_epoch = 1 and best_val_0_rmse = 10.71833




epoch 0  | loss: 209.88928| val_0_rmse: 15.60601|  0:00:00s
epoch 1  | loss: 165.19048| val_0_rmse: 12.55733|  0:00:00s
epoch 2  | loss: 133.48274| val_0_rmse: 11.01088|  0:00:00s
epoch 3  | loss: 107.42847| val_0_rmse: 9.70599 |  0:00:00s
epoch 4  | loss: 85.88675| val_0_rmse: 9.2822  |  0:00:00s
epoch 5  | loss: 68.60073| val_0_rmse: 11.26388|  0:00:00s
epoch 6  | loss: 52.66648| val_0_rmse: 9.47039 |  0:00:00s
epoch 7  | loss: 38.49034| val_0_rmse: 9.05761 |  0:00:00s
epoch 8  | loss: 25.70511| val_0_rmse: 8.97607 |  0:00:01s
epoch 9  | loss: 16.23765| val_0_rmse: 9.59108 |  0:00:01s
epoch 10 | loss: 9.1563  | val_0_rmse: 9.95748 |  0:00:01s
epoch 11 | loss: 5.46534 | val_0_rmse: 10.294  |  0:00:01s
epoch 12 | loss: 5.04418 | val_0_rmse: 9.02197 |  0:00:01s
epoch 13 | loss: 6.07762 | val_0_rmse: 7.98395 |  0:00:01s
epoch 14 | loss: 5.66383 | val_0_rmse: 7.06659 |  0:00:01s
epoch 15 | loss: 4.50021 | val_0_rmse: 5.8082  |  0:00:01s
epoch 16 | loss: 2.62267 | val_0_rmse: 4.47498 |  0:

[I 2025-08-17 19:54:22,122] Trial 175 finished with value: 0.3891716740352429 and parameters: {'n_d': 40, 'n_a': 59, 'n_steps': 3, 'gamma': 1.6449686887624897, 'lambda_sparse': 0.004425221326966841}. Best is trial 163 with value: 0.19407104242042633.


epoch 74 | loss: 0.08161 | val_0_rmse: 0.39112 |  0:00:08s
epoch 75 | loss: 0.0837  | val_0_rmse: 0.42036 |  0:00:08s

Early stopping occurred at epoch 75 with best_epoch = 65 and best_val_0_rmse = 0.38917




epoch 0  | loss: 131.17908| val_0_rmse: 9.63596 |  0:00:00s
epoch 1  | loss: 113.27638| val_0_rmse: 10.06201|  0:00:00s
epoch 2  | loss: 98.40524| val_0_rmse: 9.20917 |  0:00:00s
epoch 3  | loss: 84.43117| val_0_rmse: 7.19179 |  0:00:00s
epoch 4  | loss: 70.04533| val_0_rmse: 6.72294 |  0:00:00s
epoch 5  | loss: 57.98544| val_0_rmse: 6.47289 |  0:00:00s
epoch 6  | loss: 49.01341| val_0_rmse: 6.27776 |  0:00:00s
epoch 7  | loss: 37.73046| val_0_rmse: 6.58235 |  0:00:00s
epoch 8  | loss: 29.30618| val_0_rmse: 6.36813 |  0:00:00s
epoch 9  | loss: 20.55578| val_0_rmse: 6.84749 |  0:00:01s
epoch 10 | loss: 14.73973| val_0_rmse: 7.7992  |  0:00:01s
epoch 11 | loss: 8.65439 | val_0_rmse: 8.78833 |  0:00:01s
epoch 12 | loss: 5.90566 | val_0_rmse: 10.19021|  0:00:01s
epoch 13 | loss: 4.20589 | val_0_rmse: 10.65449|  0:00:01s
epoch 14 | loss: 4.37519 | val_0_rmse: 10.45469|  0:00:01s
epoch 15 | loss: 4.11235 | val_0_rmse: 9.42057 |  0:00:01s


[I 2025-08-17 19:54:23,947] Trial 176 finished with value: 6.277756184007592 and parameters: {'n_d': 12, 'n_a': 61, 'n_steps': 3, 'gamma': 1.6034995116776527, 'lambda_sparse': 0.0036845207006384424}. Best is trial 163 with value: 0.19407104242042633.


epoch 16 | loss: 4.03352 | val_0_rmse: 8.80058 |  0:00:01s

Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_0_rmse = 6.27776




epoch 0  | loss: 116.99531| val_0_rmse: 9.8368  |  0:00:00s
epoch 1  | loss: 95.7721 | val_0_rmse: 9.41221 |  0:00:00s
epoch 2  | loss: 74.91307| val_0_rmse: 11.11079|  0:00:00s
epoch 3  | loss: 57.95101| val_0_rmse: 12.11444|  0:00:00s
epoch 4  | loss: 44.8526 | val_0_rmse: 12.56294|  0:00:00s
epoch 5  | loss: 31.51943| val_0_rmse: 12.74436|  0:00:00s
epoch 6  | loss: 22.92278| val_0_rmse: 13.47207|  0:00:00s
epoch 7  | loss: 13.60124| val_0_rmse: 11.6042 |  0:00:00s
epoch 8  | loss: 10.63261| val_0_rmse: 11.8141 |  0:00:00s
epoch 9  | loss: 9.52557 | val_0_rmse: 9.98519 |  0:00:00s
epoch 10 | loss: 8.7521  | val_0_rmse: 10.09705|  0:00:01s
epoch 11 | loss: 7.21716 | val_0_rmse: 8.82175 |  0:00:01s
epoch 12 | loss: 5.65424 | val_0_rmse: 7.77132 |  0:00:01s
epoch 13 | loss: 3.40773 | val_0_rmse: 6.51543 |  0:00:01s
epoch 14 | loss: 2.24026 | val_0_rmse: 5.71988 |  0:00:01s
epoch 15 | loss: 2.10974 | val_0_rmse: 5.03713 |  0:00:01s
epoch 16 | loss: 1.61064 | val_0_rmse: 5.39545 |  0:00:

[I 2025-08-17 19:54:32,457] Trial 177 finished with value: 0.515399085690102 and parameters: {'n_d': 32, 'n_a': 56, 'n_steps': 3, 'gamma': 1.7665489452215588, 'lambda_sparse': 0.0028376983003538607}. Best is trial 163 with value: 0.19407104242042633.


epoch 84 | loss: 0.13961 | val_0_rmse: 0.5584  |  0:00:08s

Early stopping occurred at epoch 84 with best_epoch = 74 and best_val_0_rmse = 0.5154




epoch 0  | loss: 162.79744| val_0_rmse: 13.95953|  0:00:00s
epoch 1  | loss: 129.1716| val_0_rmse: 10.47769|  0:00:00s
epoch 2  | loss: 104.36729| val_0_rmse: 9.13702 |  0:00:00s
epoch 3  | loss: 81.48002| val_0_rmse: 8.35032 |  0:00:00s
epoch 4  | loss: 63.59379| val_0_rmse: 7.2558  |  0:00:00s
epoch 5  | loss: 49.18166| val_0_rmse: 6.80583 |  0:00:00s
epoch 6  | loss: 33.94342| val_0_rmse: 7.51039 |  0:00:00s
epoch 7  | loss: 24.29284| val_0_rmse: 6.92421 |  0:00:00s
epoch 8  | loss: 13.85493| val_0_rmse: 6.78147 |  0:00:00s
epoch 9  | loss: 7.68568 | val_0_rmse: 8.24086 |  0:00:01s
epoch 10 | loss: 6.59279 | val_0_rmse: 8.35037 |  0:00:01s
epoch 11 | loss: 5.81446 | val_0_rmse: 9.0742  |  0:00:01s
epoch 12 | loss: 6.3955  | val_0_rmse: 8.02402 |  0:00:01s
epoch 13 | loss: 5.0994  | val_0_rmse: 6.73441 |  0:00:01s
epoch 14 | loss: 2.95218 | val_0_rmse: 5.09549 |  0:00:01s
epoch 15 | loss: 1.68318 | val_0_rmse: 4.25809 |  0:00:01s
epoch 16 | loss: 1.1079  | val_0_rmse: 4.60257 |  0:00

[I 2025-08-17 19:54:39,068] Trial 178 finished with value: 0.5451276705109018 and parameters: {'n_d': 31, 'n_a': 60, 'n_steps': 3, 'gamma': 1.5882281171075174, 'lambda_sparse': 0.010899487594386859}. Best is trial 163 with value: 0.19407104242042633.


epoch 63 | loss: 0.09502 | val_0_rmse: 0.63774 |  0:00:06s

Early stopping occurred at epoch 63 with best_epoch = 53 and best_val_0_rmse = 0.54513




epoch 0  | loss: 163.62921| val_0_rmse: 12.42593|  0:00:00s
epoch 1  | loss: 128.25859| val_0_rmse: 9.20912 |  0:00:00s
epoch 2  | loss: 98.5134 | val_0_rmse: 8.43771 |  0:00:00s
epoch 3  | loss: 74.14365| val_0_rmse: 8.28291 |  0:00:00s
epoch 4  | loss: 52.72648| val_0_rmse: 8.07513 |  0:00:00s
epoch 5  | loss: 37.35244| val_0_rmse: 8.12796 |  0:00:00s
epoch 6  | loss: 26.42806| val_0_rmse: 7.91183 |  0:00:00s
epoch 7  | loss: 16.09573| val_0_rmse: 7.45912 |  0:00:01s
epoch 8  | loss: 11.99376| val_0_rmse: 7.83026 |  0:00:01s
epoch 9  | loss: 11.53918| val_0_rmse: 8.19712 |  0:00:01s
epoch 10 | loss: 10.99389| val_0_rmse: 7.90005 |  0:00:01s
epoch 11 | loss: 10.48186| val_0_rmse: 7.17292 |  0:00:01s
epoch 12 | loss: 7.28388 | val_0_rmse: 6.76026 |  0:00:01s
epoch 13 | loss: 5.21566 | val_0_rmse: 5.6115  |  0:00:01s
epoch 14 | loss: 3.32812 | val_0_rmse: 5.33173 |  0:00:01s
epoch 15 | loss: 2.90438 | val_0_rmse: 4.81245 |  0:00:02s
epoch 16 | loss: 2.71208 | val_0_rmse: 4.88328 |  0:00

[I 2025-08-17 19:54:52,574] Trial 179 finished with value: 0.34722485280568466 and parameters: {'n_d': 37, 'n_a': 58, 'n_steps': 4, 'gamma': 1.7921393530055032, 'lambda_sparse': 0.0031825939744797643}. Best is trial 163 with value: 0.19407104242042633.


epoch 0  | loss: 142.08086| val_0_rmse: 12.16454|  0:00:00s
epoch 1  | loss: 109.69576| val_0_rmse: 9.86553 |  0:00:00s
epoch 2  | loss: 83.31555| val_0_rmse: 8.25824 |  0:00:00s
epoch 3  | loss: 63.17818| val_0_rmse: 8.28256 |  0:00:00s
epoch 4  | loss: 44.41893| val_0_rmse: 7.2645  |  0:00:00s
epoch 5  | loss: 31.70897| val_0_rmse: 5.4366  |  0:00:00s
epoch 6  | loss: 20.42756| val_0_rmse: 5.13248 |  0:00:00s
epoch 7  | loss: 14.277  | val_0_rmse: 5.96688 |  0:00:00s
epoch 8  | loss: 9.79811 | val_0_rmse: 5.91152 |  0:00:00s
epoch 9  | loss: 8.61735 | val_0_rmse: 6.1766  |  0:00:01s
epoch 10 | loss: 8.54197 | val_0_rmse: 6.13557 |  0:00:01s
epoch 11 | loss: 6.9579  | val_0_rmse: 5.51741 |  0:00:01s
epoch 12 | loss: 4.75244 | val_0_rmse: 5.02607 |  0:00:01s
epoch 13 | loss: 2.75892 | val_0_rmse: 4.36621 |  0:00:01s
epoch 14 | loss: 2.21407 | val_0_rmse: 3.69451 |  0:00:01s
epoch 15 | loss: 2.3131  | val_0_rmse: 3.52571 |  0:00:01s
epoch 16 | loss: 1.98883 | val_0_rmse: 3.3487  |  0:00

[I 2025-08-17 19:55:01,381] Trial 180 finished with value: 0.35832696502413286 and parameters: {'n_d': 33, 'n_a': 38, 'n_steps': 3, 'gamma': 1.694127272378058, 'lambda_sparse': 0.006739265240643606}. Best is trial 163 with value: 0.19407104242042633.


epoch 87 | loss: 0.09656 | val_0_rmse: 0.48203 |  0:00:08s

Early stopping occurred at epoch 87 with best_epoch = 77 and best_val_0_rmse = 0.35833




epoch 0  | loss: 133.70132| val_0_rmse: 11.01956|  0:00:00s
epoch 1  | loss: 105.88039| val_0_rmse: 7.61082 |  0:00:00s
epoch 2  | loss: 81.53247| val_0_rmse: 6.8563  |  0:00:00s
epoch 3  | loss: 59.14711| val_0_rmse: 6.92035 |  0:00:00s
epoch 4  | loss: 42.83615| val_0_rmse: 7.05418 |  0:00:00s
epoch 5  | loss: 28.58744| val_0_rmse: 6.6076  |  0:00:00s
epoch 6  | loss: 17.24618| val_0_rmse: 8.7209  |  0:00:00s
epoch 7  | loss: 10.08467| val_0_rmse: 11.45098|  0:00:00s
epoch 8  | loss: 8.18896 | val_0_rmse: 10.90886|  0:00:00s
epoch 9  | loss: 6.83783 | val_0_rmse: 7.83627 |  0:00:00s
epoch 10 | loss: 5.31172 | val_0_rmse: 7.32159 |  0:00:01s
epoch 11 | loss: 3.91732 | val_0_rmse: 6.70399 |  0:00:01s
epoch 12 | loss: 2.00461 | val_0_rmse: 5.66894 |  0:00:01s
epoch 13 | loss: 1.58658 | val_0_rmse: 4.98797 |  0:00:01s
epoch 14 | loss: 1.39681 | val_0_rmse: 4.12982 |  0:00:01s
epoch 15 | loss: 1.02935 | val_0_rmse: 3.69106 |  0:00:01s
epoch 16 | loss: 0.93537 | val_0_rmse: 3.29183 |  0:00

[I 2025-08-17 19:55:09,527] Trial 181 finished with value: 0.3192316116353148 and parameters: {'n_d': 35, 'n_a': 54, 'n_steps': 3, 'gamma': 1.5186713033140546, 'lambda_sparse': 0.0020940283258640527}. Best is trial 163 with value: 0.19407104242042633.


epoch 79 | loss: 0.24516 | val_0_rmse: 0.46398 |  0:00:08s

Early stopping occurred at epoch 79 with best_epoch = 69 and best_val_0_rmse = 0.31923




epoch 0  | loss: 132.16068| val_0_rmse: 19.78826|  0:00:00s
epoch 1  | loss: 97.38102| val_0_rmse: 13.61673|  0:00:00s
epoch 2  | loss: 68.32674| val_0_rmse: 10.30021|  0:00:00s
epoch 3  | loss: 43.28317| val_0_rmse: 18.09591|  0:00:00s
epoch 4  | loss: 29.02281| val_0_rmse: 13.8048 |  0:00:00s
epoch 5  | loss: 22.27421| val_0_rmse: 13.56057|  0:00:00s
epoch 6  | loss: 13.08815| val_0_rmse: 10.64017|  0:00:00s
epoch 7  | loss: 12.70116| val_0_rmse: 10.65229|  0:00:00s
epoch 8  | loss: 10.36316| val_0_rmse: 7.54671 |  0:00:00s
epoch 9  | loss: 7.91082 | val_0_rmse: 6.19136 |  0:00:01s
epoch 10 | loss: 3.91057 | val_0_rmse: 4.38246 |  0:00:01s
epoch 11 | loss: 2.29339 | val_0_rmse: 3.81954 |  0:00:01s
epoch 12 | loss: 2.09225 | val_0_rmse: 3.30086 |  0:00:01s
epoch 13 | loss: 1.50496 | val_0_rmse: 3.52627 |  0:00:01s
epoch 14 | loss: 0.96256 | val_0_rmse: 4.00507 |  0:00:01s
epoch 15 | loss: 0.91316 | val_0_rmse: 2.93801 |  0:00:01s
epoch 16 | loss: 0.7878  | val_0_rmse: 2.37752 |  0:00:

[I 2025-08-17 19:55:16,041] Trial 182 finished with value: 0.45239102060356684 and parameters: {'n_d': 44, 'n_a': 55, 'n_steps': 3, 'gamma': 1.5665680373270252, 'lambda_sparse': 0.0023796765379549907}. Best is trial 163 with value: 0.19407104242042633.


epoch 0  | loss: 126.3051| val_0_rmse: 15.3782 |  0:00:00s
epoch 1  | loss: 102.38305| val_0_rmse: 8.54802 |  0:00:00s
epoch 2  | loss: 81.24883| val_0_rmse: 7.24552 |  0:00:00s
epoch 3  | loss: 60.85613| val_0_rmse: 7.62622 |  0:00:00s
epoch 4  | loss: 42.89685| val_0_rmse: 8.56272 |  0:00:00s
epoch 5  | loss: 29.52916| val_0_rmse: 11.28283|  0:00:00s
epoch 6  | loss: 17.92421| val_0_rmse: 13.37326|  0:00:00s
epoch 7  | loss: 10.99251| val_0_rmse: 18.12923|  0:00:00s
epoch 8  | loss: 8.41912 | val_0_rmse: 21.59506|  0:00:00s
epoch 9  | loss: 9.40775 | val_0_rmse: 22.04984|  0:00:00s
epoch 10 | loss: 10.22072| val_0_rmse: 20.89139|  0:00:01s
epoch 11 | loss: 8.16324 | val_0_rmse: 17.19093|  0:00:01s


[I 2025-08-17 19:55:17,461] Trial 183 finished with value: 7.245519266044036 and parameters: {'n_d': 39, 'n_a': 52, 'n_steps': 3, 'gamma': 1.5471648793500679, 'lambda_sparse': 0.0018775214373063687}. Best is trial 163 with value: 0.19407104242042633.


epoch 12 | loss: 4.59182 | val_0_rmse: 14.5419 |  0:00:01s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_0_rmse = 7.24552




epoch 0  | loss: 166.6292| val_0_rmse: 13.1131 |  0:00:00s
epoch 1  | loss: 141.53886| val_0_rmse: 10.82327|  0:00:00s
epoch 2  | loss: 119.90334| val_0_rmse: 9.71708 |  0:00:00s
epoch 3  | loss: 102.39509| val_0_rmse: 9.24334 |  0:00:00s
epoch 4  | loss: 86.99446| val_0_rmse: 8.68203 |  0:00:00s
epoch 5  | loss: 73.02802| val_0_rmse: 8.02815 |  0:00:00s
epoch 6  | loss: 56.62411| val_0_rmse: 7.28391 |  0:00:00s
epoch 7  | loss: 43.25556| val_0_rmse: 6.52257 |  0:00:00s
epoch 8  | loss: 34.74026| val_0_rmse: 6.05984 |  0:00:00s
epoch 9  | loss: 24.46521| val_0_rmse: 6.09158 |  0:00:01s
epoch 10 | loss: 15.76336| val_0_rmse: 6.39563 |  0:00:01s
epoch 11 | loss: 10.99499| val_0_rmse: 6.65956 |  0:00:01s
epoch 12 | loss: 7.62602 | val_0_rmse: 5.85925 |  0:00:01s
epoch 13 | loss: 8.07722 | val_0_rmse: 5.41138 |  0:00:01s
epoch 14 | loss: 8.03469 | val_0_rmse: 5.00822 |  0:00:01s
epoch 15 | loss: 8.07381 | val_0_rmse: 4.69417 |  0:00:01s
epoch 16 | loss: 7.36453 | val_0_rmse: 3.96307 |  0:0

[I 2025-08-17 19:55:26,479] Trial 184 finished with value: 0.32207155595036113 and parameters: {'n_d': 30, 'n_a': 56, 'n_steps': 3, 'gamma': 1.6160404653338818, 'lambda_sparse': 0.002751641988588813}. Best is trial 163 with value: 0.19407104242042633.


epoch 85 | loss: 0.08968 | val_0_rmse: 0.34899 |  0:00:08s
epoch 86 | loss: 0.08091 | val_0_rmse: 0.365   |  0:00:08s

Early stopping occurred at epoch 86 with best_epoch = 76 and best_val_0_rmse = 0.32207




epoch 0  | loss: 163.5761| val_0_rmse: 12.63416|  0:00:00s
epoch 1  | loss: 133.31624| val_0_rmse: 11.02568|  0:00:00s
epoch 2  | loss: 110.2635| val_0_rmse: 10.31192|  0:00:00s
epoch 3  | loss: 88.57439| val_0_rmse: 9.74016 |  0:00:00s
epoch 4  | loss: 70.97681| val_0_rmse: 9.21515 |  0:00:00s
epoch 5  | loss: 57.48856| val_0_rmse: 6.52032 |  0:00:00s
epoch 6  | loss: 46.0391 | val_0_rmse: 6.68395 |  0:00:00s
epoch 7  | loss: 36.502  | val_0_rmse: 8.51494 |  0:00:00s
epoch 8  | loss: 24.83015| val_0_rmse: 10.93777|  0:00:00s
epoch 9  | loss: 17.06996| val_0_rmse: 11.94725|  0:00:00s
epoch 10 | loss: 12.23506| val_0_rmse: 11.60932|  0:00:01s
epoch 11 | loss: 9.76254 | val_0_rmse: 10.80535|  0:00:01s
epoch 12 | loss: 8.18637 | val_0_rmse: 10.6651 |  0:00:01s
epoch 13 | loss: 7.21336 | val_0_rmse: 9.86078 |  0:00:01s
epoch 14 | loss: 6.62298 | val_0_rmse: 8.85985 |  0:00:01s


[I 2025-08-17 19:55:28,192] Trial 185 finished with value: 6.520321426527865 and parameters: {'n_d': 28, 'n_a': 53, 'n_steps': 3, 'gamma': 1.5006006377322398, 'lambda_sparse': 0.0015963394256345797}. Best is trial 163 with value: 0.19407104242042633.


epoch 15 | loss: 5.17376 | val_0_rmse: 7.39769 |  0:00:01s

Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_rmse = 6.52032




epoch 0  | loss: 135.39406| val_0_rmse: 9.37177 |  0:00:00s
epoch 1  | loss: 89.04944| val_0_rmse: 8.27689 |  0:00:00s
epoch 2  | loss: 58.45047| val_0_rmse: 7.87773 |  0:00:00s
epoch 3  | loss: 35.19965| val_0_rmse: 9.45412 |  0:00:00s
epoch 4  | loss: 23.42116| val_0_rmse: 10.60484|  0:00:00s
epoch 5  | loss: 14.9916 | val_0_rmse: 10.01781|  0:00:00s
epoch 6  | loss: 15.83179| val_0_rmse: 8.24646 |  0:00:00s
epoch 7  | loss: 14.55526| val_0_rmse: 8.53699 |  0:00:00s
epoch 8  | loss: 11.4503 | val_0_rmse: 7.44977 |  0:00:00s
epoch 9  | loss: 6.60315 | val_0_rmse: 7.37719 |  0:00:01s
epoch 10 | loss: 3.53764 | val_0_rmse: 6.61999 |  0:00:01s
epoch 11 | loss: 3.49071 | val_0_rmse: 5.56114 |  0:00:01s
epoch 12 | loss: 2.45052 | val_0_rmse: 6.51954 |  0:00:01s
epoch 13 | loss: 1.51019 | val_0_rmse: 6.77518 |  0:00:01s
epoch 14 | loss: 1.24693 | val_0_rmse: 5.41716 |  0:00:01s
epoch 15 | loss: 1.21245 | val_0_rmse: 4.41142 |  0:00:01s
epoch 16 | loss: 0.9801  | val_0_rmse: 3.65931 |  0:00:

[I 2025-08-17 19:55:39,097] Trial 186 finished with value: 0.30922140806054793 and parameters: {'n_d': 57, 'n_a': 46, 'n_steps': 3, 'gamma': 1.63209538879986, 'lambda_sparse': 0.00381616778677567}. Best is trial 163 with value: 0.19407104242042633.


epoch 98 | loss: 0.0614  | val_0_rmse: 0.30922 |  0:00:10s
epoch 99 | loss: 0.0565  | val_0_rmse: 0.32482 |  0:00:10s
Stop training because you reached max_epochs = 100 with best_epoch = 98 and best_val_0_rmse = 0.30922




epoch 0  | loss: 166.29759| val_0_rmse: 10.53939|  0:00:00s
epoch 1  | loss: 127.93172| val_0_rmse: 9.2473  |  0:00:00s
epoch 2  | loss: 99.38498| val_0_rmse: 8.49597 |  0:00:00s
epoch 3  | loss: 75.36411| val_0_rmse: 6.80067 |  0:00:00s
epoch 4  | loss: 57.35529| val_0_rmse: 6.37007 |  0:00:00s
epoch 5  | loss: 38.96993| val_0_rmse: 5.58932 |  0:00:00s
epoch 6  | loss: 28.30346| val_0_rmse: 5.60088 |  0:00:00s
epoch 7  | loss: 17.20365| val_0_rmse: 7.92965 |  0:00:00s
epoch 8  | loss: 10.30613| val_0_rmse: 10.44797|  0:00:00s
epoch 9  | loss: 7.7702  | val_0_rmse: 13.16495|  0:00:01s
epoch 10 | loss: 8.08976 | val_0_rmse: 15.57805|  0:00:01s
epoch 11 | loss: 7.86343 | val_0_rmse: 14.23008|  0:00:01s
epoch 12 | loss: 5.54895 | val_0_rmse: 12.28363|  0:00:01s
epoch 13 | loss: 3.39895 | val_0_rmse: 9.01752 |  0:00:01s
epoch 14 | loss: 1.65068 | val_0_rmse: 6.43253 |  0:00:01s
epoch 15 | loss: 1.65619 | val_0_rmse: 4.75947 |  0:00:01s
epoch 16 | loss: 1.73733 | val_0_rmse: 3.91427 |  0:00

[I 2025-08-17 19:55:45,284] Trial 187 finished with value: 0.6608900944209133 and parameters: {'n_d': 36, 'n_a': 57, 'n_steps': 3, 'gamma': 1.5831575789153374, 'lambda_sparse': 0.0013376307591703412}. Best is trial 163 with value: 0.19407104242042633.


epoch 58 | loss: 0.10008 | val_0_rmse: 0.77577 |  0:00:06s

Early stopping occurred at epoch 58 with best_epoch = 48 and best_val_0_rmse = 0.66089




epoch 0  | loss: 113.39631| val_0_rmse: 10.54703|  0:00:00s
epoch 1  | loss: 87.45283| val_0_rmse: 7.70295 |  0:00:00s
epoch 2  | loss: 63.03805| val_0_rmse: 6.96624 |  0:00:00s
epoch 3  | loss: 44.6949 | val_0_rmse: 7.42605 |  0:00:00s
epoch 4  | loss: 30.63568| val_0_rmse: 9.19445 |  0:00:00s
epoch 5  | loss: 20.70244| val_0_rmse: 10.18909|  0:00:00s
epoch 6  | loss: 14.04576| val_0_rmse: 13.58428|  0:00:00s
epoch 7  | loss: 10.41419| val_0_rmse: 15.57213|  0:00:00s
epoch 8  | loss: 10.18152| val_0_rmse: 15.79474|  0:00:00s
epoch 9  | loss: 10.86478| val_0_rmse: 13.51003|  0:00:00s
epoch 10 | loss: 6.81488 | val_0_rmse: 10.19824|  0:00:01s
epoch 11 | loss: 4.53267 | val_0_rmse: 7.19291 |  0:00:01s
epoch 12 | loss: 2.67305 | val_0_rmse: 5.15163 |  0:00:01s
epoch 13 | loss: 2.55473 | val_0_rmse: 4.21089 |  0:00:01s
epoch 14 | loss: 2.22324 | val_0_rmse: 4.21422 |  0:00:01s
epoch 15 | loss: 1.52912 | val_0_rmse: 4.34653 |  0:00:01s
epoch 16 | loss: 0.99001 | val_0_rmse: 4.379   |  0:00:

[I 2025-08-17 19:55:50,753] Trial 188 finished with value: 0.632516688910736 and parameters: {'n_d': 33, 'n_a': 63, 'n_steps': 3, 'gamma': 1.5604031249060732, 'lambda_sparse': 0.005569162377081204}. Best is trial 163 with value: 0.19407104242042633.


epoch 53 | loss: 0.11734 | val_0_rmse: 0.86783 |  0:00:05s

Early stopping occurred at epoch 53 with best_epoch = 43 and best_val_0_rmse = 0.63252




epoch 0  | loss: 119.16286| val_0_rmse: 13.32829|  0:00:00s
epoch 1  | loss: 98.85989| val_0_rmse: 8.92526 |  0:00:00s
epoch 2  | loss: 79.18345| val_0_rmse: 8.63842 |  0:00:00s
epoch 3  | loss: 63.12259| val_0_rmse: 8.56026 |  0:00:00s
epoch 4  | loss: 43.58979| val_0_rmse: 7.3486  |  0:00:00s
epoch 5  | loss: 31.53924| val_0_rmse: 9.1963  |  0:00:00s
epoch 6  | loss: 19.91115| val_0_rmse: 9.40379 |  0:00:00s
epoch 7  | loss: 13.27411| val_0_rmse: 9.77454 |  0:00:00s
epoch 8  | loss: 9.8743  | val_0_rmse: 10.70714|  0:00:00s
epoch 9  | loss: 7.99543 | val_0_rmse: 9.85598 |  0:00:00s
epoch 10 | loss: 7.20243 | val_0_rmse: 7.6827  |  0:00:01s
epoch 11 | loss: 6.50303 | val_0_rmse: 5.98057 |  0:00:01s
epoch 12 | loss: 5.32289 | val_0_rmse: 5.06118 |  0:00:01s
epoch 13 | loss: 2.89777 | val_0_rmse: 4.67803 |  0:00:01s
epoch 14 | loss: 1.92367 | val_0_rmse: 3.69388 |  0:00:01s
epoch 15 | loss: 2.05452 | val_0_rmse: 3.85625 |  0:00:01s
epoch 16 | loss: 1.45864 | val_0_rmse: 3.64295 |  0:00:

[I 2025-08-17 19:55:53,643] Trial 189 finished with value: 2.459434259400489 and parameters: {'n_d': 34, 'n_a': 42, 'n_steps': 3, 'gamma': 1.5311655229347974, 'lambda_sparse': 0.0022061277973086033}. Best is trial 163 with value: 0.19407104242042633.


epoch 0  | loss: 116.96696| val_0_rmse: 13.96419|  0:00:00s
epoch 1  | loss: 79.32384| val_0_rmse: 11.091  |  0:00:00s
epoch 2  | loss: 50.0499 | val_0_rmse: 13.4946 |  0:00:00s
epoch 3  | loss: 34.48251| val_0_rmse: 13.4695 |  0:00:00s
epoch 4  | loss: 23.20823| val_0_rmse: 14.16052|  0:00:00s
epoch 5  | loss: 18.34992| val_0_rmse: 14.90815|  0:00:00s
epoch 6  | loss: 10.93924| val_0_rmse: 14.22101|  0:00:00s
epoch 7  | loss: 9.60578 | val_0_rmse: 13.28523|  0:00:00s
epoch 8  | loss: 7.17026 | val_0_rmse: 11.74522|  0:00:00s
epoch 9  | loss: 4.66552 | val_0_rmse: 10.1252 |  0:00:01s
epoch 10 | loss: 2.88037 | val_0_rmse: 8.41778 |  0:00:01s
epoch 11 | loss: 2.49011 | val_0_rmse: 7.55073 |  0:00:01s
epoch 12 | loss: 1.77144 | val_0_rmse: 7.57398 |  0:00:01s
epoch 13 | loss: 1.0795  | val_0_rmse: 6.99282 |  0:00:01s
epoch 14 | loss: 1.15167 | val_0_rmse: 5.99033 |  0:00:01s
epoch 15 | loss: 0.82825 | val_0_rmse: 4.85555 |  0:00:01s
epoch 16 | loss: 0.94971 | val_0_rmse: 4.53778 |  0:00:

[I 2025-08-17 19:56:03,401] Trial 190 finished with value: 0.48513169962576996 and parameters: {'n_d': 49, 'n_a': 59, 'n_steps': 3, 'gamma': 1.6488277278556596, 'lambda_sparse': 0.003280872534423717}. Best is trial 163 with value: 0.19407104242042633.


epoch 89 | loss: 0.07412 | val_0_rmse: 0.5866  |  0:00:09s

Early stopping occurred at epoch 89 with best_epoch = 79 and best_val_0_rmse = 0.48513




epoch 0  | loss: 172.24426| val_0_rmse: 13.95939|  0:00:00s
epoch 1  | loss: 133.81036| val_0_rmse: 9.50592 |  0:00:00s
epoch 2  | loss: 106.79143| val_0_rmse: 8.15142 |  0:00:00s
epoch 3  | loss: 85.25527| val_0_rmse: 7.1623  |  0:00:00s
epoch 4  | loss: 66.8633 | val_0_rmse: 6.02849 |  0:00:00s
epoch 5  | loss: 50.85338| val_0_rmse: 5.45183 |  0:00:00s
epoch 6  | loss: 35.13763| val_0_rmse: 5.74198 |  0:00:00s
epoch 7  | loss: 25.00275| val_0_rmse: 6.13811 |  0:00:00s
epoch 8  | loss: 16.56174| val_0_rmse: 6.10882 |  0:00:01s
epoch 9  | loss: 11.77906| val_0_rmse: 6.17628 |  0:00:01s
epoch 10 | loss: 8.09026 | val_0_rmse: 5.73045 |  0:00:01s
epoch 11 | loss: 7.82296 | val_0_rmse: 5.09776 |  0:00:01s
epoch 12 | loss: 7.39382 | val_0_rmse: 3.98093 |  0:00:01s
epoch 13 | loss: 5.43268 | val_0_rmse: 3.65937 |  0:00:01s
epoch 14 | loss: 4.14299 | val_0_rmse: 3.66649 |  0:00:01s
epoch 15 | loss: 2.92852 | val_0_rmse: 3.09433 |  0:00:01s
epoch 16 | loss: 1.85277 | val_0_rmse: 2.68944 |  0:0

[I 2025-08-17 19:56:11,789] Trial 191 finished with value: 0.4609464054439508 and parameters: {'n_d': 35, 'n_a': 59, 'n_steps': 3, 'gamma': 1.4623608818662353, 'lambda_sparse': 0.004934888455807854}. Best is trial 163 with value: 0.19407104242042633.



Early stopping occurred at epoch 76 with best_epoch = 66 and best_val_0_rmse = 0.46095




epoch 0  | loss: 138.90329| val_0_rmse: 11.47829|  0:00:00s
epoch 1  | loss: 105.93356| val_0_rmse: 9.88387 |  0:00:00s
epoch 2  | loss: 78.76179| val_0_rmse: 10.43324|  0:00:00s
epoch 3  | loss: 58.2214 | val_0_rmse: 9.51387 |  0:00:00s
epoch 4  | loss: 40.52709| val_0_rmse: 11.01663|  0:00:00s
epoch 5  | loss: 27.41766| val_0_rmse: 11.15988|  0:00:00s
epoch 6  | loss: 18.95267| val_0_rmse: 11.28086|  0:00:00s
epoch 7  | loss: 12.15445| val_0_rmse: 12.47329|  0:00:00s
epoch 8  | loss: 9.88098 | val_0_rmse: 12.77785|  0:00:00s
epoch 9  | loss: 7.9578  | val_0_rmse: 10.08435|  0:00:01s
epoch 10 | loss: 7.79062 | val_0_rmse: 9.1421  |  0:00:01s
epoch 11 | loss: 7.27746 | val_0_rmse: 7.30831 |  0:00:01s
epoch 12 | loss: 4.5132  | val_0_rmse: 6.09244 |  0:00:01s
epoch 13 | loss: 4.08887 | val_0_rmse: 5.6677  |  0:00:01s
epoch 14 | loss: 2.54747 | val_0_rmse: 6.31059 |  0:00:01s
epoch 15 | loss: 2.52924 | val_0_rmse: 6.1885  |  0:00:01s
epoch 16 | loss: 2.05483 | val_0_rmse: 6.32948 |  0:00

[I 2025-08-17 19:56:22,187] Trial 192 finished with value: 0.3366633490917582 and parameters: {'n_d': 32, 'n_a': 61, 'n_steps': 3, 'gamma': 1.5136907322930995, 'lambda_sparse': 0.005947768875049455}. Best is trial 163 with value: 0.19407104242042633.


epoch 98 | loss: 0.33942 | val_0_rmse: 0.70081 |  0:00:10s

Early stopping occurred at epoch 98 with best_epoch = 88 and best_val_0_rmse = 0.33666




epoch 0  | loss: 141.04356| val_0_rmse: 10.17526|  0:00:00s
epoch 1  | loss: 111.94275| val_0_rmse: 13.49471|  0:00:00s
epoch 2  | loss: 87.67428| val_0_rmse: 14.74543|  0:00:00s
epoch 3  | loss: 67.86035| val_0_rmse: 14.71713|  0:00:00s
epoch 4  | loss: 51.10736| val_0_rmse: 11.53138|  0:00:00s
epoch 5  | loss: 34.92094| val_0_rmse: 7.48583 |  0:00:00s
epoch 6  | loss: 25.00295| val_0_rmse: 6.71548 |  0:00:00s
epoch 7  | loss: 15.69178| val_0_rmse: 7.30109 |  0:00:00s
epoch 8  | loss: 10.39189| val_0_rmse: 8.7676  |  0:00:00s
epoch 9  | loss: 7.98694 | val_0_rmse: 9.65029 |  0:00:01s
epoch 10 | loss: 7.95016 | val_0_rmse: 9.85424 |  0:00:01s
epoch 11 | loss: 7.13812 | val_0_rmse: 9.73257 |  0:00:01s
epoch 12 | loss: 5.79687 | val_0_rmse: 9.51797 |  0:00:01s
epoch 13 | loss: 3.4311  | val_0_rmse: 9.02429 |  0:00:01s
epoch 14 | loss: 2.09436 | val_0_rmse: 9.60042 |  0:00:01s


[I 2025-08-17 19:56:24,045] Trial 193 finished with value: 6.715477466476607 and parameters: {'n_d': 37, 'n_a': 58, 'n_steps': 3, 'gamma': 1.4915699547448245, 'lambda_sparse': 0.0024285356120809427}. Best is trial 163 with value: 0.19407104242042633.


epoch 15 | loss: 1.77045 | val_0_rmse: 10.10901|  0:00:01s
epoch 16 | loss: 1.54715 | val_0_rmse: 9.5788  |  0:00:01s

Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_0_rmse = 6.71548




epoch 0  | loss: 110.22472| val_0_rmse: 13.85635|  0:00:00s
epoch 1  | loss: 87.66036| val_0_rmse: 10.35077|  0:00:00s
epoch 2  | loss: 69.23995| val_0_rmse: 8.27435 |  0:00:00s
epoch 3  | loss: 50.51856| val_0_rmse: 8.97921 |  0:00:00s
epoch 4  | loss: 35.85549| val_0_rmse: 10.18972|  0:00:00s
epoch 5  | loss: 24.72221| val_0_rmse: 9.12164 |  0:00:00s
epoch 6  | loss: 16.79175| val_0_rmse: 7.43889 |  0:00:00s
epoch 7  | loss: 10.89871| val_0_rmse: 7.06639 |  0:00:00s
epoch 8  | loss: 9.74468 | val_0_rmse: 8.19227 |  0:00:00s
epoch 9  | loss: 8.2803  | val_0_rmse: 8.47451 |  0:00:00s
epoch 10 | loss: 7.77503 | val_0_rmse: 8.64629 |  0:00:01s
epoch 11 | loss: 5.82324 | val_0_rmse: 7.80931 |  0:00:01s
epoch 12 | loss: 3.91689 | val_0_rmse: 7.08177 |  0:00:01s
epoch 13 | loss: 2.86091 | val_0_rmse: 6.43673 |  0:00:01s
epoch 14 | loss: 2.61452 | val_0_rmse: 5.81457 |  0:00:01s
epoch 15 | loss: 2.19489 | val_0_rmse: 5.48164 |  0:00:01s
epoch 16 | loss: 1.58056 | val_0_rmse: 5.21499 |  0:00:

[I 2025-08-17 19:56:31,741] Trial 194 finished with value: 0.5639945775961056 and parameters: {'n_d': 34, 'n_a': 44, 'n_steps': 3, 'gamma': 1.5393401182842301, 'lambda_sparse': 0.008063261104883646}. Best is trial 163 with value: 0.19407104242042633.


epoch 0  | loss: 166.69518| val_0_rmse: 12.92662|  0:00:00s
epoch 1  | loss: 141.97917| val_0_rmse: 10.76201|  0:00:00s
epoch 2  | loss: 118.93889| val_0_rmse: 9.66695 |  0:00:00s
epoch 3  | loss: 101.30902| val_0_rmse: 8.82811 |  0:00:00s
epoch 4  | loss: 84.45868| val_0_rmse: 7.90221 |  0:00:00s
epoch 5  | loss: 68.07336| val_0_rmse: 7.0788  |  0:00:00s
epoch 6  | loss: 53.37679| val_0_rmse: 6.90138 |  0:00:00s
epoch 7  | loss: 40.79   | val_0_rmse: 5.10679 |  0:00:00s
epoch 8  | loss: 29.03953| val_0_rmse: 4.74457 |  0:00:00s
epoch 9  | loss: 19.54002| val_0_rmse: 4.6706  |  0:00:00s
epoch 10 | loss: 12.48247| val_0_rmse: 6.68753 |  0:00:01s
epoch 11 | loss: 7.83243 | val_0_rmse: 7.87623 |  0:00:01s
epoch 12 | loss: 6.98019 | val_0_rmse: 9.53547 |  0:00:01s
epoch 13 | loss: 6.7546  | val_0_rmse: 9.9376  |  0:00:01s
epoch 14 | loss: 7.7562  | val_0_rmse: 10.18305|  0:00:01s
epoch 15 | loss: 8.02632 | val_0_rmse: 10.49778|  0:00:01s
epoch 16 | loss: 6.97702 | val_0_rmse: 9.83156 |  0:

[I 2025-08-17 19:56:39,159] Trial 195 finished with value: 0.4628041367324569 and parameters: {'n_d': 30, 'n_a': 56, 'n_steps': 3, 'gamma': 1.5654689389297567, 'lambda_sparse': 0.007255105127564547}. Best is trial 163 with value: 0.19407104242042633.


epoch 73 | loss: 0.14932 | val_0_rmse: 0.50129 |  0:00:07s
epoch 74 | loss: 0.19705 | val_0_rmse: 0.48754 |  0:00:07s

Early stopping occurred at epoch 74 with best_epoch = 64 and best_val_0_rmse = 0.4628




epoch 0  | loss: 137.744 | val_0_rmse: 13.91348|  0:00:00s
epoch 1  | loss: 108.16188| val_0_rmse: 8.26078 |  0:00:00s
epoch 2  | loss: 81.76782| val_0_rmse: 9.66515 |  0:00:00s
epoch 3  | loss: 61.52271| val_0_rmse: 9.82091 |  0:00:00s
epoch 4  | loss: 42.20882| val_0_rmse: 10.34222|  0:00:00s
epoch 5  | loss: 28.95862| val_0_rmse: 9.44404 |  0:00:00s
epoch 6  | loss: 17.99134| val_0_rmse: 8.35008 |  0:00:00s
epoch 7  | loss: 10.70062| val_0_rmse: 7.97339 |  0:00:00s
epoch 8  | loss: 7.15107 | val_0_rmse: 7.22483 |  0:00:00s
epoch 9  | loss: 7.22113 | val_0_rmse: 6.83105 |  0:00:00s
epoch 10 | loss: 6.66297 | val_0_rmse: 6.62539 |  0:00:01s
epoch 11 | loss: 5.10796 | val_0_rmse: 5.89387 |  0:00:01s
epoch 12 | loss: 3.09528 | val_0_rmse: 5.00226 |  0:00:01s
epoch 13 | loss: 1.94657 | val_0_rmse: 4.04483 |  0:00:01s
epoch 14 | loss: 1.58204 | val_0_rmse: 3.37717 |  0:00:01s
epoch 15 | loss: 1.6543  | val_0_rmse: 2.98274 |  0:00:01s
epoch 16 | loss: 1.07197 | val_0_rmse: 2.8762  |  0:00:

[I 2025-08-17 19:56:49,159] Trial 196 finished with value: 0.3026301323351321 and parameters: {'n_d': 31, 'n_a': 54, 'n_steps': 3, 'gamma': 1.1104021273907412, 'lambda_sparse': 0.001947843929751713}. Best is trial 163 with value: 0.19407104242042633.


epoch 99 | loss: 0.06585 | val_0_rmse: 0.30263 |  0:00:09s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_rmse = 0.30263




epoch 0  | loss: 137.97609| val_0_rmse: 10.70105|  0:00:00s
epoch 1  | loss: 108.22813| val_0_rmse: 7.57425 |  0:00:00s
epoch 2  | loss: 81.28765| val_0_rmse: 6.71345 |  0:00:00s
epoch 3  | loss: 62.15992| val_0_rmse: 6.11478 |  0:00:00s
epoch 4  | loss: 43.20688| val_0_rmse: 6.37663 |  0:00:00s
epoch 5  | loss: 28.97343| val_0_rmse: 6.39394 |  0:00:00s
epoch 6  | loss: 18.53936| val_0_rmse: 6.70047 |  0:00:00s
epoch 7  | loss: 10.97969| val_0_rmse: 6.67097 |  0:00:00s
epoch 8  | loss: 7.48733 | val_0_rmse: 6.43965 |  0:00:00s
epoch 9  | loss: 6.80532 | val_0_rmse: 5.85053 |  0:00:01s
epoch 10 | loss: 6.7793  | val_0_rmse: 5.36798 |  0:00:01s
epoch 11 | loss: 4.38558 | val_0_rmse: 4.58662 |  0:00:01s
epoch 12 | loss: 2.85464 | val_0_rmse: 3.98862 |  0:00:01s
epoch 13 | loss: 1.66034 | val_0_rmse: 3.66931 |  0:00:01s
epoch 14 | loss: 1.48615 | val_0_rmse: 3.39002 |  0:00:01s
epoch 15 | loss: 1.47746 | val_0_rmse: 3.32462 |  0:00:01s
epoch 16 | loss: 1.17285 | val_0_rmse: 3.29576 |  0:00

[I 2025-08-17 19:56:55,294] Trial 197 finished with value: 0.4476553285200525 and parameters: {'n_d': 31, 'n_a': 54, 'n_steps': 3, 'gamma': 1.0248435084975196, 'lambda_sparse': 0.001969992124209373}. Best is trial 163 with value: 0.19407104242042633.


epoch 59 | loss: 0.0947  | val_0_rmse: 0.52472 |  0:00:06s

Early stopping occurred at epoch 59 with best_epoch = 49 and best_val_0_rmse = 0.44766




epoch 0  | loss: 211.08281| val_0_rmse: 16.09362|  0:00:00s
epoch 1  | loss: 176.83742| val_0_rmse: 12.9052 |  0:00:00s
epoch 2  | loss: 147.64093| val_0_rmse: 11.51902|  0:00:00s
epoch 3  | loss: 124.82542| val_0_rmse: 10.78799|  0:00:00s
epoch 4  | loss: 106.38557| val_0_rmse: 10.06436|  0:00:00s
epoch 5  | loss: 89.16358| val_0_rmse: 9.19133 |  0:00:00s
epoch 6  | loss: 76.06256| val_0_rmse: 8.55566 |  0:00:00s
epoch 7  | loss: 62.93785| val_0_rmse: 7.60321 |  0:00:00s
epoch 8  | loss: 50.73633| val_0_rmse: 6.86022 |  0:00:00s
epoch 9  | loss: 38.42268| val_0_rmse: 6.09124 |  0:00:00s
epoch 10 | loss: 28.73636| val_0_rmse: 5.17686 |  0:00:01s
epoch 11 | loss: 19.0922 | val_0_rmse: 4.55821 |  0:00:01s
epoch 12 | loss: 11.70913| val_0_rmse: 4.82174 |  0:00:01s
epoch 13 | loss: 7.24005 | val_0_rmse: 4.96661 |  0:00:01s
epoch 14 | loss: 4.92817 | val_0_rmse: 3.93784 |  0:00:01s
epoch 15 | loss: 4.72241 | val_0_rmse: 4.35544 |  0:00:01s
epoch 16 | loss: 4.57373 | val_0_rmse: 5.34539 |  0

[I 2025-08-17 19:57:05,207] Trial 198 finished with value: 0.2930421054250124 and parameters: {'n_d': 29, 'n_a': 52, 'n_steps': 3, 'gamma': 1.2642574759716978, 'lambda_sparse': 0.0017067941640728924}. Best is trial 163 with value: 0.19407104242042633.



Early stopping occurred at epoch 99 with best_epoch = 89 and best_val_0_rmse = 0.29304




epoch 0  | loss: 190.13977| val_0_rmse: 13.58577|  0:00:00s
epoch 1  | loss: 161.87497| val_0_rmse: 12.06313|  0:00:00s
epoch 2  | loss: 137.50763| val_0_rmse: 11.05401|  0:00:00s
epoch 3  | loss: 116.74818| val_0_rmse: 10.3902 |  0:00:00s
epoch 4  | loss: 99.81029| val_0_rmse: 9.867   |  0:00:00s
epoch 5  | loss: 85.76647| val_0_rmse: 8.91866 |  0:00:00s
epoch 6  | loss: 72.9094 | val_0_rmse: 8.03181 |  0:00:00s
epoch 7  | loss: 62.27866| val_0_rmse: 7.28991 |  0:00:00s
epoch 8  | loss: 48.20228| val_0_rmse: 6.89581 |  0:00:00s
epoch 9  | loss: 37.41531| val_0_rmse: 7.53334 |  0:00:01s
epoch 10 | loss: 27.00118| val_0_rmse: 9.3132  |  0:00:01s
epoch 11 | loss: 17.71459| val_0_rmse: 12.2578 |  0:00:01s
epoch 12 | loss: 11.34722| val_0_rmse: 15.98418|  0:00:01s
epoch 13 | loss: 7.18372 | val_0_rmse: 20.42432|  0:00:01s
epoch 14 | loss: 4.98931 | val_0_rmse: 23.96934|  0:00:01s
epoch 15 | loss: 4.68488 | val_0_rmse: 23.65891|  0:00:01s
epoch 16 | loss: 5.03415 | val_0_rmse: 20.83493|  0:

[I 2025-08-17 19:57:07,225] Trial 199 finished with value: 6.895805911331691 and parameters: {'n_d': 25, 'n_a': 52, 'n_steps': 3, 'gamma': 1.0911139005617336, 'lambda_sparse': 0.0022011322558988856}. Best is trial 163 with value: 0.19407104242042633.


epoch 17 | loss: 4.76156 | val_0_rmse: 17.30763|  0:00:01s
epoch 18 | loss: 4.30708 | val_0_rmse: 13.26   |  0:00:01s

Early stopping occurred at epoch 18 with best_epoch = 8 and best_val_0_rmse = 6.89581
Best TabNet params: {'n_d': 39, 'n_a': 57, 'n_steps': 3, 'gamma': 1.521073730613384, 'lambda_sparse': 0.007316938863543923}
Best TabNet RMSE: 0.19407104242042633




epoch 0  | loss: 207.52904| val_0_rmse: 16.709  |  0:00:00s
epoch 1  | loss: 163.26042| val_0_rmse: 12.5529 |  0:00:00s
epoch 2  | loss: 131.57022| val_0_rmse: 10.7921 |  0:00:00s
epoch 3  | loss: 107.45393| val_0_rmse: 9.58088 |  0:00:00s
epoch 4  | loss: 86.54867| val_0_rmse: 8.95766 |  0:00:00s
epoch 5  | loss: 68.47581| val_0_rmse: 8.35169 |  0:00:00s
epoch 6  | loss: 53.32837| val_0_rmse: 7.73243 |  0:00:00s
epoch 7  | loss: 37.12438| val_0_rmse: 7.2653  |  0:00:00s
epoch 8  | loss: 25.71351| val_0_rmse: 6.36269 |  0:00:00s
epoch 9  | loss: 15.74526| val_0_rmse: 6.57215 |  0:00:01s
epoch 10 | loss: 9.25364 | val_0_rmse: 9.78087 |  0:00:01s
epoch 11 | loss: 5.42163 | val_0_rmse: 10.73247|  0:00:01s
epoch 12 | loss: 4.87476 | val_0_rmse: 8.46662 |  0:00:01s
epoch 13 | loss: 4.75264 | val_0_rmse: 7.00895 |  0:00:01s
epoch 14 | loss: 4.43995 | val_0_rmse: 6.75142 |  0:00:01s
epoch 15 | loss: 3.13917 | val_0_rmse: 6.30315 |  0:00:01s
epoch 16 | loss: 1.93881 | val_0_rmse: 6.47245 |  0:



In [28]:
# Optuna optimization for bayesian ridge
def objective_br(trial):
    params = {
        'alpha_1': trial.suggest_float('alpha_1', 1e-6, 1e-1, log=True),
        'alpha_2': trial.suggest_float('alpha_2', 1e-6, 1e-1, log=True),
        'lambda_1': trial.suggest_float('lambda_1', 1e-6, 1e-1, log=True),
        'lambda_2': trial.suggest_float('lambda_2', 1e-6, 1e-1, log=True)
    }
    bayesian_ridge = BayesianRidge(**params)
    bayesian_ridge.fit(X_train, y_train)
    pred = bayesian_ridge.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    return rmse

# Optimize hyperparameters with Optuna
study_bayesian = optuna.create_study(direction='minimize')
study_bayesian.optimize(objective_br, n_trials=n_trials)
print("Best Bayesian params:", study_bayesian.best_params)
print("Best Bayesian RMSE:", study_bayesian.best_value)

# retrain
bayesian_best = BayesianRidge(**study_bayesian.best_params)
bayesian_best.fit(X_train, y_train)
bayesian_best_pred = bayesian_best.predict(X_val)
bayesian_best_rmse = np.sqrt(mean_squared_error(y_val, bayesian_best_pred))
print(f"Tuned Bayesian RMSE: {bayesian_best_rmse:.5f}")


[I 2025-08-17 19:57:17,647] A new study created in memory with name: no-name-2d50bf2f-425c-4cb6-a9d7-da74cd35b421
[I 2025-08-17 19:57:17,689] Trial 0 finished with value: 0.12834342437766919 and parameters: {'alpha_1': 0.00026192608138955845, 'alpha_2': 1.7238918348084623e-05, 'lambda_1': 0.025189689292711537, 'lambda_2': 5.690395952339314e-06}. Best is trial 0 with value: 0.12834342437766919.
[I 2025-08-17 19:57:17,724] Trial 1 finished with value: 0.128241709267092 and parameters: {'alpha_1': 2.329405388708585e-06, 'alpha_2': 7.994269953359608e-05, 'lambda_1': 0.00017847819814141843, 'lambda_2': 0.0026189459917066536}. Best is trial 1 with value: 0.128241709267092.
[I 2025-08-17 19:57:17,757] Trial 2 finished with value: 0.12784040321873866 and parameters: {'alpha_1': 0.00589770665936801, 'alpha_2': 0.00022271263425587587, 'lambda_1': 6.875217385976214e-05, 'lambda_2': 0.019178606453372053}. Best is trial 2 with value: 0.12784040321873866.
[I 2025-08-17 19:57:17,789] Trial 3 finished

Best Bayesian params: {'alpha_1': 0.012731171520308912, 'alpha_2': 2.9285944006861126e-06, 'lambda_1': 0.00045803986639731417, 'lambda_2': 0.09998551159924475}
Best Bayesian RMSE: 0.12711416431961847
Tuned Bayesian RMSE: 0.12711


In [None]:
print(f"1,best RF rmse:{rf_best_rmse}")
print(f"2,best XGBoost rmse:{xgb_best_rmse}")
print(f"3,best LightGBM rmse:{lgb_best_rmse}")
print(f"4,best CatBoost rmse:{catboost_best_rmse}")
print(f"5,best ElasticNet rmse:{enet_best_rmse}")
print(f"6,best Huber rmse:{huber_best_rmse}")
print(f"7,best MLP rmse:{mlp_best_rmse}")
print(f"8,best kernelRidge rmse: {kr_best_rmse}")
print(f"9,best SVR rmse:{svr_best_rmse}")
print(f"10,best ridge rmse:{ridge_best_rmse}")
print(f"11,best lasso rmse:{lasso_best_rmse}")
print(f"12,best gbr rmse: {gbr_best_rmse}")
print(f"13,best bagging rmse: {bagging_best_rmse}")
print(f"15,best bayesian rmse: {bayesian_best_rmse}")
print(f"14,best tabnet rmse: {tabnet_best_rmse}")



1,best RF rmse:0.14611456417269725
2,best XGBoost rmse:0.11912768981539701
3,best LightGBM rmse:0.1243647991046035
4,best CatBoost rmse:0.11282777979590002
5,best ElasticNet rmse:0.12616595750639695
6,best Huber rmse:0.12390182950494899
7,best MLP rmse:0.15270290898143354
8,best kernelRidge rmse: 0.12293212209914134
9,best SVR rmse:0.1189349723278028
10,best ridge rmse:0.1261635855204583
11,best lasso rmse:0.12638472958333277
12,best gbr rmse: 0.11861192174732638
13,best bagging rmse: 0.1390399794081062
15,best bayesian rmse: 0.12711416431961847
14,best tabnet rmse: 0.19407104242042633


In [30]:
# # feature importance visualization
# # Random Forest feature importance
# rf_importance = pd.Series(rf_best.feature_importances_, index=X.columns).sort_values(ascending=False)
# plt.figure(figsize=(10, 6))
# rf_importance[:30].plot(kind='bar')
# plt.title('Random Forest top 10 feature importance')
# plt.show()

# # # XGBoost feature importance
# # xgb_importance = pd.Series(xgb_best.feature_importances_, index=X.columns).sort_values(ascending=False)
# # plt.figure(figsize=(10, 6))
# # xgb_importance[:30].plot(kind='bar')
# # plt.title('XGBoost top 10 feature importance')
# # plt.show()

# # LightGBM feature importance
# lgb_importance = pd.Series(lgb_best.feature_importances_, index=X.columns).sort_values(ascending=False)
# plt.figure(figsize=(10, 6))
# lgb_importance[:30].plot(kind='bar')
# plt.title('LightGBM top 10 feature importance')
# plt.show()

# # CatBoost feature importance
# catboost_importance = pd.Series(catboost_best.feature_importances_, index=X.columns).sort_values(ascending=False)
# plt.figure(figsize=(10, 6))
# catboost_importance[:30].plot(kind='bar')
# plt.title('CatBoost top 10 feature importance')
# plt.show()

# # ElasticNet feature importance
# enet_importance = pd.Series(np.abs(enet_best.coef_), index=X.columns).sort_values(ascending=False)
# plt.figure(figsize=(10, 6))
# enet_importance[:30].plot(kind='bar')
# plt.title('ElasticNet top 10 feature importance')
# plt.show()

# # Huber feature importance
# huber_importance = pd.Series(np.abs(huber_best.coef_), index=X.columns).sort_values(ascending=False)
# plt.figure(figsize=(10, 6))
# huber_importance[:30].plot(kind='bar')
# plt.title('Huber top 10 feature importance')
# plt.show()

# # MLP feature importance
# perm_importance = permutation_importance(mlp_best, X_val, y_val, n_repeats=10, random_state=42)
# mlp_importance = pd.Series(perm_importance.importances_mean, index=X.columns).sort_values(ascending=False)
# plt.figure(figsize=(10, 6))
# mlp_importance[:30].plot(kind='bar')
# plt.title('MLP top 10 feature importance')
# plt.show()

# # KernelRidge feature importance, no feature importance available

# # SVR feature importance, no feature importance available

# # # Feature importance for GradientBoosting
# # gbr_importance = pd.Series(gbr_best.feature_importances_, index=X.columns).sort_values(ascending=False)
# # plt.figure(figsize=(10, 6))
# # gbr_importance[:10].plot(kind='bar')
# # plt.title('GradientBoosting Top 10 Feature Importance')
# # plt.show()


In [31]:
# # Random Forest cross-validation
# rf_cv_scores = cross_val_score(rf_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# print(f"Random Forest cross-validation RMSE: {-rf_cv_scores.mean():.5f} (+/- {rf_cv_scores.std() * 2:.5f})")

# # # XGBoost cross-validation
# # xgb_cv_scores = cross_val_score(xgb_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# # print(f"XGBoost cross-validation RMSE: {-xgb_cv_scores.mean():.5f} (+/- {xgb_cv_scores.std() * 2:.5f})")

# # LightGBM cross-validation
# lgb_cv_scores = cross_val_score(lgb_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# print(f"LightGBM cross-validation RMSE: {-lgb_cv_scores.mean():.5f} (+/- {lgb_cv_scores.std() * 2:.5f})")

# # CatBoost cross-validation
# catboost_cv_scores = cross_val_score(catboost_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# print(f"CatBoost cross-validation RMSE: {-catboost_cv_scores.mean():.5f} (+/- {catboost_cv_scores.std() * 2:.5f})")

# # ElasticNet cross-validation
# enet_cv_scores = cross_val_score(enet_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# print(f"ElasticNet cross-validation RMSE: {-enet_cv_scores.mean():.5f} (+/- {enet_cv_scores.std() * 2:.5f})")

# # Huber cross-validation
# huber_cv_scores = cross_val_score(huber_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# print(f"Huber cross-validation RMSE: {-huber_cv_scores.mean():.5f} (+/- {huber_cv_scores.std() * 2:.5f})")

# # MLP cross-validation
# mlp_cv_scores = cross_val_score(mlp_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# print(f"MLP cross-validation RMSE: {-mlp_cv_scores.mean():.5f} (+/- {mlp_cv_scores.std() * 2:.5f})")

# # # KernelRidge cross-validation
# # kr_cv_scores = cross_val_score(kr_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# # print(f"KernelRidge cross-validation RMSE: {-kr_cv_scores.mean():.5f} (+/- {kr_cv_scores.std() * 2:.5f})")

# # # SVR cross-validation
# # svr_cv_scores = cross_val_score(svr_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# # print(f"SVR cross-validation RMSE: {-svr_cv_scores.mean():.5f} (+/- {svr_cv_scores.std() * 2:.5f})")

# # # Cross-validation
# # ridge_cv_scores = cross_val_score(ridge_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# # print(f"Ridge CV RMSE: {-ridge_cv_scores.mean():.5f} (+/- {ridge_cv_scores.std() * 2:.5f})")

# # lasso_cv_scores = cross_val_score(lasso_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# # print(f"Lasso CV RMSE: {-lasso_cv_scores.mean():.5f} (+/- {lasso_cv_scores.std() * 2:.5f})")

# # gbr_cv_scores = cross_val_score(gbr_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# # print(f"GradientBoosting CV RMSE: {-gbr_cv_scores.mean():.5f} (+/- {gbr_cv_scores.std() * 2:.5f})")

# # br_cv_scores = cross_val_score(br_best, X, y, cv=5, scoring='neg_root_mean_squared_error')
# # print(f"Bagging CV RMSE: {-br_cv_scores.mean():.5f} (+/- {br_cv_scores.std() * 2:.5f})")


In [37]:
# predict on test set
xgb_test_pred = xgb_best.predict(X_test)
lgb_test_pred = lgb_best.predict(X_test)
catboost_test_pred = catboost_best.predict(X_test)
svr_test_pred = svr_best.predict(X_test)
gbr_test_pred = gbr_best.predict(X_test)
huber_test_pred = huber_best.predict(X_test)
kr_test_pred = kr_best.predict(X_test)
enet_test_pred = enet_best.predict(X_test)
ridge_test_pred = ridge_best.predict(X_test)
lasso_test_pred = lasso_best.predict(X_test)
bayesian_test_pred = bayesian_best.predict(X_test)

# simple ensemble (average predictions)
final_pred = (catboost_test_pred + xgb_test_pred + lgb_test_pred + 
              huber_test_pred + kr_test_pred + svr_test_pred + gbr_test_pred +
              enet_test_pred + ridge_test_pred + lasso_test_pred + bayesian_test_pred) / 11

# expm1 transformation to reverse log transformation
final_pred = np.expm1(final_pred)

# save submission file
submission = pd.DataFrame({'Id': test_ID, 'SalePrice': final_pred})
submission.to_csv('submission_baseline_6Models.csv', index=False)

In [38]:
# define base models for stacking
base_models = [
    ('xgb', XGBRegressor(**xgb_best_params, random_state=42)),
    ('lgb', LGBMRegressor(**lgb_best_params, random_state=42, verbose=-1)),
    ('catboost', CatBoostRegressor(**catboost_best_params, random_state=42, verbose=0)),
    ('svr', SVR(**svr_best_params)),
    ('gbr', GradientBoostingRegressor(**study_gbr.best_params, random_state=42)),
    ('huber', HuberRegressor(**huber_best_params)),
    ('kr', KernelRidge(**kr_best_params)),
    ('enet', ElasticNet(**enet_best_params, random_state=42)),
    ('ridge', Ridge(**study_ridge.best_params, random_state=42)),
    ('lasso', Lasso(**study_lasso.best_params, random_state=42)),
    ('bayesian', BayesianRidge(**study_bayesian.best_params))
]

# define meta learner
meta_learner = Ridge()
# first try use normal parameters for LGBMRegressor
# meta_learner = LGBMRegressor(n_estimators=100, learning_rate=0.05, max_depth=3, 
#                             num_leaves=15, random_state=42)
# initia Stacking
stacking_model = StackingRegressor(estimators=base_models, final_estimator=meta_learner, cv=5)

# train Stacking model
stacking_model.fit(X_train, y_train)

# validate Stacking model
stacking_pred = stacking_model.predict(X_val)
stacking_rmse = np.sqrt(mean_squared_error(y_val, stacking_pred))
print(f"Stacking RMSE: {stacking_rmse:.5f}")

# cross-validation
stacking_cv_scores = cross_val_score(stacking_model, X, y, cv=5, scoring='neg_root_mean_squared_error')
print(f"Stacking cross-validation RMSE: {-stacking_cv_scores.mean():.5f} (+/- {stacking_cv_scores.std() * 2:.5f})")

# predict on test set
stacking_test_pred = stacking_model.predict(X_test)
stacking_test_pred = np.expm1(stacking_test_pred)

# save submission file
print(f"Stacking RMSE: {stacking_rmse:.5f}")
submission_stacking = pd.DataFrame({'Id': test_ID, 'SalePrice': stacking_test_pred})
submission_stacking.to_csv('submission_stacking_6Models.csv', index=False)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit

Stacking RMSE: 0.11231


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit

Stacking cross-validation RMSE: 0.10975 (+/- 0.00900)
Stacking RMSE: 0.11231
