In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, average_precision_score
import optuna

In [2]:
# Load the open-world train / test tables from disk.
train_df = pd.read_csv('../../../dataset/open_world/openworld_train.csv')
test_df = pd.read_csv('../../../dataset/open_world/openworld_test.csv')


def _split_features_target(frame):
    """Return (features, target) arrays; the "label" column is the target, every other column a feature."""
    features = frame.drop(columns=["label"]).values
    target = frame["label"].values
    return features, target


# Feature / target arrays for the training table and the test table.
X, y = _split_features_target(train_df)
X_test, y_test = _split_features_target(test_df)

# Hold out 20% of the training rows for validation, stratified on the label;
# the fixed random_state keeps the split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [3]:
def objective(trial):
    """Optuna objective: fit a two-hidden-layer MLP and score it on the validation split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial handle used to sample one hyperparameter configuration.

    Returns
    -------
    float
        Macro-averaged F1 of the fitted model on ``(X_val, y_val)``.

    Notes
    -----
    Reads the module-level arrays ``X_train``, ``y_train``, ``X_val`` and
    ``y_val``; they must be defined before the study is run.
    """
    # Search space
    hidden_layer_1 = trial.suggest_int("hidden_layer_1", 128, 1024, step=64)
    hidden_layer_2 = trial.suggest_int("hidden_layer_2", 64, 512, step=32)
    # `suggest_loguniform` is deprecated in Optuna and emits a FutureWarning on
    # every trial; `suggest_float(..., log=True)` samples the same log-uniform
    # range under the same parameter name.
    alpha = trial.suggest_float("alpha", 1e-6, 1e-2, log=True)
    learning_rate_init = trial.suggest_float("learning_rate_init", 1e-5, 5e-3, log=True)
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256])
    activation = trial.suggest_categorical("activation", ["relu", "tanh"])
    solver = trial.suggest_categorical("solver", ["adam", "sgd"])

    # Define model
    mlp = MLPClassifier(
        hidden_layer_sizes=(hidden_layer_1, hidden_layer_2),
        activation=activation,
        solver=solver,
        alpha=alpha,
        batch_size=batch_size,
        # Per the scikit-learn docs this schedule only applies when solver='sgd'.
        learning_rate='adaptive',
        learning_rate_init=learning_rate_init,
        max_iter=300,
        # Early stopping monitors an internal 15% hold-out taken from X_train,
        # separate from the (X_val, y_val) split scored below.
        early_stopping=True,
        n_iter_no_change=15,
        validation_fraction=0.15,
        random_state=42,
        verbose=False
    )

    # Train
    mlp.fit(X_train, y_train)

    # Validation performance
    y_pred = mlp.predict(X_val)
    f1_macro = f1_score(y_val, y_pred, average='macro')
    return f1_macro

In [4]:
# Optuna Study
# A seeded TPE sampler makes the sequence of suggested configurations repeatable.
tpe_sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(sampler=tpe_sampler, direction="maximize")

# Run 150 trials of `objective`, keeping the configuration with the highest validation Macro-F1.
study.optimize(objective, n_trials=150, show_progress_bar=True)

# Report the winning trial.
print("\n========== OPTUNA BEST RESULT ==========")
print(f"Best Macro-F1: {study.best_value:.4f}")
print("parameters:", study.best_params)

[I 2025-11-21 15:32:48,543] A new study created in memory with name: no-name-f9621ca7-a6db-4ef4-9a75-974b59a330e7


  0%|          | 0/150 [00:00<?, ?it/s]

  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:34:00,183] Trial 0 finished with value: 0.6134562481390978 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 512, 'alpha': 0.0008471801418819979, 'learning_rate_init': 0.00041282053438262235, 'batch_size': 64, 'activation': 'relu', 'solver': 'adam'}. Best is trial 0 with value: 0.6134562481390978.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:38:45,688] Trial 1 finished with value: 0.49296503989033474 and parameters: {'hidden_layer_1': 1024, 'hidden_layer_2': 448, 'alpha': 7.068974950624607e-06, 'learning_rate_init': 3.095566460242367e-05, 'batch_size': 256, 'activation': 'relu', 'solver': 'adam'}. Best is trial 0 with value: 0.6134562481390978.
[I 2025-11-21 15:39:26,768] Trial 2 finished with value: 0.6340744008251018 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 224, 'alpha': 6.672367170464208e-05, 'learning_rate_init': 0.0013157287601765638, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 2 with value: 0.6340744008251018.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:41:57,014] Trial 3 finished with value: 0.008370059299905946 and parameters: {'hidden_layer_1': 1024, 'hidden_layer_2': 512, 'alpha': 0.0017123375973163992, 'learning_rate_init': 6.639623079859457e-05, 'batch_size': 128, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 2 with value: 0.6340744008251018.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:43:22,089] Trial 4 finished with value: 0.6316528014401727 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 352, 'alpha': 1.7654048052495086e-05, 'learning_rate_init': 0.00025330746540014494, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 2 with value: 0.6340744008251018.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:44:06,066] Trial 5 finished with value: 0.005360646189687942 and parameters: {'hidden_layer_1': 960, 'hidden_layer_2': 96, 'alpha': 6.0803901902966035e-06, 'learning_rate_init': 1.3245461546001868e-05, 'batch_size': 128, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 2 with value: 0.6340744008251018.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:47:32,548] Trial 6 finished with value: 0.6104788022291427 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 448, 'alpha': 1.987021538542864e-06, 'learning_rate_init': 0.004608697883952073, 'batch_size': 64, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 2 with value: 0.6340744008251018.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:48:19,161] Trial 7 finished with value: 0.616371545772826 and parameters: {'hidden_layer_1': 192, 'hidden_layer_2': 224, 'alpha': 2.907208890659845e-06, 'learning_rate_init': 0.00213545417892913, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 2 with value: 0.6340744008251018.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:49:23,540] Trial 8 finished with value: 0.6397995835977203 and parameters: {'hidden_layer_1': 960, 'hidden_layer_2': 288, 'alpha': 3.0086868214458464e-06, 'learning_rate_init': 0.0008414460701586532, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 8 with value: 0.6397995835977203.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:50:38,375] Trial 9 finished with value: 0.41931778008973786 and parameters: {'hidden_layer_1': 192, 'hidden_layer_2': 64, 'alpha': 0.00035127047262708476, 'learning_rate_init': 7.054030995136229e-05, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 8 with value: 0.6397995835977203.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:53:02,657] Trial 10 finished with value: 0.2831748335349944 and parameters: {'hidden_layer_1': 768, 'hidden_layer_2': 320, 'alpha': 0.007553503645583194, 'learning_rate_init': 0.0006482380671822557, 'batch_size': 256, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 8 with value: 0.6397995835977203.
[I 2025-11-21 15:53:36,553] Trial 11 finished with value: 0.6251781716266652 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 192, 'alpha': 6.192544914403147e-05, 'learning_rate_init': 0.001244055158571796, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 8 with value: 0.6397995835977203.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:54:01,626] Trial 12 finished with value: 0.6230297434852737 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 224, 'alpha': 5.49729819794493e-05, 'learning_rate_init': 0.0011554511248583782, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 8 with value: 0.6397995835977203.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:54:27,170] Trial 13 finished with value: 0.6015199180588718 and parameters: {'hidden_layer_1': 832, 'hidden_layer_2': 160, 'alpha': 0.00019692289489323686, 'learning_rate_init': 0.004364562056343845, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 8 with value: 0.6397995835977203.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:56:10,769] Trial 14 finished with value: 0.6131554978855678 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 288, 'alpha': 1.0306393270981497e-06, 'learning_rate_init': 0.00013343415568646332, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 8 with value: 0.6397995835977203.
[I 2025-11-21 15:56:39,835] Trial 15 finished with value: 0.6444198779345015 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 288, 'alpha': 2.7663253455418257e-05, 'learning_rate_init': 0.0010368835361489396, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 15 with value: 0.6444198779345015.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:57:32,099] Trial 16 finished with value: 0.6471143405021894 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 384, 'alpha': 1.920439873351091e-05, 'learning_rate_init': 0.0005497752315406461, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 16 with value: 0.6471143405021894.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 15:58:53,478] Trial 17 finished with value: 0.6408031519232765 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 384, 'alpha': 1.737795755869654e-05, 'learning_rate_init': 0.00028239263759544315, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 16 with value: 0.6471143405021894.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:03:27,704] Trial 18 finished with value: 0.6006812758145311 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 416, 'alpha': 2.050884892193023e-05, 'learning_rate_init': 0.0023312181631612505, 'batch_size': 64, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 16 with value: 0.6471143405021894.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:04:57,720] Trial 19 finished with value: 0.6406570634235146 and parameters: {'hidden_layer_1': 704, 'hidden_layer_2': 352, 'alpha': 0.0002570080611962311, 'learning_rate_init': 0.0005290401470990577, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 16 with value: 0.6471143405021894.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:06:19,175] Trial 20 finished with value: 0.5943414561671854 and parameters: {'hidden_layer_1': 128, 'hidden_layer_2': 320, 'alpha': 1.1926770255342822e-05, 'learning_rate_init': 0.0001551397907662186, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 16 with value: 0.6471143405021894.
[I 2025-11-21 16:07:45,908] Trial 21 finished with value: 0.646168694632705 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 384, 'alpha': 1.810486208098891e-05, 'learning_rate_init': 0.0003330986312806557, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 16 with value: 0.6471143405021894.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:08:48,786] Trial 22 finished with value: 0.624184370818711 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 416, 'alpha': 3.509328471463672e-05, 'learning_rate_init': 0.00037439834644845897, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 16 with value: 0.6471143405021894.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:11:52,727] Trial 23 finished with value: 0.6174572668278225 and parameters: {'hidden_layer_1': 704, 'hidden_layer_2': 384, 'alpha': 7.040140127555034e-06, 'learning_rate_init': 0.00012326487634404927, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 16 with value: 0.6471143405021894.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:12:10,215] Trial 24 finished with value: 0.6382133534618786 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 256, 'alpha': 0.00013313326194278128, 'learning_rate_init': 0.002300637697369454, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 16 with value: 0.6471143405021894.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:13:05,093] Trial 25 finished with value: 0.6507442263431682 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 480, 'alpha': 3.4807850102120494e-05, 'learning_rate_init': 0.0006964185073118472, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 25 with value: 0.6507442263431682.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:14:04,040] Trial 26 finished with value: 0.005341880341880343 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 480, 'alpha': 3.9562078528949583e-05, 'learning_rate_init': 0.00047516469354736703, 'batch_size': 256, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 25 with value: 0.6507442263431682.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:16:52,249] Trial 27 finished with value: 0.6306053099689479 and parameters: {'hidden_layer_1': 832, 'hidden_layer_2': 448, 'alpha': 0.00011540709088006491, 'learning_rate_init': 0.00019060242146367193, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 25 with value: 0.6507442263431682.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:18:17,096] Trial 28 finished with value: 0.6453795065606825 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 480, 'alpha': 9.71526939953522e-06, 'learning_rate_init': 0.0007761079996407639, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 25 with value: 0.6507442263431682.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:19:44,637] Trial 29 finished with value: 0.598245930085144 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 4.115370168805781e-06, 'learning_rate_init': 0.00039304880572964696, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 25 with value: 0.6507442263431682.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:24:13,264] Trial 30 finished with value: 0.6349563514529729 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 384, 'alpha': 0.0004802655115516326, 'learning_rate_init': 7.603049354321362e-05, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 25 with value: 0.6507442263431682.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:26:15,983] Trial 31 finished with value: 0.6558617267159786 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 480, 'alpha': 1.1179777686366611e-05, 'learning_rate_init': 0.0006270127149028523, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 31 with value: 0.6558617267159786.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:28:13,843] Trial 32 finished with value: 0.6504109274754372 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 480, 'alpha': 1.2332325006987787e-05, 'learning_rate_init': 0.0003696595203403023, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 31 with value: 0.6558617267159786.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:30:13,657] Trial 33 finished with value: 0.643924667025295 and parameters: {'hidden_layer_1': 704, 'hidden_layer_2': 480, 'alpha': 9.385528420580168e-06, 'learning_rate_init': 0.0006601953046225278, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 31 with value: 0.6558617267159786.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:31:33,161] Trial 34 finished with value: 0.6393789489384812 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 512, 'alpha': 4.673229368737605e-06, 'learning_rate_init': 0.0017601473828586284, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 31 with value: 0.6558617267159786.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:34:35,922] Trial 35 finished with value: 0.6352417937022726 and parameters: {'hidden_layer_1': 768, 'hidden_layer_2': 416, 'alpha': 8.326675438938822e-05, 'learning_rate_init': 0.0002323851270969518, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 31 with value: 0.6558617267159786.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:36:45,411] Trial 36 finished with value: 0.6146261627954611 and parameters: {'hidden_layer_1': 768, 'hidden_layer_2': 480, 'alpha': 3.595769631735636e-05, 'learning_rate_init': 0.00046812256146432736, 'batch_size': 64, 'activation': 'relu', 'solver': 'adam'}. Best is trial 31 with value: 0.6558617267159786.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:40:00,967] Trial 37 finished with value: 0.019735150653025734 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 448, 'alpha': 1.1040472377843214e-05, 'learning_rate_init': 3.627144819808868e-05, 'batch_size': 64, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 31 with value: 0.6558617267159786.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:41:29,314] Trial 38 finished with value: 0.656569498691885 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.4699261178529493e-06, 'learning_rate_init': 0.0014891684559147189, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 38 with value: 0.656569498691885.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:45:26,397] Trial 39 finished with value: 0.6084645176389234 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.0515605902381855e-06, 'learning_rate_init': 0.003142414370323677, 'batch_size': 64, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 38 with value: 0.656569498691885.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:46:14,031] Trial 40 finished with value: 0.6430311297551322 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 448, 'alpha': 1.598967529270828e-06, 'learning_rate_init': 0.0017003000432847457, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 38 with value: 0.656569498691885.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:47:19,729] Trial 41 finished with value: 0.6542409995285284 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 480, 'alpha': 2.7823910022167066e-06, 'learning_rate_init': 0.0008670298005532077, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 38 with value: 0.656569498691885.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:48:49,412] Trial 42 finished with value: 0.6591666466170767 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 2.191059964207072e-06, 'learning_rate_init': 0.0009009402094883184, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 42 with value: 0.6591666466170767.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:50:00,930] Trial 43 finished with value: 0.6569961134460823 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 2.4011967543100347e-06, 'learning_rate_init': 0.0009576198500132323, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 42 with value: 0.6591666466170767.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:51:35,139] Trial 44 finished with value: 0.667698580452547 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.9155906182703047e-06, 'learning_rate_init': 0.0009118650899147087, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:52:20,634] Trial 45 finished with value: 0.6360095200739474 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 512, 'alpha': 1.7857842152058444e-06, 'learning_rate_init': 0.0014951379280133642, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:53:44,813] Trial 46 finished with value: 0.6371733529859963 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 512, 'alpha': 2.6268137177121397e-06, 'learning_rate_init': 0.001152471991100674, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:54:45,860] Trial 47 finished with value: 0.6103433038715903 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 512, 'alpha': 4.613865678108876e-06, 'learning_rate_init': 0.003169055027149769, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 16:59:17,533] Trial 48 finished with value: 0.5891175619114127 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 416, 'alpha': 1.654034285178306e-06, 'learning_rate_init': 0.0009289835585600671, 'batch_size': 64, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 44 with value: 0.667698580452547.
[I 2025-11-21 17:02:23,974] Trial 49 finished with value: 0.2783111357154328 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 128, 'alpha': 1.2782078455888587e-06, 'learning_rate_init': 1.0090519786202995e-05, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:03:01,646] Trial 50 finished with value: 0.5977744884102515 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 448, 'alpha': 2.3082065191273114e-06, 'learning_rate_init': 0.0013232613077136246, 'batch_size': 64, 'activation': 'relu', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:04:30,812] Trial 51 finished with value: 0.6498220783093549 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 480, 'alpha': 3.298769533466665e-06, 'learning_rate_init': 0.0009461182374730152, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:06:12,296] Trial 52 finished with value: 0.6485529919945452 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 6.134182238649716e-06, 'learning_rate_init': 0.0008064732828844774, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:06:52,603] Trial 53 finished with value: 0.641129012377893 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 448, 'alpha': 3.2837825338001262e-06, 'learning_rate_init': 0.0016836694234716452, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:08:02,830] Trial 54 finished with value: 0.639999699657765 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 512, 'alpha': 2.1872422633814724e-06, 'learning_rate_init': 0.0006137106162846979, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:08:32,726] Trial 55 finished with value: 0.616859915562752 and parameters: {'hidden_layer_1': 192, 'hidden_layer_2': 480, 'alpha': 0.0015862372801127623, 'learning_rate_init': 0.002662542326350355, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:09:18,972] Trial 56 finished with value: 0.6429089181549714 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 480, 'alpha': 1.3251452838411612e-06, 'learning_rate_init': 0.0011625706195046643, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:09:59,073] Trial 57 finished with value: 0.6283233013336574 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 448, 'alpha': 4.554373603517711e-06, 'learning_rate_init': 0.002072507926888919, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:11:43,251] Trial 58 finished with value: 0.6354065623473133 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 2.2016688006419814e-06, 'learning_rate_init': 0.00030100370984645147, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:14:34,095] Trial 59 finished with value: 0.47284310737979734 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 416, 'alpha': 3.515907708483209e-06, 'learning_rate_init': 0.0009392171759207132, 'batch_size': 128, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 44 with value: 0.667698580452547.
[I 2025-11-21 17:15:12,715] Trial 60 finished with value: 0.6374604917560619 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 480, 'alpha': 6.688310397493626e-06, 'learning_rate_init': 0.0013899341785613654, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:16:19,767] Trial 61 finished with value: 0.6446991915435687 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.0690083721794564e-06, 'learning_rate_init': 0.0007547626666262743, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:17:44,733] Trial 62 finished with value: 0.6557118545549675 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 480, 'alpha': 1.534263539999271e-06, 'learning_rate_init': 0.000659087687122733, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:19:07,480] Trial 63 finished with value: 0.6454515683096772 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 448, 'alpha': 1.5838297851417378e-06, 'learning_rate_init': 0.0005784056791738514, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:20:17,679] Trial 64 finished with value: 0.6451219192641192 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 480, 'alpha': 0.007741587907226375, 'learning_rate_init': 0.0004578027518673042, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:21:38,249] Trial 65 finished with value: 0.6540167119281759 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 2.6039430848970407e-06, 'learning_rate_init': 0.0010711044524806918, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:22:59,021] Trial 66 finished with value: 0.6376004610138718 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 448, 'alpha': 1.3677921298943944e-06, 'learning_rate_init': 0.0007936177367521179, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:25:12,590] Trial 67 finished with value: 0.6424978756610444 and parameters: {'hidden_layer_1': 1024, 'hidden_layer_2': 352, 'alpha': 5.293157062576024e-06, 'learning_rate_init': 0.0005310090360537231, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:26:07,136] Trial 68 finished with value: 0.6316269640783988 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 480, 'alpha': 1.9033280104668945e-06, 'learning_rate_init': 0.001993405819694247, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:26:57,226] Trial 69 finished with value: 0.6336086820098737 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 416, 'alpha': 3.050723991866696e-06, 'learning_rate_init': 0.0014613478683779982, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:28:11,135] Trial 70 finished with value: 0.5935766216819981 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 480, 'alpha': 0.003944568768649444, 'learning_rate_init': 0.0037453784880482643, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:29:44,620] Trial 71 finished with value: 0.6386822364196694 and parameters: {'hidden_layer_1': 704, 'hidden_layer_2': 512, 'alpha': 2.4899917368319816e-06, 'learning_rate_init': 0.0010090372790807804, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:30:51,097] Trial 72 finished with value: 0.6467450170793131 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 7.906103741652217e-06, 'learning_rate_init': 0.0011542987894577388, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:32:09,958] Trial 73 finished with value: 0.5594274655377395 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 64, 'alpha': 4.085272355621325e-06, 'learning_rate_init': 0.0006636542901291401, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:33:53,063] Trial 74 finished with value: 0.6558633346174766 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.0594882646063644e-06, 'learning_rate_init': 0.000900551822919949, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:35:26,533] Trial 75 finished with value: 0.6572752460709833 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 480, 'alpha': 1.227462004396831e-06, 'learning_rate_init': 0.0008791677747373319, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:43:10,571] Trial 76 finished with value: 0.46932382407300305 and parameters: {'hidden_layer_1': 704, 'hidden_layer_2': 512, 'alpha': 1.0802644225542731e-06, 'learning_rate_init': 0.00044854041260869573, 'batch_size': 64, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 44 with value: 0.667698580452547.
[I 2025-11-21 17:44:39,845] Trial 77 finished with value: 0.6526104255864593 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 448, 'alpha': 1.4131224075198918e-06, 'learning_rate_init': 0.0006043417028894402, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:46:04,666] Trial 78 finished with value: 0.5512586458706542 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 192, 'alpha': 1.7682148882332016e-06, 'learning_rate_init': 0.0001908313931015108, 'batch_size': 64, 'activation': 'relu', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:47:20,237] Trial 79 finished with value: 0.6436756556568272 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 480, 'alpha': 1.0952405674625198e-06, 'learning_rate_init': 0.0016235755024457418, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:48:54,811] Trial 80 finished with value: 0.6379450003140269 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 512, 'alpha': 2.038531901391174e-06, 'learning_rate_init': 0.0003384276474602169, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:50:56,839] Trial 81 finished with value: 0.6629021445646764 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 480, 'alpha': 1.3536581816294769e-06, 'learning_rate_init': 0.000860293579702326, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:52:38,019] Trial 82 finished with value: 0.658053028513364 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 480, 'alpha': 1.3543988104873667e-06, 'learning_rate_init': 0.0007190766418775571, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:54:53,451] Trial 83 finished with value: 0.6565080740080235 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.2736382690532421e-06, 'learning_rate_init': 0.0012618465904188341, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:56:28,998] Trial 84 finished with value: 0.6358762465320075 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.3072230295105988e-06, 'learning_rate_init': 0.0013036812665077214, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:57:45,611] Trial 85 finished with value: 0.627054875771515 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.923787622969196e-06, 'learning_rate_init': 0.000846681429060376, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:58:40,887] Trial 86 finished with value: 0.621042726036818 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 512, 'alpha': 1.0664852810054421e-06, 'learning_rate_init': 0.001819510069110183, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 17:59:46,295] Trial 87 finished with value: 0.6390781655402574 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 256, 'alpha': 1.0011048409199271e-06, 'learning_rate_init': 0.001016947287442725, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:04:29,059] Trial 88 finished with value: 0.6265926920497676 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 320, 'alpha': 3.6575584860037336e-06, 'learning_rate_init': 0.0025016100066021446, 'batch_size': 64, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 44 with value: 0.667698580452547.
[I 2025-11-21 18:05:34,857] Trial 89 finished with value: 0.6493790012415341 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 480, 'alpha': 2.621106321142354e-06, 'learning_rate_init': 0.0013078094414173203, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:06:55,780] Trial 90 finished with value: 0.6468517236882838 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 448, 'alpha': 1.349411955659644e-06, 'learning_rate_init': 0.0007546052717433339, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:09:22,375] Trial 91 finished with value: 0.6527600960084978 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 480, 'alpha': 1.7558232598923704e-06, 'learning_rate_init': 0.000505881870913555, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:11:04,413] Trial 92 finished with value: 0.6569339337162502 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 2.342758763912146e-06, 'learning_rate_init': 0.0009319169199609046, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:12:38,177] Trial 93 finished with value: 0.6535376092106157 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 2.4553526691541985e-06, 'learning_rate_init': 0.0009077634318947841, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:13:34,214] Trial 94 finished with value: 0.6314609731292232 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 512, 'alpha': 2.1061988715775974e-06, 'learning_rate_init': 0.001503058510561171, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:14:40,611] Trial 95 finished with value: 0.644303865074963 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 480, 'alpha': 3.0161352459120275e-06, 'learning_rate_init': 0.0010957147104064824, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:16:28,532] Trial 96 finished with value: 0.6315062330114831 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.3094653616513166e-06, 'learning_rate_init': 0.0007284064127185063, 'batch_size': 64, 'activation': 'relu', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:17:31,778] Trial 97 finished with value: 0.6267017150595716 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.6122094098087826e-06, 'learning_rate_init': 0.001994666726550759, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:18:54,355] Trial 98 finished with value: 0.6505546213628849 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 480, 'alpha': 0.0005892600252649451, 'learning_rate_init': 0.0011740609544446281, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:20:00,054] Trial 99 finished with value: 0.6362988264922106 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 448, 'alpha': 1.2228390754327804e-06, 'learning_rate_init': 0.0008649565277183604, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:25:33,518] Trial 100 finished with value: 0.6269542875835478 and parameters: {'hidden_layer_1': 896, 'hidden_layer_2': 512, 'alpha': 3.605678279554463e-06, 'learning_rate_init': 0.0001057201975332779, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:27:48,288] Trial 101 finished with value: 0.6549207891264818 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 480, 'alpha': 4.992935084736769e-06, 'learning_rate_init': 0.0005601300604093272, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:29:47,895] Trial 102 finished with value: 0.6543019138549248 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 480, 'alpha': 1.3569012533520522e-05, 'learning_rate_init': 0.0006996834906856145, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:30:53,886] Trial 103 finished with value: 0.637445919290825 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 2.0452384904675944e-06, 'learning_rate_init': 0.0009379587301406426, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:34:09,988] Trial 104 finished with value: 0.44983629811376796 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 96, 'alpha': 1.6725845336872095e-06, 'learning_rate_init': 3.190778113482805e-05, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:42:06,954] Trial 105 finished with value: 0.5392008713568458 and parameters: {'hidden_layer_1': 704, 'hidden_layer_2': 480, 'alpha': 4.8604092648309066e-05, 'learning_rate_init': 2.1068281923262874e-05, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:49:39,798] Trial 106 finished with value: 0.6141587545845985 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 2.977357497607786e-06, 'learning_rate_init': 0.0012402311821699576, 'batch_size': 64, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 44 with value: 0.667698580452547.
[I 2025-11-21 18:50:33,469] Trial 107 finished with value: 0.647756327237231 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 448, 'alpha': 1.4946667110953472e-06, 'learning_rate_init': 0.0010414033343574566, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:52:16,608] Trial 108 finished with value: 0.6420479886932512 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 512, 'alpha': 2.1935714201224265e-06, 'learning_rate_init': 0.0006265263443805456, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:53:07,780] Trial 109 finished with value: 0.6406876066420956 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 480, 'alpha': 1.2580896337515034e-06, 'learning_rate_init': 0.001524418336059754, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:54:49,865] Trial 110 finished with value: 0.6244547727594085 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 480, 'alpha': 2.7376482094792848e-05, 'learning_rate_init': 0.00042792748620098186, 'batch_size': 64, 'activation': 'relu', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:56:16,434] Trial 111 finished with value: 0.6448308198655608 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 480, 'alpha': 1.5240717038429347e-06, 'learning_rate_init': 0.0008013871996503353, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 18:59:30,271] Trial 112 finished with value: 0.6510164847569321 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 480, 'alpha': 2.4186216401571383e-06, 'learning_rate_init': 0.0006778968269198997, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:02:26,003] Trial 113 finished with value: 0.6563526510555676 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.0077273288776286e-06, 'learning_rate_init': 0.0009758789288097616, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:04:50,264] Trial 114 finished with value: 0.6538464927926482 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 512, 'alpha': 1.2228029213423854e-06, 'learning_rate_init': 0.0009970121781987052, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:06:16,983] Trial 115 finished with value: 0.6429314382806771 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.8897791015863929e-06, 'learning_rate_init': 0.0012492441117730345, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:08:34,771] Trial 116 finished with value: 0.6571183360985585 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.016449753856909e-06, 'learning_rate_init': 0.0008544472360181623, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:10:45,007] Trial 117 finished with value: 0.642816234777436 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.0002044008407406e-06, 'learning_rate_init': 0.0018049937500664486, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:11:56,533] Trial 118 finished with value: 0.6487848756455744 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 0.0001927852014741095, 'learning_rate_init': 0.0013955554762107264, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:13:42,456] Trial 119 finished with value: 0.661063986392976 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.1841041224387257e-06, 'learning_rate_init': 0.0008584710187193815, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:14:41,736] Trial 120 finished with value: 0.6498175402084297 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 512, 'alpha': 1.5631742565863305e-06, 'learning_rate_init': 0.0008480365083651021, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:16:15,514] Trial 121 finished with value: 0.6575033275314626 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.003932962347365e-06, 'learning_rate_init': 0.0010995920928127823, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:17:09,220] Trial 122 finished with value: 0.6517500148524582 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.196778910438838e-06, 'learning_rate_init': 0.0010858504098515789, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:17:55,378] Trial 123 finished with value: 0.6412569008500653 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.7865253425038423e-06, 'learning_rate_init': 0.0007678121447296049, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:18:43,037] Trial 124 finished with value: 0.6259459858972035 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 480, 'alpha': 1.4140087335561675e-06, 'learning_rate_init': 0.0011822185747246755, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:19:31,138] Trial 125 finished with value: 0.6326807323202291 and parameters: {'hidden_layer_1': 128, 'hidden_layer_2': 512, 'alpha': 1.1971032315250312e-06, 'learning_rate_init': 0.0009652802802110723, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:25:36,313] Trial 126 finished with value: 0.61707825743988 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 512, 'alpha': 2.279838358875923e-06, 'learning_rate_init': 0.0013405108820800991, 'batch_size': 64, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 44 with value: 0.667698580452547.
[I 2025-11-21 19:25:59,971] Trial 127 finished with value: 0.6346161459172013 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 480, 'alpha': 1.0030204439506584e-06, 'learning_rate_init': 0.0016745624388737838, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:28:02,075] Trial 128 finished with value: 0.6542694346016956 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.931688892247281e-06, 'learning_rate_init': 0.0005509829915303736, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:29:00,559] Trial 129 finished with value: 0.6311317899748751 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 480, 'alpha': 2.7763597015539054e-06, 'learning_rate_init': 0.002283341432234142, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:30:06,795] Trial 130 finished with value: 0.6233786819640662 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.4644860251525059e-06, 'learning_rate_init': 0.001075587250356338, 'batch_size': 64, 'activation': 'relu', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:32:48,451] Trial 131 finished with value: 0.658782582504888 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.0209725746564483e-06, 'learning_rate_init': 0.0008754445565622074, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:35:07,633] Trial 132 finished with value: 0.6541655344972032 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.206352542620663e-06, 'learning_rate_init': 0.0007224918728768257, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:36:32,025] Trial 133 finished with value: 0.6530489837551368 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 480, 'alpha': 1.7438166181055796e-06, 'learning_rate_init': 0.000853095084657771, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:38:48,615] Trial 134 finished with value: 0.6459461471823033 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.4303275428429374e-06, 'learning_rate_init': 0.001461379724112213, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:40:18,910] Trial 135 finished with value: 0.6368479669174052 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.0250166371531002e-06, 'learning_rate_init': 0.0009561578548283954, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:41:51,900] Trial 136 finished with value: 0.6386360159760683 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 512, 'alpha': 2.1050812146045505e-06, 'learning_rate_init': 0.0007713538762560596, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:43:07,797] Trial 137 finished with value: 0.6507934734318108 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 480, 'alpha': 1.243821641111251e-06, 'learning_rate_init': 0.0011739951261683782, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:44:57,009] Trial 138 finished with value: 0.6479393391641414 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 480, 'alpha': 3.996804261690784e-06, 'learning_rate_init': 0.0009035447329770322, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:47:46,625] Trial 139 finished with value: 0.6516892464685017 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.6335548662921326e-06, 'learning_rate_init': 0.0006578963738856307, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:50:00,879] Trial 140 finished with value: 0.6415104393231763 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 448, 'alpha': 2.5617815254145193e-06, 'learning_rate_init': 0.0005055324606044344, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:52:22,631] Trial 141 finished with value: 0.6561148980807481 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.201406581268211e-06, 'learning_rate_init': 0.0008448624190523389, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:54:04,654] Trial 142 finished with value: 0.6477412474639187 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.4094965579179727e-06, 'learning_rate_init': 0.0010468955192714795, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:56:09,631] Trial 143 finished with value: 0.6603293416832859 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 1.1929561849674481e-06, 'learning_rate_init': 0.0008288900710124422, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:57:21,675] Trial 144 finished with value: 0.6376291563442216 and parameters: {'hidden_layer_1': 640, 'hidden_layer_2': 480, 'alpha': 1.8084409934654142e-06, 'learning_rate_init': 0.001245705667207058, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 19:59:08,959] Trial 145 finished with value: 0.6472558519934719 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.473544316846165e-06, 'learning_rate_init': 0.0006056683300426901, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 20:00:27,724] Trial 146 finished with value: 0.6460452681625058 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 480, 'alpha': 1.0282458899994136e-06, 'learning_rate_init': 0.0007311988877580698, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 20:02:16,894] Trial 147 finished with value: 0.6554203673370271 and parameters: {'hidden_layer_1': 576, 'hidden_layer_2': 512, 'alpha': 2.1262346276391147e-06, 'learning_rate_init': 0.0010423194633431508, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.


  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-5, 5e-3)


[I 2025-11-21 20:06:09,958] Trial 148 finished with value: 0.4577785606464843 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 512, 'alpha': 1.2340497302721044e-06, 'learning_rate_init': 0.0008286240702189782, 'batch_size': 128, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 44 with value: 0.667698580452547.
[I 2025-11-21 20:06:42,495] Trial 149 finished with value: 0.6289337033955753 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 288, 'alpha': 3.1305622011066945e-06, 'learning_rate_init': 0.0014256987620001435, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 44 with value: 0.667698580452547.

Best Macro-F1: 0.6677
parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 512, 'alpha': 1.9155906182703047e-06, 'learning_rate_init': 0.0009118650899147087, 'batch_size': 64, 'activation': 'tanh', 'solver': 'adam'}


In [7]:
# Rebuild the MLP from the winning Optuna trial and fit it one more time.
best_params = study.best_params

# Settings chosen by the hyperparameter search.
searched_settings = {
    key: best_params[key]
    for key in ("activation", "solver", "alpha", "batch_size", "learning_rate_init")
}
# The two layer widths were searched as separate integers; MLPClassifier
# expects them combined into a single tuple.
searched_settings["hidden_layer_sizes"] = (
    best_params["hidden_layer_1"],
    best_params["hidden_layer_2"],
)

# Settings fixed by hand. They mirror the estimator built in `objective`,
# except that max_iter is 400 here (300 during the search) and verbose is on.
fixed_settings = dict(
    learning_rate='adaptive',
    max_iter=400,
    # With early stopping, scikit-learn sets aside `validation_fraction` of the
    # data passed to fit() and stops after `n_iter_no_change` epochs without
    # improvement; the "Validation score" lines in the log refer to that
    # internal split, not to X_val.
    early_stopping=True,
    n_iter_no_change=15,
    validation_fraction=0.15,
    random_state=42,
    verbose=True,
)

best_model = MLPClassifier(**searched_settings, **fixed_settings)

print("\n[INFO] Training final model with best params...")
# Fit on the training split only; X_val and the test split are not used here.
best_model.fit(X_train, y_train)


[INFO] Training final model with best params...
Iteration 1, loss = 2.86273885
Validation score: 0.383005
Iteration 2, loss = 2.37586234
Validation score: 0.426519
Iteration 3, loss = 2.13979097
Validation score: 0.464696
Iteration 4, loss = 1.95503616
Validation score: 0.491379
Iteration 5, loss = 1.78341084
Validation score: 0.525862
Iteration 6, loss = 1.63375976
Validation score: 0.547619
Iteration 7, loss = 1.49842910
Validation score: 0.556650
Iteration 8, loss = 1.37884310
Validation score: 0.569376
Iteration 9, loss = 1.27729898
Validation score: 0.594417
Iteration 10, loss = 1.18715710
Validation score: 0.599343
Iteration 11, loss = 1.11325013
Validation score: 0.605090
Iteration 12, loss = 1.04027024
Validation score: 0.635057
Iteration 13, loss = 0.96878574
Validation score: 0.627668
Iteration 14, loss = 0.91658545
Validation score: 0.638752
Iteration 15, loss = 0.85928099
Validation score: 0.637931
Iteration 16, loss = 0.81078983
Validation score: 0.633005
Iteration 17, lo

In [10]:
# Score the refit model on the held-out test split.
test_pred = best_model.predict(X_test)         # predicted class labels
test_proba = best_model.predict_proba(X_test)  # per-class probability estimates

# Label-based metrics: accuracy, then F1 under three averaging schemes
# (evaluated in the order macro, micro, weighted).
test_acc = accuracy_score(y_test, test_pred)
test_f1_macro, test_f1_micro, test_f1_weighted = (
    f1_score(y_test, test_pred, average=scheme)
    for scheme in ("macro", "micro", "weighted")
)

# Probability-based metrics, both macro-averaged over classes.
# ROC-AUC (multi-class, one-vs-rest scheme)
test_roc_auc = roc_auc_score(y_test, test_proba, multi_class='ovr', average='macro')
# PR-AUC (multi-class macro)
test_pr_auc = average_precision_score(y_test, test_proba, average='macro')

# Assemble the report and emit it with a single print call; the text written
# to stdout is the same as printing each line separately.
report_lines = [
    "\n========== FINAL TEST RESULTS ==========",
    f"Accuracy        : {test_acc:.4f}",
    f"F1 (macro)      : {test_f1_macro:.4f}",
    f"F1 (micro)      : {test_f1_micro:.4f}",
    f"F1 (weighted)   : {test_f1_weighted:.4f}",
    f"ROC-AUC (macro) : {test_roc_auc:.4f}",
    f"PR-AUC (macro)  : {test_pr_auc:.4f}",
]
print("\n".join(report_lines))


Accuracy        : 0.7234
F1 (macro)      : 0.6584
F1 (micro)      : 0.7234
F1 (weighted)   : 0.7192
ROC-AUC (macro) : 0.9837
PR-AUC (macro)  : 0.7075
