In [2]:
import numpy as np
import pandas as pd
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score, classification_report, roc_auc_score, precision_recall_curve, auc

In [3]:
train_df = pd.read_csv("../../../dataset/open_world/openworld_train.csv")
test_df = pd.read_csv("../../../dataset/open_world/openworld_test.csv")

In [4]:
# feature/label 분리
target_col = train_df.columns[-1]   # 마지막 컬럼이 label

# open-world label 통합
train_df[target_col] = train_df[target_col].apply(lambda x: 0 if x == 95 else 1)
test_df[target_col]  = test_df[target_col].apply(lambda x: 0 if x == 95 else 1)

X_train = train_df.drop(columns=[target_col]).values
y_train = train_df[target_col].astype(np.float32).values
X_test  = test_df.drop(columns=[target_col]).values
y_test  = test_df[target_col].astype(np.float32).values

In [5]:
# Optuna Objective(K-Fold + Pruning)
def objective(trial):
    # Search Space
    hidden_layer_1 = trial.suggest_int("hidden_layer_1", 64, 512, step=32)
    hidden_layer_2 = trial.suggest_int("hidden_layer_2", 32, 256, step=16)
    alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
    learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)
    batch_size = trial.suggest_categorical("batch_size", [128, 256])
    activation = trial.suggest_categorical("activation", ["relu", "tanh"])
    solver = trial.suggest_categorical("solver", ["adam", "sgd"])

    # K-Fold 설정
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    f1_scores = []

    for fold_idx, (tr_idx, val_idx) in enumerate(kfold.split(X_train, y_train)):

        X_tr, X_val = X_train[tr_idx], X_train[val_idx]
        y_tr, y_val = y_train[tr_idx], y_train[val_idx]

        # MLP model
        mlp = MLPClassifier(
            hidden_layer_sizes=(hidden_layer_1, hidden_layer_2),
            activation=activation,
            solver=solver,
            alpha=alpha,
            batch_size=batch_size,
            learning_rate='adaptive',
            learning_rate_init=learning_rate_init,
            max_iter=300,
            early_stopping=True,
            validation_fraction=0.1,
            n_iter_no_change=10,
            random_state=42,
        )

        # train
        mlp.fit(X_tr, y_tr)

        # val
        pred = mlp.predict(X_val)
        f1_macro = f1_score(y_val, pred, average='macro')
        f1_scores.append(f1_macro)

        # Optuna pruning
        trial.report(f1_macro, fold_idx)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return np.mean(f1_scores)

In [6]:
# Optuna Study
study = optuna.create_study(
    direction="maximize",
    sampler=TPESampler(seed=42),
    pruner=MedianPruner(n_warmup_steps=5)
)
study.optimize(objective, n_trials=100, show_progress_bar=True)

print("\n========== OPTUNA RESULTS ==========")
print(f"Best Macro-F1: {study.best_value:.4f}")
print("parameters:", study.best_params)

[I 2025-11-20 17:14:44,248] A new study created in memory with name: no-name-ccc00ab8-08d3-42ed-b0b4-d7a1dc03dadf


  0%|          | 0/100 [00:00<?, ?it/s]

  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:17:29,400] Trial 0 finished with value: 0.7757940771314631 and parameters: {'hidden_layer_1': 224, 'hidden_layer_2': 256, 'alpha': 0.001570297088405539, 'learning_rate_init': 0.0010401663679887319, 'batch_size': 128, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 0 with value: 0.7757940771314631.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:18:12,954] Trial 1 finished with value: 0.8046833755179289 and parameters: {'hidden_layer_1': 64, 'hidden_layer_2': 256, 'alpha': 0.00314288089084011, 'learning_rate_init': 0.00022948683681130568, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 1 with value: 0.8046833755179289.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:18:59,887] Trial 2 finished with value: 0.8277125650412998 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 64, 'alpha': 7.52374288453485e-05, 'learning_rate_init': 0.0004192159350410976, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 2 with value: 0.8277125650412998.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:19:40,022] Trial 3 finished with value: 0.8451244430223426 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 64, 'alpha': 1.5673095467235405e-05, 'learning_rate_init': 0.004093813608598782, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:20:09,325] Trial 4 finished with value: 0.8360313460351397 and parameters: {'hidden_layer_1': 96, 'hidden_layer_2': 144, 'alpha': 1.2681352169084594e-05, 'learning_rate_init': 0.0035067764992972182, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:23:34,994] Trial 5 finished with value: 0.7992596890961847 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 208, 'alpha': 0.006584106160121612, 'learning_rate_init': 0.00331348361550895, 'batch_size': 256, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:24:50,732] Trial 6 finished with value: 0.733210711115856 and parameters: {'hidden_layer_1': 224, 'hidden_layer_2': 96, 'alpha': 0.003063462210622081, 'learning_rate_init': 0.00040375061884407573, 'batch_size': 256, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:25:22,031] Trial 7 finished with value: 0.8417958671882555 and parameters: {'hidden_layer_1': 416, 'hidden_layer_2': 64, 'alpha': 1.0388823104027935e-05, 'learning_rate_init': 0.0024290950368254976, 'batch_size': 256, 'activation': 'relu', 'solver': 'adam'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:26:55,183] Trial 8 finished with value: 0.8291030121825422 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 176, 'alpha': 9.833181933644887e-05, 'learning_rate_init': 0.00012822825454807568, 'batch_size': 256, 'activation': 'relu', 'solver': 'adam'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:29:04,162] Trial 9 finished with value: 0.7943307047398384 and parameters: {'hidden_layer_1': 96, 'hidden_layer_2': 192, 'alpha': 0.0019158219548093154, 'learning_rate_init': 0.0008986552644007198, 'batch_size': 128, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:29:35,606] Trial 10 finished with value: 0.8404363152192692 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 32, 'alpha': 0.0003581883805792668, 'learning_rate_init': 0.0016050941365082067, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:30:17,769] Trial 11 finished with value: 0.8427112505302439 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 112, 'alpha': 1.0253234397482582e-05, 'learning_rate_init': 0.0048368428298536876, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:30:55,457] Trial 12 finished with value: 0.8406031854900341 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 128, 'alpha': 3.407759409615393e-05, 'learning_rate_init': 0.004980354763158108, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:31:31,150] Trial 13 finished with value: 0.8435827203161201 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 96, 'alpha': 3.069654326477668e-05, 'learning_rate_init': 0.002033526288012722, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 3 with value: 0.8451244430223426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:32:11,501] Trial 14 finished with value: 0.8493686499044426 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 80, 'alpha': 3.813848083517318e-05, 'learning_rate_init': 0.0017053755428803074, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:32:52,075] Trial 15 finished with value: 0.8435581065176357 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 32, 'alpha': 0.00027135172616328393, 'learning_rate_init': 0.00132337133155335, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:33:41,099] Trial 16 finished with value: 0.8371839704418523 and parameters: {'hidden_layer_1': 160, 'hidden_layer_2': 64, 'alpha': 3.998247203661405e-05, 'learning_rate_init': 0.0005285228085105334, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:34:13,639] Trial 17 finished with value: 0.8395083854693706 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 80, 'alpha': 0.00021352767099247087, 'learning_rate_init': 0.002535641987753074, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:37:24,918] Trial 18 finished with value: 0.8316413023460232 and parameters: {'hidden_layer_1': 480, 'hidden_layer_2': 32, 'alpha': 9.637313004562953e-05, 'learning_rate_init': 0.0015738530831925976, 'batch_size': 128, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:38:21,438] Trial 19 finished with value: 0.8417614812682113 and parameters: {'hidden_layer_1': 192, 'hidden_layer_2': 160, 'alpha': 2.0283770639780877e-05, 'learning_rate_init': 0.0007154731028191787, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:39:04,612] Trial 20 finished with value: 0.8455142715874266 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 128, 'alpha': 0.0006430024554463524, 'learning_rate_init': 0.0028448583597538743, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:39:57,312] Trial 21 finished with value: 0.8471863088421842 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 128, 'alpha': 0.0005731048160936769, 'learning_rate_init': 0.0033269392984951185, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:40:51,224] Trial 22 finished with value: 0.8452985304708378 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 128, 'alpha': 0.0006562876352613938, 'learning_rate_init': 0.002677477281203617, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:41:33,612] Trial 23 finished with value: 0.8464277046867579 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 144, 'alpha': 0.0006571340646858928, 'learning_rate_init': 0.0020823080081435025, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:42:19,317] Trial 24 finished with value: 0.8420869626338146 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 160, 'alpha': 0.0006384668873264385, 'learning_rate_init': 0.0018224234626667973, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:42:55,992] Trial 25 finished with value: 0.8408991813209497 and parameters: {'hidden_layer_1': 160, 'hidden_layer_2': 96, 'alpha': 0.0001800233600547761, 'learning_rate_init': 0.0012603943868572482, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:47:12,484] Trial 26 finished with value: 0.8195519914383222 and parameters: {'hidden_layer_1': 416, 'hidden_layer_2': 224, 'alpha': 0.001264350061476842, 'learning_rate_init': 0.001985022147574902, 'batch_size': 128, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:48:10,537] Trial 27 finished with value: 0.8412952335472624 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 144, 'alpha': 0.00040971166554145896, 'learning_rate_init': 0.0008616759570028959, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:49:03,889] Trial 28 finished with value: 0.842807260590478 and parameters: {'hidden_layer_1': 224, 'hidden_layer_2': 112, 'alpha': 0.0008670725269066357, 'learning_rate_init': 0.001147840764134855, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:52:13,205] Trial 29 finished with value: 0.8089094637973646 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 176, 'alpha': 0.0015772341675155746, 'learning_rate_init': 0.0020868928703084853, 'batch_size': 128, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:52:39,348] Trial 30 finished with value: 0.8357354164030031 and parameters: {'hidden_layer_1': 192, 'hidden_layer_2': 112, 'alpha': 0.00013733652581082975, 'learning_rate_init': 0.003543385421239546, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:53:26,213] Trial 31 finished with value: 0.8408136908532265 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 144, 'alpha': 0.00047049277070372044, 'learning_rate_init': 0.002819376772404894, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:54:03,981] Trial 32 finished with value: 0.840628129570678 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 128, 'alpha': 0.0008563572861931799, 'learning_rate_init': 0.0030141304493282724, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:55:00,025] Trial 33 finished with value: 0.8424931857775417 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 160, 'alpha': 0.0031238878736949216, 'learning_rate_init': 0.0015407674946107022, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:55:43,708] Trial 34 finished with value: 0.8349207703269317 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 128, 'alpha': 0.0012267499241789767, 'learning_rate_init': 0.004138419913440651, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:56:19,471] Trial 35 finished with value: 0.8386666701338192 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 80, 'alpha': 6.389793379677481e-05, 'learning_rate_init': 0.0022575348106354228, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:56:53,478] Trial 36 finished with value: 0.8409825929377778 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 224, 'alpha': 0.0057383407557110196, 'learning_rate_init': 0.003981912580135088, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 17:57:41,169] Trial 37 finished with value: 0.8352680534917443 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 80, 'alpha': 0.0005507117319855848, 'learning_rate_init': 0.00026587903615561144, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:04:04,692] Trial 38 finished with value: 0.7561900517049711 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 48, 'alpha': 0.0002047858966664323, 'learning_rate_init': 0.001003905172769249, 'batch_size': 256, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:04:40,810] Trial 39 finished with value: 0.8444450853787927 and parameters: {'hidden_layer_1': 224, 'hidden_layer_2': 176, 'alpha': 0.0024425497181117957, 'learning_rate_init': 0.0035388605537052505, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:05:11,555] Trial 40 finished with value: 0.8442439860843048 and parameters: {'hidden_layer_1': 416, 'hidden_layer_2': 112, 'alpha': 0.004627715566872652, 'learning_rate_init': 0.0030800297557187816, 'batch_size': 256, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:05:48,306] Trial 41 finished with value: 0.8472416659379345 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 128, 'alpha': 0.0007636924249149345, 'learning_rate_init': 0.002520425435835414, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:06:27,090] Trial 42 finished with value: 0.8447978820909748 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 144, 'alpha': 0.0009433839422379491, 'learning_rate_init': 0.0024217960710599482, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:07:08,439] Trial 43 finished with value: 0.8438184808663622 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 144, 'alpha': 0.001104402854243263, 'learning_rate_init': 0.0017008432436530119, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:07:43,494] Trial 44 finished with value: 0.8403828576305535 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 96, 'alpha': 0.0003408622663526359, 'learning_rate_init': 0.004049438154982519, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:08:28,959] Trial 45 finished with value: 0.8437792183966415 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 128, 'alpha': 0.00026418325319587966, 'learning_rate_init': 0.0013916215306686163, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:10:06,580] Trial 46 finished with value: 0.7103018936202816 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 160, 'alpha': 0.001957886017602413, 'learning_rate_init': 0.00011804334080273245, 'batch_size': 256, 'activation': 'tanh', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:10:54,999] Trial 47 finished with value: 0.8477381407670712 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 192, 'alpha': 0.00070686990753678, 'learning_rate_init': 0.0031469501170793518, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:11:50,465] Trial 48 finished with value: 0.8376800108056001 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 208, 'alpha': 0.0004174495797624334, 'learning_rate_init': 0.004985882863416894, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:12:54,368] Trial 49 finished with value: 0.8460567648223428 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 240, 'alpha': 0.00014544370727990203, 'learning_rate_init': 0.002275654071592751, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:14:20,633] Trial 50 finished with value: 0.8365486454266928 and parameters: {'hidden_layer_1': 224, 'hidden_layer_2': 192, 'alpha': 0.0016208643487033933, 'learning_rate_init': 0.00015762094141831773, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:15:18,976] Trial 51 finished with value: 0.8455279345783534 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 256, 'alpha': 5.200490073426652e-05, 'learning_rate_init': 0.0021799713464221966, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:16:04,067] Trial 52 finished with value: 0.842137480636568 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 240, 'alpha': 2.045427784419049e-05, 'learning_rate_init': 0.0033385881333160973, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:16:50,778] Trial 53 finished with value: 0.8434763184299037 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 240, 'alpha': 0.009614036798751101, 'learning_rate_init': 0.002567668495795264, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:17:42,916] Trial 54 finished with value: 0.8447406946426315 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 192, 'alpha': 0.00013376663137852296, 'learning_rate_init': 0.0018676042368654642, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:18:34,647] Trial 55 finished with value: 0.8387125406322997 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 176, 'alpha': 0.0007826383286017978, 'learning_rate_init': 0.00045332398729437163, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:19:06,575] Trial 56 finished with value: 0.8387374506545212 and parameters: {'hidden_layer_1': 192, 'hidden_layer_2': 256, 'alpha': 0.00026554078116225606, 'learning_rate_init': 0.004329316697460291, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:21:11,816] Trial 57 finished with value: 0.7978297080293614 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 208, 'alpha': 0.00010000889393941878, 'learning_rate_init': 0.0014726819529397462, 'batch_size': 256, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:21:47,416] Trial 58 finished with value: 0.847585888815164 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 48, 'alpha': 0.0005310746846333009, 'learning_rate_init': 0.002370995479951581, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:22:25,118] Trial 59 finished with value: 0.8463369824818463 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 48, 'alpha': 0.0005722512717158249, 'learning_rate_init': 0.0017959712997335502, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:22:45,053] Trial 60 finished with value: 0.8378248537407392 and parameters: {'hidden_layer_1': 64, 'hidden_layer_2': 64, 'alpha': 0.00048662209181979523, 'learning_rate_init': 0.0033497799531686475, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:23:21,448] Trial 61 finished with value: 0.8462205888075589 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 48, 'alpha': 0.0006327068890506017, 'learning_rate_init': 0.0016653163191461691, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:24:09,480] Trial 62 finished with value: 0.8438032187259475 and parameters: {'hidden_layer_1': 480, 'hidden_layer_2': 48, 'alpha': 0.0003239902054109937, 'learning_rate_init': 0.0019124525736780849, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:25:42,275] Trial 63 finished with value: 0.8452769685633953 and parameters: {'hidden_layer_1': 416, 'hidden_layer_2': 48, 'alpha': 0.0009876943180796773, 'learning_rate_init': 0.0026324742340120637, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:26:20,140] Trial 64 finished with value: 0.8452141872129275 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 32, 'alpha': 0.0007262959569667743, 'learning_rate_init': 0.001186716286778714, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:26:49,377] Trial 65 finished with value: 0.845237867508264 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 64, 'alpha': 0.0004754454715172179, 'learning_rate_init': 0.003080486312237379, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:27:29,369] Trial 66 finished with value: 0.8447697365027537 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 80, 'alpha': 0.0013838801106771208, 'learning_rate_init': 0.0020523298515260193, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:28:18,174] Trial 67 finished with value: 0.8439398142444237 and parameters: {'hidden_layer_1': 480, 'hidden_layer_2': 112, 'alpha': 0.0005494192661430637, 'learning_rate_init': 0.000666525254252687, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:30:33,614] Trial 68 finished with value: 0.8372101643883104 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 32, 'alpha': 0.00035410699269815705, 'learning_rate_init': 0.0037486443387318657, 'batch_size': 128, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:34:20,793] Trial 69 finished with value: 0.8436860128446163 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 80, 'alpha': 0.0005804170794886694, 'learning_rate_init': 0.00238968761856435, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:35:04,283] Trial 70 finished with value: 0.8442986351807811 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 96, 'alpha': 0.0007768739693034444, 'learning_rate_init': 0.0028616723161496343, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:35:51,580] Trial 71 finished with value: 0.8459071321801124 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 48, 'alpha': 0.0004078283697068134, 'learning_rate_init': 0.0017193355108192203, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:36:39,323] Trial 72 finished with value: 0.8450733212554837 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 64, 'alpha': 0.000662104793891937, 'learning_rate_init': 0.0010210265695110487, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:37:13,078] Trial 73 finished with value: 0.84128824665212 and parameters: {'hidden_layer_1': 480, 'hidden_layer_2': 48, 'alpha': 0.001076492265202361, 'learning_rate_init': 0.0016857251470472903, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:37:48,796] Trial 74 finished with value: 0.8437398020772138 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 48, 'alpha': 0.0005673486885227132, 'learning_rate_init': 0.0013358144286185092, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:38:20,772] Trial 75 finished with value: 0.8390605003083053 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 32, 'alpha': 0.0002676930364786302, 'learning_rate_init': 0.0020691795921306782, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:38:43,718] Trial 76 finished with value: 0.8386525132200893 and parameters: {'hidden_layer_1': 128, 'hidden_layer_2': 160, 'alpha': 0.0008480367672068363, 'learning_rate_init': 0.004429697037080993, 'batch_size': 256, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:39:35,155] Trial 77 finished with value: 0.8483238606581203 and parameters: {'hidden_layer_1': 416, 'hidden_layer_2': 64, 'alpha': 0.002399302934053856, 'learning_rate_init': 0.002574588294632823, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:40:13,813] Trial 78 finished with value: 0.8456362823074259 and parameters: {'hidden_layer_1': 416, 'hidden_layer_2': 64, 'alpha': 0.0023192668605903793, 'learning_rate_init': 0.002700295868189129, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:42:39,124] Trial 79 finished with value: 0.8319656697061169 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 128, 'alpha': 0.00022015305659142767, 'learning_rate_init': 0.003087535619968924, 'batch_size': 128, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:43:24,806] Trial 80 finished with value: 0.8443637892076259 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 112, 'alpha': 0.0013556474632545703, 'learning_rate_init': 0.0024086731625441032, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:44:10,410] Trial 81 finished with value: 0.8461480510807113 and parameters: {'hidden_layer_1': 512, 'hidden_layer_2': 48, 'alpha': 0.00041431656342101825, 'learning_rate_init': 0.0018139972536639437, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:44:52,580] Trial 82 finished with value: 0.8428161288541028 and parameters: {'hidden_layer_1': 448, 'hidden_layer_2': 64, 'alpha': 0.0034863853500870794, 'learning_rate_init': 0.0015142437791537111, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:45:28,612] Trial 83 finished with value: 0.8439264712901652 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 144, 'alpha': 0.0006995460672356313, 'learning_rate_init': 0.0038230685096925913, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:46:11,185] Trial 84 finished with value: 0.842490626475817 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 96, 'alpha': 0.0010341587420483658, 'learning_rate_init': 0.0021787561683295777, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:46:50,681] Trial 85 finished with value: 0.846783317733529 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 80, 'alpha': 0.0016727313394917325, 'learning_rate_init': 0.002846990135569038, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:47:30,605] Trial 86 finished with value: 0.8461302395896461 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 80, 'alpha': 0.004125533429027105, 'learning_rate_init': 0.0027542724506474953, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:47:58,669] Trial 87 finished with value: 0.8421074945073437 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 80, 'alpha': 0.0018692744512697997, 'learning_rate_init': 0.0031982221629593553, 'batch_size': 256, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:48:48,535] Trial 88 finished with value: 0.8412151523988379 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 144, 'alpha': 0.002452845840747384, 'learning_rate_init': 0.0035024689895222973, 'batch_size': 128, 'activation': 'tanh', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:49:27,479] Trial 89 finished with value: 0.8442089485153798 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 64, 'alpha': 0.0018171769435711758, 'learning_rate_init': 0.002444284270937227, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:50:10,666] Trial 90 finished with value: 0.844141376756045 and parameters: {'hidden_layer_1': 416, 'hidden_layer_2': 96, 'alpha': 1.2167088534091671e-05, 'learning_rate_init': 0.002925426860432069, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:50:42,080] Trial 91 finished with value: 0.8431417344262588 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 32, 'alpha': 0.00119110622980072, 'learning_rate_init': 0.0019366003251740426, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:51:14,351] Trial 92 finished with value: 0.8379896189917861 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 64, 'alpha': 0.0005040331439570987, 'learning_rate_init': 0.0022488843455409063, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:51:50,406] Trial 93 finished with value: 0.8439074008372588 and parameters: {'hidden_layer_1': 384, 'hidden_layer_2': 48, 'alpha': 0.0006158812419445812, 'learning_rate_init': 0.0025672433244354528, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:52:58,815] Trial 94 finished with value: 0.8414977695026679 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 160, 'alpha': 0.0008692517507226801, 'learning_rate_init': 0.0003353462490250933, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:53:36,501] Trial 95 finished with value: 0.8457986811160747 and parameters: {'hidden_layer_1': 352, 'hidden_layer_2': 64, 'alpha': 0.0027754073916598874, 'learning_rate_init': 0.0016053154310374407, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:54:29,956] Trial 96 finished with value: 0.8456334376886498 and parameters: {'hidden_layer_1': 480, 'hidden_layer_2': 128, 'alpha': 0.0015525507045987448, 'learning_rate_init': 0.001790798174405736, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:57:02,939] Trial 97 finished with value: 0.8045083205237871 and parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 48, 'alpha': 0.0004370536498253204, 'learning_rate_init': 0.0011016374994181838, 'batch_size': 128, 'activation': 'relu', 'solver': 'sgd'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:57:57,718] Trial 98 finished with value: 0.8410435062957615 and parameters: {'hidden_layer_1': 256, 'hidden_layer_2': 32, 'alpha': 2.7438865239425338e-05, 'learning_rate_init': 0.0008891270396260732, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.


  alpha = trial.suggest_loguniform("alpha", 1e-5, 1e-2)
  learning_rate_init = trial.suggest_loguniform("learning_rate_init", 1e-4, 5e-3)


[I 2025-11-20 18:58:50,126] Trial 99 finished with value: 0.8434997625002595 and parameters: {'hidden_layer_1': 320, 'hidden_layer_2': 80, 'alpha': 0.0003695408973446799, 'learning_rate_init': 0.004521425069641083, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}. Best is trial 14 with value: 0.8493686499044426.

Best Macro-F1: 0.8494
parameters: {'hidden_layer_1': 288, 'hidden_layer_2': 80, 'alpha': 3.813848083517318e-05, 'learning_rate_init': 0.0017053755428803074, 'batch_size': 128, 'activation': 'relu', 'solver': 'adam'}


In [7]:
best = study.best_params

mlp_best = MLPClassifier(
    hidden_layer_sizes=(best["hidden_layer_1"], best["hidden_layer_2"]),
    activation=best["activation"],
    solver=best["solver"],
    alpha=best["alpha"],
    batch_size=best["batch_size"],
    learning_rate="adaptive",
    learning_rate_init=best["learning_rate_init"],
    max_iter=300,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=10,
    random_state=42
)

print("\n[INFO] Training best MLP model with full training data...")
mlp_best.fit(X_train, y_train)


[INFO] Training best MLP model with full training data...


In [9]:
test_pred = mlp_best.predict(X_test)
test_pred_proba = mlp_best.predict_proba(X_test)[:, 1]   # positive class 확률

test_acc = accuracy_score(y_test, test_pred)
test_f1_macro = f1_score(y_test, test_pred, average='macro')
test_f1_micro = f1_score(y_test, test_pred, average='micro')
test_f1_weighted = f1_score(y_test, test_pred, average='weighted')

# ROC-AUC (binary)
test_roc_auc = roc_auc_score(y_test, test_pred_proba)

# PR-AUC
prec, rec, _ = precision_recall_curve(y_test, test_pred_proba)
test_pr_auc = auc(rec, prec)

print("\n========== [TEST RESULTS] ==========")
print(f"Accuracy        : {test_acc:.4f}")
print(f"F1 (macro)      : {test_f1_macro:.4f}")
print(f"F1 (micro)      : {test_f1_micro:.4f}")
print(f"F1 (weighted)   : {test_f1_weighted:.4f}")
print(f"ROC-AUC         : {test_roc_auc:.4f}")
print(f"PR-AUC          : {test_pr_auc:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, test_pred, digits=4))


Accuracy        : 0.8709
F1 (macro)      : 0.8538
F1 (micro)      : 0.8709
F1 (weighted)   : 0.8693
ROC-AUC         : 0.9358
PR-AUC          : 0.9625

Classification Report:
              precision    recall  f1-score   support

         0.0     0.8447    0.7667    0.8038      3000
         1.0     0.8829    0.9258    0.9038      5700

    accuracy                         0.8709      8700
   macro avg     0.8638    0.8462    0.8538      8700
weighted avg     0.8697    0.8709    0.8693      8700

