In [1]:
import sys
import os

# Make the project root importable. The root is taken to be the parent of the
# current working directory (adjust if the notebook is run from elsewhere).
project_root = os.path.abspath(os.pardir)  # os.pardir is ".."
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

In [9]:
# 1. Imports and Setup
import os

import joblib
import numpy as np
import optuna
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import f1_score, roc_auc_score

from models.tabnet_model import build_tabnet_model
from training.tabnet_trainer import train_tabnet_model
from utils.style_utils import styled_print


# 2. Load the original non-SMOTE, scaled dataset
# NOTE: joblib.load unpickles the file, so only point this at trusted artifacts.
data_path = "../artifacts/tabnet/data_scaled_nosmote_for_tabnet.pkl"
(
    X_train_scaled,
    y_train,
    X_val_scaled,
    y_val,
    X_test_scaled,
    y_test,
) = joblib.load(data_path)

styled_print(
    "🗂️ Loaded the original non-SMOTE, scaled dataset from: artifacts/tabnet/data_scaled_nosmote_for_tabnet.pkl"
)

#  3. Define Optuna Objective Function
def objective(trial):
    """Train one TabNet configuration and return its validation F1 score.

    Samples a set of TabNet hyperparameters from ``trial``, fits a
    ``TabNetClassifier`` on the module-level training split while monitoring
    the validation split, then scores hard predictions on the validation split.

    Reads the module-level ``X_train_scaled``, ``y_train``, ``X_val_scaled``
    and ``y_val`` (accessed through ``.values``, so presumably pandas objects),
    and requires ``torch`` to be imported at module level.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The F1 score of the fitted model on the validation split.
    """
    params = {
        "n_d": trial.suggest_categorical("n_d", [8, 16, 32, 64]),
        "n_a": trial.suggest_categorical("n_a", [8, 16, 32, 64]),
        "n_steps": trial.suggest_int("n_steps", 3, 7),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0, step=0.5),
        "lambda_sparse": trial.suggest_float("lambda_sparse", 1e-5, 1e-2, log=True),
        "optimizer_params": dict(lr=trial.suggest_float("lr", 1e-4, 1e-2, log=True)),
        "scheduler_params": {"step_size": 10, "gamma": 0.95},
        "scheduler_fn": torch.optim.lr_scheduler.StepLR,
        "mask_type": "entmax",  # better for sparse data
        "device_name": "cuda" if torch.cuda.is_available() else "cpu",
    }

    model = TabNetClassifier(**params)
    model.fit(
        X_train=X_train_scaled.values,
        y_train=y_train.values,
        eval_set=[(X_val_scaled.values, y_val.values)],
        # NOTE(review): per the pytorch-tabnet docs the last metric listed is
        # the one used for early stopping; "f1_score" is not a built-in metric
        # name, so it is presumably registered by a project import - confirm.
        eval_metric=["auc", "f1_score"],
        patience=10,
        max_epochs=50,
        batch_size=256,
        virtual_batch_size=128
    )

    preds = model.predict(X_val_scaled.values)
    # zero_division=0 keeps the score at 0.0 when a trial predicts no positives,
    # without emitting an undefined-metric warning for every such trial.
    score = f1_score(y_val, preds, zero_division=0)

    return score  # or ROC AUC


In [11]:
# 4. Run Tuning (the Optuna Study)
import torch

# Seed the sampler so the sequence of suggested hyperparameters is reproducible
# across kernel restarts.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)

# A single optimize call: every call adds n_trials more trials to the same
# study, so the earlier back-to-back pair ran 40 trials instead of 20.
study.optimize(objective, n_trials=20, show_progress_bar=True)


[I 2025-03-28 04:23:58,491] A new study created in memory with name: no-name-b8ec8690-c754-483f-8568-a73c771b7e6a


epoch 0  | loss: 0.02779 | val_0_auc: 0.86418 | val_0_f1_score: 0.62609 |  0:00:21s


[W 2025-03-28 04:24:26,928] Trial 0 failed with parameters: {'n_d': 8, 'n_a': 64, 'n_steps': 6, 'gamma': 2.0, 'lambda_sparse': 0.00021095907036417487, 'lr': 0.0031501942004146014} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/mh/1vf7k6ld4wjfcwchwd55lt4m0000gn/T/ipykernel_16585/1649855047.py", line 37, in objective
    model.fit(
  File "/opt/anaconda3/lib/python3.12/site-packages/pytorch_tabnet/abstract_model.py", line 258, in fit
    self._train_epoch(train_dataloader)
  File "/opt/anaconda3/lib/python3.12/site-packages/pytorch_tabnet/abstract_model.py", line 489, in _train_epoch
    batch_logs = self._train_batch(X, y)
                 ^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/pytorch_tabnet/abstract_model.py", line 534

KeyboardInterrupt: 