# Importing Packages and Inicial Data Preview

In [10]:
# Importing packages

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from pprint import pprint

from imblearn.under_sampling import RandomUnderSampler, TomekLinks
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTEENN
from imblearn.pipeline import Pipeline
from collections import Counter

from sklearn.feature_selection import SelectKBest, chi2
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.metrics import classification_report, confusion_matrix, make_scorer, accuracy_score, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.decomposition import PCA

from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC

from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasClassifier

import umap.umap_ as umap
from functools import partial

import optuna
from optuna.visualization import plot_pareto_front
import plotly

In [4]:
import sklearn
import optuna
import scikeras
import tensorflow as tf
import imblearn

print("scikit-learn:", sklearn.__version__)
print("optuna:", optuna.__version__)
print("scikeras:", scikeras.__version__)
print("tensorflow:", tf.__version__)
print("imblearn:", imblearn.__version__)

scikit-learn: 1.6.1
optuna: 4.3.0
scikeras: 0.13.0
tensorflow: 2.19.0
imblearn: 0.13.0


In [5]:
# loading dataset and handle a subset of it
path_2 = "datasets/diabetes_binary_health_indicators_BRFSS2015.csv"

df = pd.read_csv(path_2)

# Models Implementation

In [6]:
# Step 1: Split features and target
X = df.drop(columns=['Diabetes_binary'])
y = df['Diabetes_binary']
print("Before balancing:", Counter(y))

# Step 2: Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Step 3: Apply undersampling
undersample = RandomUnderSampler(sampling_strategy={0.0: 35000}, random_state=17)
X_bal, y_bal = undersample.fit_resample(X_train, y_train)
print("After undersampling:", Counter(y_bal))

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pca = PCA(n_components=5) 

X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

Before balancing: Counter({0.0: 218334, 1.0: 35346})
After undersampling: Counter({0.0: 35000, 1.0: 28277})


## Testing Hyperparameter Tuning With Optuna Optimizations

### SVM Model

In [None]:
# Defining tuning optimization for SVM

def objective_svc(trial):
    # Suggest hyperparameters to tune
    C = trial.suggest_loguniform("C", 1e-3, 100)
    gamma = trial.suggest_loguniform("gamma", 1e-4, 1e-1)
    n_components = trial.suggest_int("n_components", 200, 1000)  # optional tuning for Nystroem dimension

    # Build pipeline with these hyperparameters
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('nystroem', Nystroem(kernel='rbf', gamma=gamma, n_components=n_components, random_state=42)),
        ('clf', LinearSVC(C=C, class_weight='balanced', random_state=42, max_iter=10000)),
    ])

    
    scoring = {
        "accuracy": make_scorer(accuracy_score),
        "precision": make_scorer(precision_score, average='macro'),
        "recall": make_scorer(recall_score, average='macro'),
    }

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # Cross-validate with multiple metrics (define 'scoring' dict before calling this)
    scores = cross_validate(pipeline, X_train, y_train, scoring=scoring, cv=cv, n_jobs=-1)
    
    # Return tuple of metrics for multi-objective optimization
    return scores["test_accuracy"].mean(), scores["test_precision"].mean(), scores["test_recall"].mean()


study_svc = optuna.create_study(directions=["maximize", "maximize", "maximize"], study_name="SVC")
study_svc.optimize(objective_svc, n_trials=30)

plot_pareto_front(study_svc, target_names=["Accuracy", "Precision", "Recall"]).show()

[I 2025-05-16 02:12:06,588] A new study created in memory with name: SVC
  C = trial.suggest_loguniform("C", 1e-3, 100)
  gamma = trial.suggest_loguniform("gamma", 1e-4, 1e-1)
[I 2025-05-16 02:15:40,987] Trial 0 finished with values: [0.7001882149107228, 0.6199978994232449, 0.7355297319278328] and parameters: {'C': 13.41805372370113, 'gamma': 0.005197743336103416, 'n_components': 852}.
  C = trial.suggest_loguniform("C", 1e-3, 100)
  gamma = trial.suggest_loguniform("gamma", 1e-4, 1e-1)
[I 2025-05-16 02:16:47,205] Trial 1 finished with values: [0.7100480791635179, 0.6208792922839985, 0.7333440615850118] and parameters: {'C': 0.10485807095156621, 'gamma': 0.004503234900695931, 'n_components': 652}.
  C = trial.suggest_loguniform("C", 1e-3, 100)
  gamma = trial.suggest_loguniform("gamma", 1e-4, 1e-1)
[I 2025-05-16 02:17:26,399] Trial 2 finished with values: [0.7351239670485005, 0.6227756650704975, 0.7238298103545742] and parameters: {'C': 0.003577803754690045, 'gamma': 0.0009837451954632

### Best SVM Model

In [5]:
# Best parameters for each model

best_trial_svm = max(study_svc.best_trials, key=lambda t: sum(t.values))
print("SVC Best Params:", best_trial_svm.params)
print("Accuracy:", best_trial_svm.values[0])
print("Precision:", best_trial_svm.values[1])
print("Recall:", best_trial_svm.values[2])

est_trial = study_svc.best_trials[0] 

best_params = best_trial_svm.params
print("Best params:", best_params)

# Build pipeline with the best parameters
best_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('nystroem', Nystroem(kernel='rbf',
                         gamma=best_params["gamma"],
                         n_components=best_params["n_components"],
                         random_state=42)),
    ('clf', LinearSVC(C=best_params["C"],
                      class_weight='balanced',
                      random_state=42,
                      max_iter=10000))
])

# Fit the pipeline on full training data
best_pipeline.fit(X_train, y_train)

# Predict on test set
y_pred = best_pipeline.predict(X_test)

# Print classification report
print("Classification Report for Best SVC Model:")
print(classification_report(y_test, y_pred))

SVC Best Params: {'C': 0.0022349107112217994, 'gamma': 0.0001809469736933914, 'n_components': 353}
Accuracy: 0.7400169452918518
Precision: 0.6237037325723807
Recall: 0.7227155576261659
Best params: {'C': 0.0022349107112217994, 'gamma': 0.0001809469736933914, 'n_components': 353}


Classification Report for Best SVC Model:
              precision    recall  f1-score   support

         0.0       0.94      0.75      0.83     43667
         1.0       0.31      0.69      0.43      7069

    accuracy                           0.74     50736
   macro avg       0.62      0.72      0.63     50736
weighted avg       0.85      0.74      0.78     50736



### SVM Model

In [None]:
# Defining tuning optimization for RF

def objective_rf(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 600),
        "max_depth": trial.suggest_int("max_depth", 5, 60),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
        "class_weight": "balanced",
        "n_jobs": -1
    }

    model = RandomForestClassifier(**params)

    scoring = {
        "accuracy": make_scorer(accuracy_score),
        "precision": make_scorer(precision_score, average='macro'),
        "recall": make_scorer(recall_score, average='macro'),
    }

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    scores = cross_validate(model, X_train, y_train, scoring=scoring, cv=cv)
    return scores["test_accuracy"].mean(), scores["test_precision"].mean(), scores["test_recall"].mean()

study_rf = optuna.create_study(directions=["maximize", "maximize", "maximize"], study_name="RF")
study_rf.optimize(objective_rf, n_trials=30)

plot_pareto_front(study_rf, target_names=["Accuracy", "Precision", "Recall"]).show()

[I 2025-05-16 02:26:19,040] A new study created in memory with name: RF

suggest_int() got {'step'} as positional arguments but they were expected to be given as keyword arguments.


The distribution is specified by [100, 300] and step=600, but the range is not divisible by `step`. It will be replaced by [100, 100].


suggest_int() got {'step'} as positional arguments but they were expected to be given as keyword arguments.


The distribution is specified by [5, 20] and step=60, but the range is not divisible by `step`. It will be replaced by [5, 5].


suggest_int() got {'step'} as positional arguments but they were expected to be given as keyword arguments.


The distribution is specified by [1, 5] and step=10, but the range is not divisible by `step`. It will be replaced by [1, 1].

[I 2025-05-16 02:26:45,424] Trial 0 finished with values: [0.6764772480278651, 0.613951735543477, 0.7295056717235772] and parameters: {'n_estimators': 100, 'max_depth': 5, 'min_samples_split': 8, 'min_sam

### Best RF Model

In [8]:
best_trial_rf = max(study_rf.best_trials, key=lambda t: sum(t.values))
print("SVC Best Params:", best_trial_rf.params)
print("Accuracy:", best_trial_rf.values[0])
print("Precision:", best_trial_rf.values[1])
print("Recall:", best_trial_rf.values[2])

best_trial_rf = study_rf.best_trials[0]

best_params_rf = best_trial_rf.params
print("Best RF params:", best_params_rf)

# Building the RF model with the best hyperparameters
best_rf = RandomForestClassifier(
    n_estimators=best_params_rf["n_estimators"],
    max_depth=best_params_rf["max_depth"],
    min_samples_split=best_params_rf["min_samples_split"],
    min_samples_leaf=best_params_rf["min_samples_leaf"],
    class_weight="balanced",
    n_jobs=-1,
    random_state=42
)

best_rf.fit(X_train, y_train)
y_pred_rf = best_rf.predict(X_test)

# Print classification report
print("Classification Report for Best RF Model:")
print(classification_report(y_test, y_pred_rf))

SVC Best Params: {'n_estimators': 100, 'max_depth': 5, 'min_samples_split': 8, 'min_samples_leaf': 1}
Accuracy: 0.6767827717348881
Precision: 0.613960007738758
Recall: 0.7294311269164213
Best RF params: {'n_estimators': 100, 'max_depth': 5, 'min_samples_split': 8, 'min_samples_leaf': 1}
Classification Report for Best RF Model:
              precision    recall  f1-score   support

         0.0       0.95      0.65      0.77     43667
         1.0       0.27      0.80      0.41      7069

    accuracy                           0.67     50736
   macro avg       0.61      0.73      0.59     50736
weighted avg       0.86      0.67      0.72     50736



### NN Model

In [7]:
# Defining tuning optimization for NN

X_train_final, X_val, y_train_final, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=42
)

def create_model(input_dim, units1, dropout1, n_layers, units_i, dropout_i, lr):
    model = Sequential()
    model.add(Dense(units1, activation="relu", input_shape=(input_dim,)))
    model.add(Dropout(dropout1))
    
    for _ in range(n_layers):
        model.add(Dense(units_i, activation="relu"))
        model.add(Dropout(dropout_i))
    
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model

def objective_nn(trial):
    units1 = trial.suggest_int("units1", 64, 256)
    dropout1 = trial.suggest_float("dropout1", 0.2, 0.5)
    n_layers = 2
    units_i = trial.suggest_int("units_i", 32, 128)
    dropout_i = trial.suggest_float("dropout_i", 0.2, 0.5)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

    model = create_model(X_train_final.shape[1], units1, dropout1, n_layers, units_i, dropout_i, lr)

    early_stop = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

    model.fit(
        X_train_final, y_train_final,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=batch_size,
        callbacks=[early_stop],
        verbose=0
    )

    # Predict on validation set
    y_pred_probs = model.predict(X_val)
    y_pred = (y_pred_probs > 0.5).astype(int)

    val_accuracy = accuracy_score(y_val, y_pred)
    val_precision = precision_score(y_val, y_pred, average='macro')
    val_recall = recall_score(y_val, y_pred, average='macro')

    return val_accuracy, val_precision, val_recall

study_nn = optuna.create_study(directions=["maximize", "maximize", "maximize"], study_name="NN")
study_nn.optimize(objective_nn, n_trials=30, timeout=1800)

plot_pareto_front(study_nn, target_names=["Accuracy", "Precision", "Recall"]).show()

[I 2025-05-16 12:05:09,716] A new study created in memory with name: NN
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 764us/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-16 12:05:27,763] Trial 0 finished with values: [0.8606765379782699, 0.43033826898913496, 0.5] and parameters: {'units1': 244, 'dropout1': 0.3629071781776245, 'units_i': 88, 'dropout_i': 0.4577229596298902, 'lr': 0.00520307117566572, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 772us/step


[I 2025-05-16 12:07:32,128] Trial 1 finished with values: [0.8633866318460667, 0.7360560907223695, 0.5249914592007958] and parameters: {'units1': 164, 'dropout1': 0.20728777624609063, 'units_i': 74, 'dropout_i': 0.3936184502775568, 'lr': 0.000889674078257198, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 789us/step


[I 2025-05-16 12:07:55,897] Trial 2 finished with values: [0.8627953386385474, 0.7323619821119429, 0.520498095258777] and parameters: {'units1': 256, 'dropout1': 0.438413813012857, 'units_i': 60, 'dropout_i': 0.35615670809820044, 'lr': 0.006367644676044167, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 819us/step


[I 2025-05-16 12:08:37,501] Trial 3 finished with values: [0.8608243612801498, 0.7200179371266073, 0.5017161779922296] and parameters: {'units1': 208, 'dropout1': 0.482862916500018, 'units_i': 110, 'dropout_i': 0.4454742371795648, 'lr': 0.0008550772885345812, 'batch_size': 64}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 940us/step


[I 2025-05-16 12:08:56,889] Trial 4 finished with values: [0.8629431619404272, 0.7068819466324646, 0.545409021139117] and parameters: {'units1': 201, 'dropout1': 0.2846930877974463, 'units_i': 115, 'dropout_i': 0.22929167454062677, 'lr': 0.008630730971426165, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 768us/step


[I 2025-05-16 12:09:26,444] Trial 5 finished with values: [0.8625982409027076, 0.7287180651040871, 0.5197166519945632] and parameters: {'units1': 80, 'dropout1': 0.32966718147756036, 'units_i': 99, 'dropout_i': 0.32309618589268524, 'lr': 0.00018707740161058673, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 798us/step


[I 2025-05-16 12:09:53,120] Trial 6 finished with values: [0.8607258124122299, 0.7220705900715512, 0.5005473577887963] and parameters: {'units1': 97, 'dropout1': 0.42330520812887473, 'units_i': 124, 'dropout_i': 0.2777272173881462, 'lr': 0.0028426078229259063, 'batch_size': 64}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 832us/step


[I 2025-05-16 12:12:03,454] Trial 7 finished with values: [0.8621054965631082, 0.7513377983362319, 0.5106860520662508] and parameters: {'units1': 255, 'dropout1': 0.36468596956908605, 'units_i': 106, 'dropout_i': 0.4371972957648319, 'lr': 0.0006012142382166228, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 905us/step


[I 2025-05-16 12:13:03,976] Trial 8 finished with values: [0.8628692502894872, 0.7371287032315461, 0.5197258824863984] and parameters: {'units1': 133, 'dropout1': 0.3679337863547258, 'units_i': 122, 'dropout_i': 0.23439252623706253, 'lr': 0.00017049940820299916, 'batch_size': 64}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 745us/step


[I 2025-05-16 12:14:04,452] Trial 9 finished with values: [0.8615388405725689, 0.7469368931028382, 0.5067257332090722] and parameters: {'units1': 96, 'dropout1': 0.3781027825609714, 'units_i': 44, 'dropout_i': 0.45984548873049835, 'lr': 0.00019408562207146854, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 805us/step


[I 2025-05-16 12:14:57,486] Trial 10 finished with values: [0.8632634457611669, 0.7388423100896939, 0.5228449661574787] and parameters: {'units1': 223, 'dropout1': 0.2777818691921731, 'units_i': 100, 'dropout_i': 0.23724623531095546, 'lr': 0.00031433178856724763, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 790us/step


[I 2025-05-16 12:16:15,081] Trial 11 finished with values: [0.8633373574121067, 0.7347107250946678, 0.5250369384187243] and parameters: {'units1': 232, 'dropout1': 0.4055128795990776, 'units_i': 78, 'dropout_i': 0.40731618354103477, 'lr': 0.00016154593128617665, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 787us/step


[I 2025-05-16 12:17:23,621] Trial 12 finished with values: [0.8628199758555274, 0.7569225251259559, 0.5152509795280498] and parameters: {'units1': 167, 'dropout1': 0.4257894782478894, 'units_i': 98, 'dropout_i': 0.27764688662593295, 'lr': 0.00010999393913363023, 'batch_size': 64}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 810us/step


[I 2025-05-16 12:18:37,207] Trial 13 finished with values: [0.8627707014215674, 0.7433186696440269, 0.5172972836436748] and parameters: {'units1': 214, 'dropout1': 0.4375724801860726, 'units_i': 52, 'dropout_i': 0.2916719372454951, 'lr': 0.003755642576419195, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 777us/step


[I 2025-05-16 12:20:18,116] Trial 14 finished with values: [0.8632141713272069, 0.7288005601768536, 0.5263733627899158] and parameters: {'units1': 219, 'dropout1': 0.35988604486033504, 'units_i': 90, 'dropout_i': 0.35980149497478087, 'lr': 0.00014365701017487233, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 764us/step


[I 2025-05-16 12:20:50,534] Trial 15 finished with values: [0.8623518687329079, 0.7760067201750985, 0.5101622374732456] and parameters: {'units1': 86, 'dropout1': 0.343719727890171, 'units_i': 72, 'dropout_i': 0.31609477137387, 'lr': 0.0002279311582385667, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 890us/step


[I 2025-05-16 12:22:25,991] Trial 16 finished with values: [0.8614156544876691, 0.7642575536844531, 0.5048756586691174] and parameters: {'units1': 166, 'dropout1': 0.48309999622173777, 'units_i': 44, 'dropout_i': 0.4008776032597685, 'lr': 0.00022262584679738534, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 909us/step


[I 2025-05-16 12:23:08,276] Trial 17 finished with values: [0.8626721525536475, 0.742440208290146, 0.5166471958211258] and parameters: {'units1': 94, 'dropout1': 0.3081654393960935, 'units_i': 94, 'dropout_i': 0.4599837657263933, 'lr': 0.00015205673189982408, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 774us/step


[I 2025-05-16 12:23:48,115] Trial 18 finished with values: [0.8625736036857277, 0.7520459393758991, 0.5141444923525615] and parameters: {'units1': 113, 'dropout1': 0.33467326150776733, 'units_i': 64, 'dropout_i': 0.45146966242109854, 'lr': 0.0005506945684652027, 'batch_size': 64}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 775us/step


[I 2025-05-16 12:24:37,830] Trial 19 finished with values: [0.8621547709970682, 0.7475181226274974, 0.5115298283584095] and parameters: {'units1': 229, 'dropout1': 0.37405598949645685, 'units_i': 71, 'dropout_i': 0.4264837970610226, 'lr': 0.00012759819926187022, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 829us/step


[I 2025-05-16 12:26:45,711] Trial 20 finished with values: [0.8621794082140481, 0.7455919179978535, 0.5119887688174092] and parameters: {'units1': 190, 'dropout1': 0.4612668719826898, 'units_i': 121, 'dropout_i': 0.2190261761978075, 'lr': 0.0018655282289966235, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 744us/step


[I 2025-05-16 12:27:24,102] Trial 21 finished with values: [0.8621794082140481, 0.7532635712564801, 0.5110254086814814] and parameters: {'units1': 75, 'dropout1': 0.39213008965549534, 'units_i': 63, 'dropout_i': 0.2773343241313159, 'lr': 0.00023549246340175318, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 745us/step


[I 2025-05-16 12:27:51,916] Trial 22 finished with values: [0.8620315849121684, 0.749984170954675, 0.5102725908251796] and parameters: {'units1': 208, 'dropout1': 0.4936304239982966, 'units_i': 39, 'dropout_i': 0.237223327528369, 'lr': 0.0008486294815421953, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 798us/step


[I 2025-05-16 12:28:37,185] Trial 23 finished with values: [0.8631648968932469, 0.7280566896319899, 0.5261965281303225] and parameters: {'units1': 183, 'dropout1': 0.30150828972653143, 'units_i': 103, 'dropout_i': 0.4662194928686211, 'lr': 0.00025622337662954334, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 753us/step


[I 2025-05-16 12:29:07,103] Trial 24 finished with values: [0.8608243612801498, 0.7486548627747138, 0.5011233409855047] and parameters: {'units1': 188, 'dropout1': 0.25973682190373315, 'units_i': 47, 'dropout_i': 0.4615209969878122, 'lr': 0.007282456326567421, 'batch_size': 64}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 805us/step


[I 2025-05-16 12:29:53,030] Trial 25 finished with values: [0.8619083988272684, 0.7360235365852346, 0.5110902828154867] and parameters: {'units1': 155, 'dropout1': 0.2741106660714904, 'units_i': 113, 'dropout_i': 0.43697835988660005, 'lr': 0.0033736358805797847, 'batch_size': 32}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 861us/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-16 12:30:20,321] Trial 26 finished with values: [0.8606765379782699, 0.43033826898913496, 0.5] and parameters: {'units1': 224, 'dropout1': 0.3638327354916796, 'units_i': 106, 'dropout_i': 0.4596354411885222, 'lr': 0.004247784907298227, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-16 12:30:37,056] Trial 27 finished with values: [0.8606765379782699, 0.43033826898913496, 0.5] and parameters: {'units1': 151, 'dropout1': 0.4438133607563678, 'units_i': 75, 'dropout_i': 0.49630014534168765, 'lr': 0.005305204648616968, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 768us/step


[I 2025-05-16 12:31:01,167] Trial 28 finished with values: [0.8619083988272684, 0.7566892759668911, 0.5087189347885873] and parameters: {'units1': 102, 'dropout1': 0.24821819224738612, 'units_i': 52, 'dropout_i': 0.391968817301507, 'lr': 0.0014483562879309174, 'batch_size': 128}.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 737us/step


[I 2025-05-16 12:32:19,993] Trial 29 finished with values: [0.8611200078839094, 0.7784799606215194, 0.502628976698108] and parameters: {'units1': 130, 'dropout1': 0.4344128077735063, 'units_i': 38, 'dropout_i': 0.361623081212049, 'lr': 0.0009789351958430152, 'batch_size': 32}.


### Best NN Model

In [None]:
# Retrieve the best trial based on the sum of multi-objective metrics
best_trial_nn = max(study_nn.best_trials, key=lambda t: sum(t.values))

print("Best NN Parameters:", best_trial_nn.params)
print("Accuracy:", best_trial_nn.values[0])
print("Precision:", best_trial_nn.values[1])
print("Recall:", best_trial_nn.values[2])

# Extract parameters
params = best_trial_nn.params
units1 = params["units1"]
dropout1 = params["dropout1"]
units_i = params["units_i"]
dropout_i = params["dropout_i"]
lr = params["lr"]
batch_size = params["batch_size"]
n_layers = 2  # fixed as per your tuning setup

# Create the best model
def create_model_for_best(input_dim, units1, dropout1, n_layers, units_i, dropout_i, learning_rate):
    model = Sequential()
    model.add(Dense(units1, activation="relu", input_shape=(input_dim,)))
    model.add(Dropout(dropout1))
    for _ in range(n_layers):
        model.add(Dense(units_i, activation="relu"))
        model.add(Dropout(dropout_i))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy", "Precision", "Recall"]
    )
    return model

# Instantiate and train the best model
best_model = create_model_for_best(X_train.shape[1], units1, dropout1, n_layers, units_i, dropout_i, lr)

early_stop = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weights = dict(zip(classes, weights))

best_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=batch_size,
    class_weight=class_weights,
    callbacks=[early_stop],
    verbose=0
)

y_pred_nn = best_model.predict(X_test)
y_pred_nn = (y_pred_nn > 0.5).astype(int)

print("Classification Report for Best NN Model:")
print(classification_report(y_test, y_pred_nn))


Best NN Parameters: {'units1': 86, 'dropout1': 0.343719727890171, 'units_i': 72, 'dropout_i': 0.31609477137387, 'lr': 0.0002279311582385667, 'batch_size': 128}
Accuracy: 0.8623518687329079
Precision: 0.7760067201750985
Recall: 0.5101622374732456



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m1586/1586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 736us/step
Classification Report for Best NN Model:
              precision    recall  f1-score   support

         0.0       0.95      0.65      0.78     43667
         1.0       0.28      0.81      0.41      7069

    accuracy                           0.68     50736
   macro avg       0.62      0.73      0.59     50736
weighted avg       0.86      0.68      0.73     50736



# Best Model Obtained

In [None]:
# Best model and the parameters tuned
print("The best model obtained (SVC) and it's params:", best_trial_svc.params)

cm = confusion_matrix(y_test, y_pred_model)

# Display the confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap="Blues")
plt.show()