In [6]:
import tensorflow as tf
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


X_train = pd.read_csv('../data/processed/X_train.csv')
X_test = pd.read_csv('../data/processed/X_test.csv')
y_train_cat = pd.read_csv('../data/processed/y_train_cat.csv')
y_test_cat = pd.read_csv('../data/processed/y_test_cat.csv')

In [7]:
from sklearn.metrics import roc_auc_score
n_col = X_train.shape[1]
model_classifier = tf.keras.Sequential([
    tf.keras.Input(shape=(n_col,)),
    tf.keras.layers.Dense(64, activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.Dropout(0.2),

        tf.keras.layers.Dense(32, activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.Dropout(0.2),

        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

model_classifier.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
history = model_classifier.fit(
        X_train, y_train_cat,
        validation_split=0.2,
        epochs=15,
        batch_size=64,
        verbose=0
    )

y_pred_prob = model_classifier.predict(X_test)
loss, accuracy = model_classifier.evaluate(X_test, y_test_cat, verbose=0)
auc = roc_auc_score(y_test_cat, y_pred_prob)

print(f"   Accuracy : {accuracy:.2%}")
print(f"   AUC      : {auc:.4f}")
print(f"   Loss     : {loss:.4f}")

[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   Accuracy : 74.46%
   AUC      : 0.8414
   Loss     : 0.5238


In [8]:
import optuna


def objective_classification(trial):
    activation_chosen = trial.suggest_categorical('activation', ['relu', 'elu', 'swish'])
    units_1 = trial.suggest_int('units_1', 16, 100)
    units_2 = trial.suggest_int('units_2', 16, 100)
    dropout_rate_1 = trial.suggest_float('dropout_rate_1', 0.0, 0.5)
    dropout_rate_2 = trial.suggest_float('dropout_rate_2', 0.1, 0.5)
    l2_reg = trial.suggest_float('l2_reg', 0.00001, 0.01, log=True)
    learning_rate = trial.suggest_float('learning_rate', 0.0001, 0.01, log=True)

    n_col = X_train.shape[1]
    model_classifier_opti = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(n_col,)),
        tf.keras.layers.Dense(units_1, activation=activation_chosen, kernel_regularizer=tf.keras.regularizers.l2(l2_reg)),
        tf.keras.layers.Dropout(dropout_rate_1),

        tf.keras.layers.Dense(units_2, activation=activation_chosen, kernel_regularizer=tf.keras.regularizers.l2(l2_reg)),
        tf.keras.layers.Dropout(dropout_rate_2),

        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model_classifier_opti.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss = 'binary_crossentropy',
        metrics=['accuracy'],
    )

    history = model_classifier_opti.fit(
        X_train, y_train_cat,
        validation_split=0.2,
        epochs=15,
        batch_size=64,
        verbose=0
    )
    return history.history['val_loss'][-1]

storage_name = "sqlite:///../db.sqlite3"
study = optuna.create_study(
    study_name="optimization_winner",
    storage=storage_name,
    direction='minimize',
    load_if_exists=True
)
study.optimize(objective_classification, n_trials=30)
print(f"Best loss :{study.best_value}")
print(f"Meilleur hyperparametres: {study.best_params}")

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-12-17 16:41:33,625] A new study created in RDB with name: optimization_winner
[I 2025-12-17 16:42:26,447] Trial 0 finished with value: 0.4718772768974304 and parameters: {'activation': 'relu', 'units_1': 17, 'units_2': 17, 'dropout_rate_1': 0.4325303059921805, 'dropout_rate_2': 0.15131994276965172, 'l2_reg': 0.0003356917114802452, 'learning_rate': 0.0017442382553700883}. Best is trial 0 with value: 0.4718772768974304.
[I 2025-12-17 16:43:22,378] Trial 1 finished with value: 0.5435472726821899 and parameters: {'activation': 'elu', 'units_1': 44, 'units_2': 91, 'dropout_rate_1': 0.3697713432270313, 'dropout_rate_2': 0.38292973978477296, 'l2_reg': 0.007251826427343721, 'learning_rate': 0.00810328606972994}. Best is trial 0 with value: 0.4718772768974304.
[I 2025-12-17 16:44:26,881] Trial 2 finished with value: 0.4904896020889282 and parameters: {'activation': 'swish', 'units_1': 100, 'units_2': 64, 'dropout_rate_1': 0.3323072050344

Best loss :0.4452647864818573
Meilleur hyperparametres: {'activation': 'relu', 'units_1': 70, 'units_2': 16, 'dropout_rate_1': 0.20919892048363387, 'dropout_rate_2': 0.44724275744610087, 'l2_reg': 2.0239547386813323e-05, 'learning_rate': 0.0009850649524323052}


In [9]:
from sklearn.metrics import roc_auc_score

best = study.best_params

model_final = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),

    tf.keras.layers.Dense(best['units_1'], activation=best['activation'],
                          kernel_regularizer=tf.keras.regularizers.l2(best['l2_reg'])),
    tf.keras.layers.Dropout(best['dropout_rate_1']),

    tf.keras.layers.Dense(best['units_2'], activation=best['activation'],
                          kernel_regularizer=tf.keras.regularizers.l2(best['l2_reg'])),
    tf.keras.layers.Dropout(best['dropout_rate_2']),

    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_final.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=best['learning_rate']),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
)


history_final = model_final.fit(
    X_train, y_train_cat,
    validation_split=0.2,
    epochs=60,
    batch_size=32,
    verbose=1
)

y_pred_prob = model_final.predict(X_test)
y_pred_class = (y_pred_prob > 0.5).astype(int)
loss, accuracy, auc_keras = model_final.evaluate(X_test, y_test_cat, verbose=0)
auc_score = roc_auc_score(y_test_cat, y_pred_prob)

print(f"Accuracy : {accuracy:.2%}")
print(f"AUC ROC  : {auc_score:.4f}")
print(f"Loss     : {loss:.4f}")

Epoch 1/60
[1m2449/2449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.7281 - auc: 0.8213 - loss: 0.5103 - val_accuracy: 0.7493 - val_auc: 0.8541 - val_loss: 0.4617
Epoch 2/60
[1m2449/2449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.7444 - auc: 0.8433 - loss: 0.4783 - val_accuracy: 0.7547 - val_auc: 0.8575 - val_loss: 0.4575
Epoch 3/60
[1m2449/2449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.7509 - auc: 0.8497 - loss: 0.4711 - val_accuracy: 0.7515 - val_auc: 0.8574 - val_loss: 0.4554
Epoch 4/60
[1m2449/2449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.7510 - auc: 0.8512 - loss: 0.4684 - val_accuracy: 0.7550 - val_auc: 0.8605 - val_loss: 0.4506
Epoch 5/60
[1m2449/2449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.7526 - auc: 0.8530 - loss: 0.4658 - val_accuracy: 0.7593 - val_auc: 0.8608 - val_loss: 0.4520
Epoch 6/60
[1m244