In [31]:
import pandas as pd
import numpy as np
import sys
import os

# Allows to get the module in utils
sys.path.append(os.path.abspath(".."))


from utils.load import load_data
from utils.preprocessing import preprocess
from utils.split import split_data

from sklearn.metrics import classification_report, confusion_matrix
# from sklearn.preprocessing import StandardScaler
# from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
# from sklearn.compose import ColumnTransformer

import tensorflow as tf

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.utils import class_weight

import datetime

In [32]:
# Load data
df = load_data()

In [33]:
# Split and preprocess datas set
X_train, X_val, X_test, y_train, y_val, y_test = split_data(df)

X_train_processed, X_test_processed, X_val_processed, y_test_encoding, y_train_encoded, y_val_encoding, pipeline, le = preprocess(X_train, X_val, X_test, y_train, y_val, y_test)

In [34]:
X_train_dense = X_train_processed.toarray() if hasattr(X_train_processed, "toarray") else X_train_processed
X_val_dense = X_val_processed.toarray() if hasattr(X_val_processed, "toarray") else X_val_processed
X_test_dense = X_test_processed.toarray() if hasattr(X_test_processed, "toarray") else X_test_processed

In [35]:
y_train_vector = y_train_encoded.reshape(-1)
y_val_vector = y_val_encoding.reshape(-1)
y_test_vector = y_test_encoding.reshape(-1)

In [36]:
input_dim = X_train_dense.shape[1]

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(input_dim,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy', 'recall', 'auc']
)

model.summary()

### Exemple of checkpoints use

model.compile(loss=..., optimizer=...,
              metrics=['accuracy'])

EPOCHS = 10
checkpoint_filepath = '/tmp/ckpt/checkpoint.model.keras'
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

- Model is saved at the end of every epoch, if it's the best seen so far.
model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback])

- The model (that are considered the best) can be loaded as -
keras.models.load_model(checkpoint_filepath)

- Alternatively, one could checkpoint just the model weights as -
checkpoint_filepath = '/tmp/ckpt/checkpoint.weights.h5'
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

- Model weights are saved at the end of every epoch, if it's the best seen so far.
model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback])

- The model weights (that are considered the best) can be loaded as -
model.load_weights(checkpoint_filepath)


In [37]:
checkpoint_path = "checkpoints/churn_models.keras"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
model_ckpt = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',  
    save_best_only=True,# sauvegarde le modèle qui maximise l’accuracy de validation
    verbose=1
)

In [38]:
history = model.fit(
    X_train_dense, y_train_vector,
    validation_data=(X_val_dense, y_val_vector),
    epochs=20,
    batch_size=16,
    callbacks=[model_ckpt],
    verbose=1
)

Epoch 1/20
[1m278/282[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.7692 - auc: 0.8058 - loss: 0.4662 - recall: 0.5035
Epoch 1: val_accuracy improved from -inf to 0.80302, saving model to checkpoints/churn_models.keras
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7694 - auc: 0.8060 - loss: 0.4658 - recall: 0.5034 - val_accuracy: 0.8030 - val_auc: 0.8398 - val_loss: 0.4336 - val_recall: 0.4649
Epoch 2/20
[1m273/282[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.7910 - auc: 0.8262 - loss: 0.4340 - recall: 0.4523
Epoch 2: val_accuracy did not improve from 0.80302
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7911 - auc: 0.8265 - loss: 0.4338 - recall: 0.4535 - val_accuracy: 0.7959 - val_auc: 0.8437 - val_loss: 0.4315 - val_recall: 0.4047
Epoch 3/20
[1m279/282[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8048 

In [39]:
y_pred_probs = model.predict(X_test_dense)
y_pred = np.argmax(y_pred_probs, axis=1)
print("\nClassification Report :")
print(classification_report(y_test_vector, y_pred))

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Classification Report :
              precision    recall  f1-score   support

           0       0.73      1.00      0.85      1035
           1       0.00      0.00      0.00       374

    accuracy                           0.73      1409
   macro avg       0.37      0.50      0.42      1409
weighted avg       0.54      0.73      0.62      1409



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [40]:
print("Matrice de Confusion :")
cm = confusion_matrix(y_test_vector, y_pred)
print(cm)

Matrice de Confusion :
[[1035    0]
 [ 374    0]]


In [41]:
y_pred_probs = model.predict(X_test_dense)

probs = y_pred_probs.ravel()

y_pred = (probs >= 0.5)
print("\nClassification Report :")
print(classification_report(y_test_vector, y_pred))
y_test_vector.shape, y_pred_probs.shape

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step

Classification Report :
              precision    recall  f1-score   support

           0       0.83      0.89      0.86      1035
           1       0.63      0.51      0.56       374

    accuracy                           0.79      1409
   macro avg       0.73      0.70      0.71      1409
weighted avg       0.78      0.79      0.78      1409



((1409,), (1409, 1))

In [42]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [43]:
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',    # surveille la perte de validation
    patience=3,            # tolère 3 époques sans amélioration
    restore_best_weights=True
)

In [44]:
log_dir = os.path.join(
    "logs", "fit", datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
)
tensorboard_cb = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,       # enregistre les histogrammes de poids chaque époque
    write_graph=True,       # sauvegarde le graph du modèle
    write_images=True
)

In [45]:
history = model.fit(
    X_train_dense, y_train_vector,
    validation_split=0.2,
    epochs=20,
    batch_size=16,
    verbose=1,
    callbacks=[early_stop, model_ckpt, tensorboard_cb]
)

Epoch 1/20
[1m220/226[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8321 - auc: 0.8932 - loss: 0.3486 - recall: 0.5682
Epoch 1: val_accuracy improved from 0.81366 to 0.81486, saving model to checkpoints/churn_models.keras
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8321 - auc: 0.8932 - loss: 0.3486 - recall: 0.5686 - val_accuracy: 0.8149 - val_auc: 0.8888 - val_loss: 0.3669 - val_recall: 0.6064
Epoch 2/20
[1m220/226[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.8340 - auc: 0.8982 - loss: 0.3408 - recall: 0.5724
Epoch 2: val_accuracy improved from 0.81486 to 0.82373, saving model to checkpoints/churn_models.keras
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8338 - auc: 0.8981 - loss: 0.3410 - recall: 0.5726 - val_accuracy: 0.8237 - val_auc: 0.8899 - val_loss: 0.3619 - val_recall: 0.6024
Epoch 3/20
[1m223/226[0m [32m━━━━━━━━━━━━━━━━━━

In [46]:
history = model.fit(
    X_train_dense, y_train_vector,
    validation_data=(X_val_dense, y_val_vector),
    epochs=20,
    batch_size=16,
    callbacks=[model_ckpt,tensorboard_cb],
    verbose=1
)

Epoch 1/20
[1m279/282[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - accuracy: 0.8337 - auc: 0.9002 - loss: 0.3409 - recall: 0.6008
Epoch 1: val_accuracy did not improve from 0.82373
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8338 - auc: 0.9002 - loss: 0.3410 - recall: 0.6008 - val_accuracy: 0.8004 - val_auc: 0.8188 - val_loss: 0.5066 - val_recall: 0.4582
Epoch 2/20
[1m276/282[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - accuracy: 0.8480 - auc: 0.9066 - loss: 0.3308 - recall: 0.6110
Epoch 2: val_accuracy did not improve from 0.82373
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8477 - auc: 0.9064 - loss: 0.3311 - recall: 0.6106 - val_accuracy: 0.7879 - val_auc: 0.8128 - val_loss: 0.5025 - val_recall: 0.4649
Epoch 3/20
[1m274/282[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8442 - auc: 0.9079 - loss: 0.3273 - recall: 0.6117
Epo

In [47]:
%tensorboard --logdir logs/fit
# If it does not appear in the notebook, 

Reusing TensorBoard on port 6006 (pid 105293), started 18:31:04 ago. (Use '!kill 105293' to kill it.)

In [48]:
print(y_train_vector)

[0 0 0 ... 0 0 0]


In [49]:
x_train=0
for i in y_train_vector:
    if i == 1:
        x_train+=1
x_val=0
for i in y_val_vector:
    if i == 1:
        x_val+=1

x_test=0
for i in y_test_vector:
    if i == 1:
        x_test+=1

print(x_train, x_val, x_test)


1196 299 374


In [50]:
y_val_vector.shape

(1127,)