In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("../../data/WA_Fn-UseC_-Telco-Customer-Churn.csv")
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [2]:
df.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [3]:
df2 = df.copy()

In [4]:
df2["gender"]= df["gender"].apply(lambda x: 0 if x ==  'Male' else 1)
for col in ["Partner", "PhoneService", 'PaperlessBilling', "Dependents", 'Churn']:
    df2[col]= df[col].apply(lambda x: 0 if x ==  'No' else 1)

In [5]:
col = 'TotalCharges'
df[col].unique()

array(['29.85', '1889.5', '108.15', ..., '346.45', '306.6', '6844.5'],
      dtype=object)

In [6]:
feature_of_interest = [
    'gender', 'SeniorCitizen', 'Partner', 'Dependents',
    'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
    'PaymentMethod', 'MonthlyCharges', 'TotalCharges'
]

target = 'Churn'

categorical_features = [
    "DeviceProtection",
    "MultipleLines",	
    "InternetService",	
    "OnlineSecurity",
    "OnlineBackup",
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'Contract',
    'PaymentMethod'
]

numerical_features = [
    'gender', 'SeniorCitizen', 'Partner', 'Dependents',
    'tenure', 'PhoneService', 'PaperlessBilling',
    'MonthlyCharges', 'TotalCharges'
]

In [7]:
for col in numerical_features:
    try:
        df2[col] = pd.to_numeric(df2[col], errors="coerce")
    except:
        print(col)

In [8]:
df2.set_index("customerID", inplace=True)

In [9]:
X = df2.drop(columns=[target])
y = df2[target]
X.shape, y.shape

((7043, 19), (7043,))

In [10]:
from sklearn.model_selection import train_test_split

# Séparation train/val/test (80/20 puis 20% de train pour val)
X_train_0, X_test, y_train_0, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# On prend 20% de X_train pour validation
X_train, X_val, y_train, y_val = train_test_split(
    X_train_0, y_train_0, test_size=0.2, random_state=42, stratify=y_train_0
)
X_train.shape, X_test.shape, X_val.shape, X_test.shape, y_val.shape, y_test.shape

((4507, 19), (1409, 19), (1127, 19), (1409, 19), (1127,), (1409,))

In [11]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ],
    remainder='passthrough'
)

In [12]:
X_train = preprocessor.fit_transform(X_train)
X_val  = preprocessor.transform(X_val)
X_test  = preprocessor.transform(X_test)
X_train[:5], X_val[:5], X_test[:5]

(array([[-0.99226422, -0.43927126, -0.96618059, -0.65427327, -0.2553918 ,
          0.3387717 , -1.21450143, -0.17476006, -0.34239952,  1.        ,
          0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
          1.        ,  0.        ,  0.        ,  1.        ,  0.        ,
          0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  1.        ,  0.        ,  0.        ,  1.        ,
          1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
          0.        ,  0.        ,  1.        ,  0.        ,  0.        ],
        [-0.99226422,  2.27649768,  1.0350032 , -0.65427327,  1.61043169,
          0.3387717 ,  0.82338314,  1.71099346,  2.68697743,  0.        ,
          0.        ,  1.        ,  0.        ,  0.        ,  1.        ,
          0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
          1.        ,  0.        ,  0.        ,  1.        ,  0.        ,
          0.        ,  1.        ,  0

In [13]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = 'GPU:0'

import tensorflow as tf
print("GPUs disponibles :", tf.config.list_physical_devices('GPU'))

2025-05-19 16:19:49.888732: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


GPUs disponibles : []


In [19]:
def build_model():
    # Réseau avec 2 couches cachées de 64 neurones chacune
    # et une couche de sortie avec activation softmax pour classification
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    # Définition de la fonction de perte, de l'optimiseur et des métriques
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    model.summary()

    return model

In [20]:
model = build_model()

In [21]:
X_train

array([[-0.99226422, -0.43927126, -0.96618059, ...,  1.        ,
         0.        ,  0.        ],
       [-0.99226422,  2.27649768,  1.0350032 , ...,  0.        ,
         0.        ,  0.        ],
       [-0.99226422, -0.43927126,  1.0350032 , ...,  1.        ,
         0.        ,  0.        ],
       ...,
       [ 1.00779609, -0.43927126, -0.96618059, ...,  1.        ,
         0.        ,  0.        ],
       [ 1.00779609, -0.43927126, -0.96618059, ...,  0.        ,
         0.        ,  1.        ],
       [-0.99226422, -0.43927126,  1.0350032 , ...,  0.        ,
         1.        ,  0.        ]])

In [22]:
X_train

array([[-0.99226422, -0.43927126, -0.96618059, ...,  1.        ,
         0.        ,  0.        ],
       [-0.99226422,  2.27649768,  1.0350032 , ...,  0.        ,
         0.        ,  0.        ],
       [-0.99226422, -0.43927126,  1.0350032 , ...,  1.        ,
         0.        ,  0.        ],
       ...,
       [ 1.00779609, -0.43927126, -0.96618059, ...,  1.        ,
         0.        ,  0.        ],
       [ 1.00779609, -0.43927126, -0.96618059, ...,  0.        ,
         0.        ,  1.        ],
       [-0.99226422, -0.43927126,  1.0350032 , ...,  0.        ,
         1.        ,  0.        ]])

In [24]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=16,
    verbose=1
)

Epoch 1/100


[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9065 - loss: 0.2175 - val_accuracy: 0.7498 - val_loss: 0.6619
Epoch 2/100
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9227 - loss: 0.2036 - val_accuracy: 0.7365 - val_loss: 0.7029
Epoch 3/100
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9167 - loss: 0.1999 - val_accuracy: 0.7453 - val_loss: 0.6990
Epoch 4/100
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9036 - loss: 0.2120 - val_accuracy: 0.7480 - val_loss: 0.6803
Epoch 5/100
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9121 - loss: 0.2068 - val_accuracy: 0.7551 - val_loss: 0.7073
Epoch 6/100
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9114 - loss: 0.2048 - val_accuracy: 0.7587 - val_loss: 0.6973
Epoch 7/100
[1m282/282[0m [32m━