In [47]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, roc_curve, roc_auc_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras import backend as K

# Loading the dataset

In [48]:
df = pd.read_csv('/home/administrator/Documents/PROGRAMMING/ML/Datasets/Telco_Customer_Churn.csv')
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [49]:
df.shape

(7043, 21)

In [50]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


# Handling Missing Values

In [51]:
df.isnull().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [52]:
df = df.drop(columns=['customerID'],axis=1)

In [53]:
df['TotalCharges'] = df['TotalCharges'].replace(' ', '0')

# Label Encoding Categorical Variables

In [54]:
le = LabelEncoder()
cat_columns = ['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport','StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling' ,'PaymentMethod', 'Churn']
for col in cat_columns:
    df[col] = le.fit_transform(df[col])

In [55]:
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,1,0,1,0,0,2,0,0,0,0,0,1,2,29.85,29.85,0
1,1,0,0,0,34,1,0,0,2,0,2,0,0,0,1,0,3,56.95,1889.5,0
2,1,0,0,0,2,1,0,0,2,2,0,0,0,0,0,1,3,53.85,108.15,1
3,1,0,0,0,45,0,1,0,2,0,2,2,0,0,1,0,0,42.3,1840.75,0
4,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,1,2,70.7,151.65,1


In [56]:
df['TotalCharges']= df['TotalCharges'].astype(float)

# Split the dataset into training and testing set

In [57]:
X = df.drop(columns=['Churn'])
Y = df['Churn']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize the data

In [58]:
# Standardize numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 2: Build Baseline DNN Model

In [59]:
baseline_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
baseline_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [60]:
# Train the baseline model
baseline_history = baseline_model.fit(X_train_scaled, Y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, Y_test), verbose=0)

In [61]:
# Evaluate the performance of the baseline model
baseline_loss, baseline_accuracy = baseline_model.evaluate(X_test_scaled, Y_test)
print("Baseline Model - Loss: {:.4f}, Accuracy: {:.4f}".format(baseline_loss, baseline_accuracy))

Baseline Model - Loss: 0.4120, Accuracy: 0.8183


# Step 3: Implement Dropout

In [62]:
dropout_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [63]:
dropout_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [66]:
# Train the model with dropout
dropout_history = dropout_model.fit(X_train_scaled, Y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, Y_test), verbose=0)

In [67]:
# Evaluate the performance of the model with dropout
dropout_loss, dropout_accuracy = dropout_model.evaluate(X_test_scaled, Y_test)
print("Model with Dropout - Loss: {:.4f}, Accuracy: {:.4f}".format(dropout_loss, dropout_accuracy))

Model with Dropout - Loss: 0.4099, Accuracy: 0.8112


# Step 4: Implement Layer-wise Dropout

In [68]:
layerwise_dropout_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])
layerwise_dropout_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model with layer-wise dropout
layerwise_dropout_history = layerwise_dropout_model.fit(X_train_scaled, Y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, Y_test), verbose=0)

In [None]:
# Evaluate the performance of the model with layer-wise dropout
layerwise_dropout_loss, layerwise_dropout_accuracy = layerwise_dropout_model.evaluate(X_test_scaled, Y_test)
print("Model with Layer-wise Dropout - Loss: {:.4f}, Accuracy: {:.4f}".format(layerwise_dropout_loss, layerwise_dropout_accuracy))

# Step 5: Implement Monte Carlo Dropout

In [None]:
class MCDropout(tf.keras.layers.Dropout):
    def call(self, inputs, training=None):
        if training is None:
            training = K.learning_phase()
        noise_shape = self._get_noise_shape(inputs)
        def dropped_inputs():
            return K.dropout(inputs, self.rate, noise_shape,
                             seed=self.seed)
        return K.in_train_phase(dropped_inputs, inputs,
                                training=training)

In [None]:
mc_dropout_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    MCDropout(0.5),
    Dense(32, activation='relu'),
    MCDropout(0.5),
    Dense(1, activation='sigmoid')
])
mc_dropout_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model with Monte Carlo dropout
mc_dropout_history = mc_dropout_model.fit(X_train_scaled, Y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, Y_test), verbose=0)

In [None]:
# Evaluate the performance of the model with Monte Carlo dropout
mc_dropout_loss, mc_dropout_accuracy = mc_dropout_model.evaluate(X_test_scaled, Y_test)
print("Model with Monte Carlo Dropout - Loss: {:.4f}, Accuracy: {:.4f}".format(mc_dropout_loss, mc_dropout_accuracy))

# Step 6: Comparison and Visualization

In [None]:
# Compare the performance of the models
print("\nPerformance Comparison:")
print("Baseline Model - Accuracy: {:.4f}".format(baseline_accuracy))
print("Model with Dropout - Accuracy: {:.4f}".format(dropout_accuracy))
print("Model with Layer-wise Dropout - Accuracy: {:.4f}".format(layerwise_dropout_accuracy))
print("Model with Monte Carlo Dropout - Accuracy: {:.4f}".format(mc_dropout_accuracy))

In [None]:
# Visualize the training and validation accuracy/loss curves
plt.figure(figsize=(12, 6))
plt.plot(baseline_history.history['accuracy'], label='Baseline Training Accuracy')
plt.plot(baseline_history.history['val_accuracy'], label='Baseline Validation Accuracy')
plt.plot(dropout_history.history['accuracy'], label='Model with Dropout Training Accuracy')
plt.plot(dropout_history.history['val_accuracy'], label='Model with Dropout Validation Accuracy')
plt.plot(layerwise_dropout_history.history['accuracy'], label='Model with Layer-wise Dropout Training Accuracy')
plt.plot(layerwise_dropout_history.history['val_accuracy'], label='Model with Layer-wise Dropout Validation Accuracy')
plt.plot(mc_dropout_history.history['accuracy'], label='Model with Monte Carlo Dropout Training Accuracy')
plt.plot(mc_dropout_history.history['val_accuracy'], label='Model with Monte Carlo Dropout Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.grid(True)
plt.show()