In [1]:
import tensorflow as tf  




In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle
from sklearn.preprocessing import OneHotEncoder



In [3]:
## Load the dataset
data=pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
data.shape

(10000, 14)

In [5]:
# Drop irrelevant features
# 'RowNumber', 'CustomerId', and 'Surname' are not useful for prediction
data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=True)
# Axis = 1 means we are dropping columns, not rows 

In [6]:
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [7]:
## Encode categorical variables
label_enoder_gender = LabelEncoder()
data['Gender']= label_enoder_gender.fit_transform(data['Gender'])


In [8]:
data.head()


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [9]:
# Encode Geography to OHE
onehot_encoder_geo=OneHotEncoder()
geo_enocder=onehot_encoder_geo.fit_transform(data[['Geography']])
geo_enocder


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [10]:
geo_enocder.toarray()

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [11]:
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [12]:
# Get in dataframe format
geo_enocded_df=pd.DataFrame(geo_enocder.toarray(),columns=onehot_encoder_geo.get_feature_names_out(['Geography']))

In [13]:
geo_enocded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [14]:
# Add coloumns to the original dataframe
data = pd.concat([data, geo_enocded_df], axis=1)

In [15]:
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,France,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,France,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [16]:
data.drop(['Geography'], axis=1, inplace=True)

In [17]:
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [18]:
## Save the encoders and scalers
with open('label_encoder_gender.pkl', 'wb') as f:
    pickle.dump(label_enoder_gender, f)

In [19]:
## Save the encoders and scalers
with open('onehot_encoder.pkl', 'wb') as f:
    pickle.dump(onehot_encoder_geo, f)

In [20]:
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [21]:
# Divide the dataset into features and target variable
X= data.drop('Exited', axis=1) #axis=1 means we are dropping columns, not rows
Y= data['Exited']

In [22]:
X

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,0.0,1.0,0.0


In [23]:
Y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

In [24]:
# Create training and testing datasets
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)

# Scale the features
# Required for ANN to work properly because it is sensitive to the scale of the input features
scaler = StandardScaler()
scaler= StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [25]:
X_train

array([[ 0.35649971,  0.91324755, -0.6557859 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-0.20389777,  0.91324755,  0.29493847, ..., -0.99850112,
         1.72572313, -0.57638802],
       [-0.96147213,  0.91324755, -1.41636539, ..., -0.99850112,
        -0.57946723,  1.73494238],
       ...,
       [ 0.86500853, -1.09499335, -0.08535128, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.15932282,  0.91324755,  0.3900109 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.47065475,  0.91324755,  1.15059039, ..., -0.99850112,
         1.72572313, -0.57638802]])

In [26]:
# Save scaler as pickle file
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# ANN Implementation

In [38]:
# ANN Implementation
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [None]:
X_train.shape[1] # Number of features in the training set

12

In [None]:
## Build the ANN model
# Dense for fully connected layers
# Dropout for regularization to prevent overfitting
model = Sequential([
    #relu is for activation function use for non-linearity
    #sigmoid is for output layer for binary classification
    #Total hidden layers = 2
    #First hidden layer has 64 neurons, second hidden layer has 32 neurons
    Dense(64 ,activation='relu',input_shape=(X_train.shape[1],)), #for first hidden layer, input_shape should be the number of features in X_train
    Dense(32, activation='relu'), # no input_shape needed for subsequent layers
    Dense(1, activation='sigmoid')  # Output layer for binary classification
    
])

In [None]:
model.summary()
# 832 = 64*12 + 64
# 2080 = 32*64 + 32
# 33 = 1*32 + 1

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 64)                832       
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [53]:
# Model Optimiser 
import tensorflow
opt=tensorflow.keras.optimizers.Adam(learning_rate=0.01)  # Adam optimizer with a learning rate of 0.01
loss=tensorflow.keras.losses.BinaryCrossentropy()
loss

<keras.src.losses.BinaryCrossentropy at 0x20357514ca0>

In [55]:
## Compile the model
# Adam optimizer is a popular choice for training neural networks why?
# The Adam optimizer is an adaptive learning rate optimization algorithm that combines the advantages of two other extensions of stochastic gradient descent. It is efficient in terms of memory and computational cost, making it suitable for large datasets and high-dimensional spaces.
# binary_crossentropy is used for binary classification problems and sigmoid activation function in the output layer
# metrics=['accuracy'] is used to evaluate the model's performance during training and testing
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [74]:
## Set up the tensorboard why?
# TensorBoard is a visualization tool that helps in monitoring and debugging the training process of machine learning models. It provides insights into the model's performance, allowing for better understanding and optimization.
import datetime
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
#Why time stamp is used?
# To create a unique log directory for each run, allowing for comparison of different training runs in TensorBoard

In [75]:
tensorflow_callback =TensorBoard(log_dir=log_dir, histogram_freq=1) #why?
# The histogram_freq parameter controls how often to compute activation and weight histograms for the layers of the model. Setting it to 1 means that histograms will be computed every epoch, which can help in visualizing the distribution of activations and weights during training.

In [76]:
## Set up early stopping to prevent overfitting
# epochs=100, patience=5, restore_best_weights=True
# no need to train for 100 epochs, if the model is not improving for 5 epochs, stop training
# we can use early stopping to prevent overfitting if not imporving
# patience is the number of epochs to wait before stopping the training if the model is not improving
#restore_best_weights=True means that the model will restore the weights from the epoch with the best validation loss
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [77]:
## Training model 
history = model.fit(X_train, Y_train, 
                    validation_data=(X_test, Y_test), 
                    epochs=100, 
                    batch_size=32, 
                    callbacks=[tensorflow_callback, early_stopping_callback])
# batch_size=32 means that the model will be trained on 32 samples at a time before updating the weights
#callbacks are used to monitor the training process and take actions based on the performance of the model during training

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


In [78]:
# save the trained model
model.save('model.h5')  # Save the trained model to a file

  saving_api.save_model(


In [79]:
## Load Tensorboard Extension
# why?
# TensorBoard is a visualization tool that helps in monitoring and debugging the training process of machine learning models. It provides insights into the model's performance, allowing for better understanding and optimization.
%load_ext tensorboard
# The %load_ext tensorboard command loads the TensorBoard extension in Jupyter Notebook, allowing you to visualize the training process and monitor metrics such as loss and accuracy in real-time.

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [80]:
# this is used to visualize the training process in TensorBoard
# %tensorboard --logdir logs/fit
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 9444), started 0:06:31 ago. (Use '!kill 9444' to kill it.)

In [None]:
# Use model to make predictions and get the predicted values for the test set
predictions = model.predict(X_test)
# now Y_test contains the actual values and predictions contains the predicted values
# You can compare the two to evaluate the model's performance



In [83]:
# compare predictions and Y_test
comparison = pd.DataFrame({'Actual': Y_test, 'Predicted': predictions.flatten()})
comparison.head()

Unnamed: 0,Actual,Predicted
6252,0,0.043118
4684,0,0.012803
1731,0,0.099484
4742,0,0.087273
4521,0,0.079891


In [None]:
# MSE (Mean Squared Error) is a common metric used to evaluate the performance of regression models. It measures the average of the squares of the errors, which are the differences between predicted and actual values. A lower MSE indicates a better fit of the model to the data.
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(Y_test, predictions)
mse
# MSE is valuse is good if it is low, it means that the model is performing well
# good valuse is less than 0.5

0.10248791426420212

In [None]:
# RSE (Root Squared Error) is another metric used to evaluate the performance of regression models. It is the square root of the MSE and provides an error value in the same units as the target variable, making it easier to interpret.
from sklearn.metrics import mean_squared_error
rse = np.sqrt(mse)  
rse
# RSE is valuse is good if it is low, it means that the model is performing well
# good valuse is less than 0.5

0.32013733656698357

In [88]:
### Load the ANN model,scaler pickle,onehot
from tensorflow.keras.models import load_model
import pickle
model=load_model('model.h5')  # Load the trained model from a file

In [90]:
## Load the scaler and onehot encoder
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)  # Load the scaler from a file
# with open('onehot_encoder.pkl', 'rb') as f:
with open('onehot_encoder.pkl', 'rb') as f:
    onehot_encoder_geo = pickle.load(f)  # Load the onehot encoder from a file

## Load encoder and scaler
with open('label_encoder_gender.pkl', 'rb') as f:
    label_encoder_gender = pickle.load(f)  # Load the label encoder from a file

In [190]:
#Example input data
input_data= {
    'CreditScore': 600,
    'Geography': 'France',
    'Gender':"Male",
    'Age': 40,
    'Tenure': 5,
    'Balance': 10000,
    'NumOfProducts': 2,
    'HasCrCard': 1,
    'IsActiveMember': 1,
    'EstimatedSalary': 50000
}

In [191]:
#conver Geography to onehot encoding
geo_encoded = onehot_encoder_geo.transform([[input_data['Geography']]])
geo_enocded_df=pd.DataFrame(geo_encoded.toarray(),columns=onehot_encoder_geo.get_feature_names_out(['Geography']))
geo_enocded_df



Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0


In [192]:
input_df= pd.DataFrame([input_data])
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,France,Male,40,5,10000,2,1,1,50000


In [194]:
# Encode categorical variables
input_df['Gender'] = label_encoder_gender.transform([input_data['Gender']])
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,France,0,40,5,10000,2,1,1,50000


In [195]:
# Concatenate the onehot encoded geography with input data
input_data_df = pd.concat([input_df.reset_index(drop=True), geo_enocded_df.reset_index(drop=True)], axis=1)

In [None]:
input_data_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,France,0,40,5,10000,2,1,1,50000,1.0,0.0,0.0


In [197]:
input_data_df=input_data_df.drop(['Geography'], axis=1)

In [198]:
input_data_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,0,40,5,10000,2,1,1,50000,1.0,0.0,0.0


In [201]:
# Scaling the input data
input_data_scaled = scaler.transform(input_data_df)
input_data_scaled

array([[-0.53598516, -1.09499335,  0.10479359, -0.00134472, -1.05836066,
         0.80843615,  0.64920267,  0.97481699, -0.87683221,  1.00150113,
        -0.57946723, -0.57638802]])

In [203]:
# using the model to make predictions
predicted_value = model.predict(input_data_scaled)
predicted_value



array([[0.02672383]], dtype=float32)

In [205]:
# Prediction probability
predicted_probability = model.predict(input_data_scaled)



In [207]:
if predicted_probability > 0.5:
    print("Customer will exit")
else:
    print("Customer will not exit")

Customer will not exit
