In [1]:
!pip install tensorflow.gpu

Defaulting to user installation because normal site-packages is not writeable


TensorFlow is a free and open-source software library for machine learning and artificial intelligence.

In [2]:
from tensorflow.keras.models import load_model

In [3]:
# import basic libraries

#for complex mathematical operations
import numpy as np 

#for dataframe manipulation 
import pandas as pd

# for data visulisation
import seaborn as sns
import matplotlib.pyplot as plt


import warnings
warnings.filterwarnings("ignore")

In [4]:
from sklearn.preprocessing import StandardScaler

In [5]:
from tensorflow.keras.models import Sequential
# tensorflow.keras is present in layers package
#Sequential is used for initialising neural network model.......Sequntial indicates that we can do both forward and bakward propogation
from tensorflow.keras.layers import Dense
#Dense used for adding/creating layers(input, hidden and output layers)
from tensorflow.keras.layers import LeakyReLU, PReLU, ELU, ReLU#importing activation functions
from tensorflow.keras.layers import Dropout
# Dropout is like a regularisation paramaeter used to avoid overfitting

In [6]:
dataset = pd.read_csv("Churn_Modelling.csv")

In [7]:
dataset.shape

(10000, 14)

In [8]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [9]:
dataset.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [10]:
#dataset.drop(['RowNumber', 'CustomerId','Surname'], axis=1, inplace=True)# axis=1 for coloumnwise operation# works same as code below
dataset=dataset.iloc[:,3:14]

In [11]:
dataset.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [12]:
dataset['Geography']

0        France
1         Spain
2        France
3        France
4         Spain
         ...   
9995     France
9996     France
9997     France
9998    Germany
9999     France
Name: Geography, Length: 10000, dtype: object

In [13]:
dataset['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [14]:
dataset['Gender'].unique()

array(['Female', 'Male'], dtype=object)

In [15]:
dataset=pd.get_dummies(dataset,columns=['Geography','Gender'],drop_first=True)# here drop_first =True ----> actually drops first unique value in Geography and Gender

In [16]:
dataset.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Germany,Geography_Spain,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,1,0


In [17]:
x=dataset.drop("Exited", axis=1)

In [18]:
y=dataset['Exited']

In [19]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,stratify=y,random_state=42)#to have same output always.....random state is used.....one can use any value for random state
print("The shape of x train : ", x_train.shape)
print("The shape of x test : ", x_test.shape)
print("The shape of y train : ", y_train.shape)
print("The shape of y test : ", y_test.shape)

The shape of x train :  (8000, 11)
The shape of x test :  (2000, 11)
The shape of y train :  (8000,)
The shape of y test :  (2000,)


In [20]:
from sklearn.preprocessing import StandardScaler

In [21]:
sc = StandardScaler()
x_train = sc.fit_transform(x_train)# fit_transform avoids data leakage
x_test = sc.transform(x_test)

In [22]:
classifier=Sequential()#empty model initialised

In [23]:
# Adding i/p layer
classifier.add(Dense(units=11, activation='relu'))
# N.O. of feature in input layer =11

In [24]:
# Adding first hidden layer
classifier.add(Dense(units=7, activation='relu'))
# Let N.O. of neurons in hidden layer = 7
classifier.add(Dropout(0.2))

In [25]:
# Adding second hidden layer
classifier.add(Dense(units=6, activation='relu'))
# Let N.O. of neurons in hidden layer = 6
classifier.add(Dropout(0.3))

In [26]:
# Adding o/p layer
classifier.add(Dense(1, activation='sigmoid'))
# N.O. of neurons in o/p layer = 1 .... for binary classification

In [27]:
classifier.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])
# adam is the best optimizer..by default adam uses a learning rte of 0.01
#loss function used is binary crossentropy for binary classification

In [28]:
import tensorflow
tensorflow.keras.optimizers.Adam(learning_rate=0.01)

<keras.optimizers.optimizer_v2.adam.Adam at 0x1c1c3f57250>

In [29]:
#Early stopping - Stops training when a monitored metric has stopped improving
import tensorflow as tf
early_stopping=tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.0001,
    patience=20,
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=False,
)

In [30]:
model_history=classifier.fit(x_train,y_train,validation_split=0.33,batch_size=10,epochs=1000,callbacks=early_stopping)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000


Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 71: early stopping


In [31]:
# Making predictions for evaluating model
y_pred = classifier.predict(x_test)
y_pred = (y_pred >= 0.5)



In [32]:
# Making confusion matrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,y_pred)
cm

array([[1562,   31],
       [ 261,  146]], dtype=int64)

In [33]:
# Calculate the accuracy
from sklearn.metrics import accuracy_score
score=accuracy_score(y_test,y_pred)
score

0.854

In [34]:
#To print the Classification Report :
from sklearn.metrics import classification_report
cr = classification_report(y_test,y_pred)
print(cr)

              precision    recall  f1-score   support

           0       0.86      0.98      0.91      1593
           1       0.82      0.36      0.50       407

    accuracy                           0.85      2000
   macro avg       0.84      0.67      0.71      2000
weighted avg       0.85      0.85      0.83      2000

