# Data Preprocessing

In [1]:
!pip install pandas



In [2]:
!pip install sklearn



In [3]:
# Import libaries
import numpy as np
import pandas as pd
import tensorflow as tf

In [4]:
# Import dataset
dataset= pd.read_csv('Churn_Modelling.csv')
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [5]:
# Create feature matrix X and dependent vector Y
X= dataset.iloc[:,3:-1].values
Y= dataset.iloc[:,-1].values

In [6]:
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [7]:
# Encoding using LabelEncoder
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
X[:,2]=np.array(le.fit_transform(X[:,2]))

In [8]:
X

array([[619, 'France', 0, ..., 1, 1, 101348.88],
       [608, 'Spain', 0, ..., 0, 1, 112542.58],
       [502, 'France', 0, ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 0, ..., 0, 1, 42085.58],
       [772, 'Germany', 1, ..., 1, 0, 92888.52],
       [792, 'France', 0, ..., 1, 0, 38190.78]], dtype=object)

In [9]:
# Encoding using OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct=ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[1])],remainder='passthrough')
X=np.array(ct.fit_transform(X))

In [10]:
X

array([[1.0, 0.0, 0.0, ..., 1, 1, 101348.88],
       [0.0, 0.0, 1.0, ..., 0, 1, 112542.58],
       [1.0, 0.0, 0.0, ..., 1, 0, 113931.57],
       ...,
       [1.0, 0.0, 0.0, ..., 0, 1, 42085.58],
       [0.0, 1.0, 0.0, ..., 1, 0, 92888.52],
       [1.0, 0.0, 0.0, ..., 1, 0, 38190.78]], dtype=object)

In [11]:
# Replacing missing data
from sklearn.impute import SimpleImputer
imputer= SimpleImputer(missing_values=np.nan,strategy='mean')
X= imputer.fit_transform(X)

In [12]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X=sc.fit_transform(X)

In [13]:
# Spliting dataset into training and testing
from sklearn.model_selection import train_test_split
Xtrain,Xtest,Ytrain,Ytest= train_test_split(X,Y,test_size=0.2,random_state=1)

# Building Deep Learning Model

In [14]:
# Building a model
d1= tf.keras.models.Sequential()
# First hidden layer
d1.add(tf.keras.layers.Dense(units=1,activation='relu'))
# Second hidden layer
d1.add(tf.keras.layers.Dense(units=1,activation='relu'))
# Output layer
d1.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))

In [15]:
# Compile the model
d1.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [16]:
# Training the model
d1.fit(Xtrain,Ytrain)



<tensorflow.python.keras.callbacks.History at 0x2102bb67af0>

In [17]:
d1.fit(Xtrain,Ytrain,epochs=400)

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x2102d062790>

In [18]:
# Testing the model
Yest= d1.predict(Xtest)
Yest

array([[0.09569132],
       [0.15636042],
       [0.12047338],
       ...,
       [0.04224467],
       [0.12994447],
       [0.31388658]], dtype=float32)

In [19]:
Yest= (Yest>0.5)
Yest

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [21]:
# Encoding
from sklearn.preprocessing import LabelEncoder
le= LabelEncoder()
Yest= le.fit_transform(Yest)
Yest

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

# Performance metrics

In [22]:
# Performance metrics
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score
cm= confusion_matrix(Ytest,Yest)
print('Confusion matrix : ')
print(cm)
print('Accuracy score: ',accuracy_score(Ytest,Yest))
print('Precision score: ',precision_score(Ytest,Yest))
print('Recall score: ',recall_score(Ytest,Yest))

Confusion matrix : 
[[1528   57]
 [ 321   94]]
Accuracy score:  0.811
Precision score:  0.6225165562913907
Recall score:  0.22650602409638554
