#### **Extracting the data from the zip file**

In [None]:
from zipfile import ZipFile

# Name of the archive that holds the dataset
file_name = "archive.zip"

# Open the archive read-only. The handle is named `archive` rather than `zip`
# so the built-in zip() is not shadowed for the rest of the notebook.
with ZipFile(file_name, 'r') as archive:
    # List the archive contents (file name, modified time, size)
    archive.printdir()

    # Unpack every member into the current working directory
    print('Extracting all the files now...')
    archive.extractall()
    print('Done!')

File Name                                             Modified             Size
Churn_Modelling.csv                            2020-09-12 07:28:50       684858
Extracting all the files now...
Done!


#### **Importing all the dependencies**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Read the extracted CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")
df.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


#### **Dropping some unnecessary columns**

In [None]:
# Remove the row/customer identifier columns that this notebook treats as
# unnecessary for modelling.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps the
# cell idempotent: re-running it after the columns are already gone no longer
# raises KeyError.
df = df.drop(columns=["RowNumber", "CustomerId", "Surname"], errors="ignore")

In [None]:
# Preview the first rows to confirm which columns remain
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


#### **Checking if there are any NULL or Duplicate values**

In [None]:
# Number of rows that are exact duplicates of an earlier row
df.duplicated().sum()

0

In [None]:
# Number of missing values in each column
df.isnull().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

#### **Encoding the categorical data using "pd.get_dummies()"**

In [None]:
# One-hot encode the two categorical columns; drop_first=True omits the first
# level of each so the remaining dummy columns are not redundant
df = pd.get_dummies(
    df,
    columns=["Geography", "Gender"],
    drop_first=True,
)

In [None]:
# Target: the "Exited" column; features: every other column
y = df["Exited"]
x = df.drop(columns="Exited")

In [None]:
# (rows, feature columns) of the feature matrix
x.shape

(10000, 11)

In [None]:
# Length of the target vector; should match the row count of x
y.shape

(10000,)

#### **Splitting the dataset into training and testing sets**

> Training 90%

>  Testing 10%


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing.
# random_state pins the shuffle so the split (and every metric computed from
# it) is reproducible across kernel restarts; stratify=y keeps the proportion
# of Exited=1 rows the same in the training and test sets.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y,
)

In [None]:
# Report the shape of each of the four splits
for split_name, split in (
    ("xtrain", xtrain),
    ("xtest", xtest),
    ("ytrain", ytrain),
    ("ytest", ytest),
):
    print(f"{split_name}.shape:", split.shape)

xtrain.shape: (9000, 11)
xtest.shape: (1000, 11)
ytrain.shape: (9000,)
ytest.shape: (1000,)


#### **Scaling the data**

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same learned
# transformation to both splits so test data never influences the statistics
scale = StandardScaler().fit(xtrain)

x_train_scaled = scale.transform(xtrain)
x_test_scaled = scale.transform(xtest)

# Display the scaled test matrix
x_test_scaled

array([[ 0.04367502, -0.18089197,  0.68459749, ..., -0.5782056 ,
        -0.57290247, -1.09812675],
       [-0.59711851, -0.94401505, -0.00668237, ...,  1.7294886 ,
        -0.57290247, -1.09812675],
       [ 1.96605559, -0.6578439 , -1.04360216, ...,  1.7294886 ,
        -0.57290247,  0.91064169],
       ...,
       [ 0.56044399, -1.51635737, -0.00668237, ..., -0.5782056 ,
         1.74549781, -1.09812675],
       [-0.22504485,  0.10527919, -1.04360216, ...,  1.7294886 ,
        -0.57290247,  0.91064169],
       [ 0.72581006, -0.08550158,  0.68459749, ..., -0.5782056 ,
         1.74549781,  0.91064169]])

#### **Building the model**

In [None]:
# Seed TensorFlow so weight initialisation starts from the same values on
# every fresh run of the notebook
tf.random.set_seed(42)

# Take the input width from the scaled training data instead of hard-coding 11,
# so the network still builds if the set of feature columns changes
n_features = x_train_scaled.shape[1]

# Fully connected binary classifier: four ReLU hidden layers followed by a
# single sigmoid unit. An explicit Input layer replaces the legacy `input_dim`
# keyword on the first Dense layer.
model = Sequential([
    keras.Input(shape=(n_features,)),
    Dense(10, activation='relu'),
    Dense(16, activation='relu'),
    Dense(32, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation="sigmoid"),
])

In [None]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_20 (Dense)            (None, 10)                120       
                                                                 
 dense_21 (Dense)            (None, 16)                176       
                                                                 
 dense_22 (Dense)            (None, 32)                544       
                                                                 
 dense_23 (Dense)            (None, 64)                2112      
                                                                 
 dense_24 (Dense)            (None, 1)                 65        
                                                                 
Total params: 3,017
Trainable params: 3,017
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss for the single
# sigmoid output, and accuracy reported as the metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 20 epochs, holding out 20% of the training data for validation
model.fit(
    x=x_train_scaled,
    y=ytrain,
    validation_split=0.2,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f7d7a3e0050>

In [None]:
import numpy as np

# Raw sigmoid outputs of the model for the scaled test set
y_log = model.predict(x_test_scaled)

# Threshold at 0.5: values strictly above become 1, everything else 0
y_pred = np.greater(y_log, 0.5).astype(np.int_)
y_pred.shape

(1000, 1)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted label equals the true label
accuracy_score(y_true=ytest, y_pred=y_pred)

0.851