# Business Problem

Our basic aim is to predict customer churn for a certain bank i.e. which customer is going to leave this bank service. Dataset contains 10000 rows with 14 columns.

### Importing Libraries

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf

### Loading dataset

In [11]:
Bank_data= pd.read_csv('Churn_Modelling.csv')
Bank_data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### Data Prepatarion(EDA)

In [12]:
#Removing unwanted cloum
Bank_data.drop('RowNumber', axis=1, inplace= True)

In [19]:
#Removing unwanted cloum
Bank_data.drop('CustomerId', axis=1, inplace= True)

In [28]:
#Removing unwanted cloum
Bank_data.drop('Surname', axis=1, inplace= True)

In [29]:
Bank_data.columns

Index(['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
       'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary',
       'Exited'],
      dtype='object')

In [30]:
Bank_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int64  
 1   Geography        10000 non-null  object 
 2   Gender           10000 non-null  object 
 3   Age              10000 non-null  int64  
 4   Tenure           10000 non-null  int64  
 5   Balance          10000 non-null  float64
 6   NumOfProducts    10000 non-null  int64  
 7   HasCrCard        10000 non-null  int64  
 8   IsActiveMember   10000 non-null  int64  
 9   EstimatedSalary  10000 non-null  float64
 10  Exited           10000 non-null  int64  
dtypes: float64(2), int64(7), object(2)
memory usage: 859.5+ KB


In [31]:
Bank_data.dtypes

CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

#### Separing Numeric & Categorical Variables

In [32]:
numeric_var_names=[key for key in dict(Bank_data.dtypes) if dict(Bank_data.dtypes)[key] in ['float64', 'int64', 'float32', 'int32']]
cat_var_names=[key for key in dict(Bank_data.dtypes) if dict(Bank_data.dtypes)[key] in ['object', 'O']]

In [33]:
print(numeric_var_names)

['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited']


In [34]:
print(cat_var_names)

['Geography', 'Gender']


In [35]:
Bank_num=Bank_data[numeric_var_names]
Bank_num.head(5)

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,42,2,0.0,1,1,1,101348.88,1
1,608,41,1,83807.86,1,0,1,112542.58,0
2,502,42,8,159660.8,3,1,0,113931.57,1
3,699,39,1,0.0,2,0,0,93826.63,0
4,850,43,2,125510.82,1,1,1,79084.1,0


In [36]:
Bank_cat = Bank_data[cat_var_names]
Bank_cat.head(5)

Unnamed: 0,Geography,Gender
0,France,Female
1,Spain,Female
2,France,Female
3,France,Female
4,Spain,Female


#### Handling Categorical Features

In [40]:
col_dummies1=pd.get_dummies(Bank_cat['Geography'], prefix='Geog_type', drop_first=True)

In [41]:
col_dummies2=pd.get_dummies(Bank_cat['Gender'], prefix='Gender_type', drop_first=True)

In [42]:
Bank_cat1 = pd.concat([Bank_cat, col_dummies1,col_dummies2], axis=1)
Bank_cat1.head()

Unnamed: 0,Geography,Gender,Geog_type_Germany,Geog_type_Spain,Gender_type_Male
0,France,Female,0,0,0
1,Spain,Female,0,1,0
2,France,Female,0,0,0
3,France,Female,0,0,0
4,Spain,Female,0,1,0


In [43]:
Bank_cat1.drop( ['Geography','Gender'], axis = 1, inplace = True )

### Final File for Analysis

In [44]:
Bank_file = pd.concat([Bank_cat1, Bank_num], axis=1)
Bank_file.head()

Unnamed: 0,Geog_type_Germany,Geog_type_Spain,Gender_type_Male,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,0,0,0,619,42,2,0.0,1,1,1,101348.88,1
1,0,1,0,608,41,1,83807.86,1,0,1,112542.58,0
2,0,0,0,502,42,8,159660.8,3,1,0,113931.57,1
3,0,0,0,699,39,1,0.0,2,0,0,93826.63,0
4,0,1,0,850,43,2,125510.82,1,1,1,79084.1,0


In [45]:
X = Bank_file.iloc[:, 0:10].values
y = Bank_file.iloc[:, 11].values

In [46]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [47]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

#### Defining the Variables

In [48]:
epochs=50
batch_size = 10

#### Building the Architecture & Compiling

In [49]:
model2 = tf.keras.models.Sequential();
model2.add(tf.keras.layers.Flatten())
model2.add(tf.keras.layers.Dense(6, activation='relu'))
#mnistmodel2.add(tf.keras.layers.Dropout(0.2))
model2.add(tf.keras.layers.Dense(1,activation=tf.nn.sigmoid))

In [50]:
model2.compile (optimizer= tf.keras.optimizers.Adam(), loss='binary_crossentropy',metrics = ['accuracy'])

#### Fitting the Model

In [53]:
model2.fit(X_train, y_train, batch_size=10, epochs=50)

Train on 8000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1ddfaf2d978>

#### Evaluate the Model with test data

In [54]:
model2.evaluate(X_test,y_test)



[0.34359566020965576, 0.8565]

In [55]:
# Predicting the Test set results
y_pred = model2.predict(X_test)
y_pred = (y_pred > 0.5)

In [56]:
# Creating the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1531,   70],
       [ 217,  182]], dtype=int64)

In [57]:
import sklearn.metrics as metrics
print(metrics.classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.88      0.96      0.91      1601
           1       0.72      0.46      0.56       399

    accuracy                           0.86      2000
   macro avg       0.80      0.71      0.74      2000
weighted avg       0.85      0.86      0.84      2000

