## Importing Libraries

In [1]:
import keras
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Dense
import matplotlib.pyplot as plt
from keras.models import Sequential
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

## Importing and Reading the Dataset .csv File

In [2]:
customer_data = pd.read_csv('dataset/customer_churning_data.csv')

In [3]:
customer_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [4]:
customer_data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Data Preprocessing

Finding and filling the missing values

In [5]:
print(customer_data.isnull().any(axis=1))

0       False
1       False
2       False
3       False
4       False
        ...  
9995    False
9996    False
9997    False
9998    False
9999    False
Length: 10000, dtype: bool


In [6]:
print(customer_data.isnull().sum(axis=1))

0       0
1       0
2       0
3       0
4       0
       ..
9995    0
9996    0
9997    0
9998    0
9999    0
Length: 10000, dtype: int64


Understanding and seperating the independent variable and dependent variable.

In [7]:
X = customer_data.iloc[:, 3:13] # Independent variable
y = customer_data.iloc[:, -1] # Dependent variable

In [8]:
print(X)

      CreditScore Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0             619    France  Female   42       2       0.00              1   
1             608     Spain  Female   41       1   83807.86              1   
2             502    France  Female   42       8  159660.80              3   
3             699    France  Female   39       1       0.00              2   
4             850     Spain  Female   43       2  125510.82              1   
...           ...       ...     ...  ...     ...        ...            ...   
9995          771    France    Male   39       5       0.00              2   
9996          516    France    Male   35      10   57369.61              1   
9997          709    France  Female   36       7       0.00              1   
9998          772   Germany    Male   42       3   75075.31              2   
9999          792    France  Female   28       4  130142.79              1   

      HasCrCard  IsActiveMember  EstimatedSalary  
0           

In [9]:
print(y)

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64


In [10]:
X = customer_data.iloc[:, 3:13].values
y = customer_data.iloc[:, -1].values

In [11]:
print(X)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [12]:
print(y)

[1 0 1 ... 1 1 0]


## Encoding the Categorical Data

In [13]:
countriesEncoder = LabelEncoder()

In [14]:
X[:, 1] = countriesEncoder.fit_transform(X[:, 1])

In [15]:
print(X)

[[619 0 'Female' ... 1 1 101348.88]
 [608 2 'Female' ... 0 1 112542.58]
 [502 0 'Female' ... 1 0 113931.57]
 ...
 [709 0 'Female' ... 0 1 42085.58]
 [772 1 'Male' ... 1 0 92888.52]
 [792 0 'Female' ... 1 0 38190.78]]


In [16]:
genderEncoder = LabelEncoder()

In [17]:
X[:, 2] = genderEncoder.fit_transform(X[:, 2])

In [18]:
print(X)

[[619 0 0 ... 1 1 101348.88]
 [608 2 0 ... 0 1 112542.58]
 [502 0 0 ... 1 0 113931.57]
 ...
 [709 0 0 ... 0 1 42085.58]
 [772 1 1 ... 1 0 92888.52]
 [792 0 0 ... 1 0 38190.78]]


One Hot Encoding

In [19]:
onehotencoder = OneHotEncoder()
ct = ColumnTransformer(transformers=[('onehot', onehotencoder, [1])], remainder='passthrough')
X = ct.fit_transform(X)
X = X[:, 1:]

In [20]:
print(X)

[[0.0 0.0 619 ... 1 1 101348.88]
 [0.0 1.0 608 ... 0 1 112542.58]
 [0.0 0.0 502 ... 1 0 113931.57]
 ...
 [0.0 0.0 709 ... 0 1 42085.58]
 [1.0 0.0 772 ... 1 0 92888.52]
 [0.0 0.0 792 ... 1 0 38190.78]]


Splitting the data into training set and test set

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [22]:
print(X_train)

[[0.0 1.0 756 ... 1 0 4888.91]
 [0.0 0.0 701 ... 1 0 47856.78]
 [1.0 0.0 476 ... 1 0 68343.53]
 ...
 [1.0 0.0 729 ... 1 0 177130.33]
 [0.0 0.0 651 ... 1 1 130132.41]
 [1.0 0.0 579 ... 0 1 192146.63]]


In [23]:
print(X_test)

[[1.0 0.0 581 ... 1 0 187597.98]
 [0.0 0.0 645 ... 1 1 18369.33]
 [0.0 0.0 571 ... 1 0 28045.77]
 ...
 [0.0 0.0 558 ... 1 1 109096.71]
 [0.0 0.0 466 ... 1 0 193984.6]
 [0.0 0.0 784 ... 0 1 169920.92]]


In [24]:
print(y_train)

[0 0 0 ... 0 0 1]


In [25]:
print(y_test)

[1 0 0 ... 1 0 0]


Feature scaling

In [26]:
sc = StandardScaler()

In [27]:
X_train = sc.fit_transform(X_train)

In [28]:
print(X)

[[0.0 0.0 619 ... 1 1 101348.88]
 [0.0 1.0 608 ... 0 1 112542.58]
 [0.0 0.0 502 ... 1 0 113931.57]
 ...
 [0.0 0.0 709 ... 0 1 42085.58]
 [1.0 0.0 772 ... 1 0 92888.52]
 [0.0 0.0 792 ... 1 0 38190.78]]


In [29]:
X_test = sc.transform(X_test)

In [30]:
print(X_test)

[[ 1.7194414  -0.57427105 -0.71761074 ...  0.64550866 -1.03408041
   1.52028802]
 [-0.58158423 -0.57427105 -0.05652419 ...  0.64550866  0.96704278
  -1.41725588]
 [-0.58158423 -0.57427105 -0.82090551 ...  0.64550866 -1.03408041
  -1.24928806]
 ...
 [-0.58158423 -0.57427105 -0.95518872 ...  0.64550866  0.96704278
   0.15762912]
 [-0.58158423 -0.57427105 -1.90550063 ...  0.64550866 -1.03408041
   1.63114972]
 [-0.58158423 -0.57427105  1.37927316 ... -1.54916591  0.96704278
   1.21344199]]


In [31]:
clf = Sequential()

In [32]:
clf.add(Dense(units=6, activation='relu', input_dim=11))

In [33]:
clf.add(Dense(units=6, activation='relu'))

In [34]:
clf.add(Dense(units=1, activation='sigmoid'))

In [35]:
clf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [36]:
clf.fit(X_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x21dcdb382e0>

In [37]:
y_pred = clf.predict(X_test)



In [38]:
y_pred = (y_pred > 0.5)

In [39]:
print(y_pred)

[[ True]
 [False]
 [ True]
 ...
 [False]
 [False]
 [False]]


In [40]:
cm = confusion_matrix(y_test, y_pred)

In [41]:
print(cm)

[[1536   60]
 [ 212  192]]


In [42]:
accuracy = accuracy_score(y_test, y_pred)

In [43]:
print(accuracy)

0.864


Predict if the customer with the following informations will leave the bank: <br>

Geography: France <br>
Credit Score: 600 <br>
Gender: Male <br>
Age: 40 years old <br>
Tenure: 3 years <br>
Balance: $60000 <br>
Number of Products: 2 <br>
Does this customer have a credit card ? Yes <br>
Is this customer an Active Member: Yes <br>
Estimated Salary: $50000 <br>
So should we say goodbye to that customer ? <br>

In [44]:
predict_inputs = np.array([[0, 0, 600, 1, 40, 3, 60000.0, 2, 1, 1, 50000.0]])

In [45]:
predict_inputs = sc.transform(predict_inputs)

In [46]:
predictions = clf.predict(predict_inputs)



In [47]:
predictions = (predictions > 0.5)

In [48]:
print(predictions)

[[False]]


Therefore the customer will not leave from the bank.

In [49]:
clf.save('model/model.h5')