# Importing libraries

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [11]:
tf.__version__

'2.16.2'

# Data Preprocessing

### 1. Importing Dataset

In [66]:
dataset = pd.read_csv("Churn_Modelling.csv")
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [67]:
dataset.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,2886.89568,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


### 2. Deleting unwanted rows

In [68]:
dataset.drop(columns=['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=True)
dataset

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


### 3. Encoding Categorical Variables

In [69]:
dataset.dtypes

CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [70]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

categorical_variables = ['Geography', 'Gender']
le = LabelEncoder()
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), ['Geography'])])

dataset['Gender'] = le.fit_transform(dataset['Gender'])
dataset['Geography'] = ct.fit_transform(dataset)

In [71]:
print(type(dataset))

<class 'pandas.core.frame.DataFrame'>


In [72]:
dataset

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,1.0,0,42,2,0.00,1,1,1,101348.88,1
1,608,0.0,0,41,1,83807.86,1,0,1,112542.58,0
2,502,1.0,0,42,8,159660.80,3,1,0,113931.57,1
3,699,1.0,0,39,1,0.00,2,0,0,93826.63,0
4,850,0.0,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1.0,1,39,5,0.00,2,1,0,96270.64,0
9996,516,1.0,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,1.0,0,36,7,0.00,1,0,1,42085.58,1
9998,772,0.0,1,42,3,75075.31,2,1,0,92888.52,1


### 4. Splitting dataset into training and testing sets

In [74]:
from sklearn.model_selection import train_test_split

X = dataset.drop('Exited', axis=1)
y = dataset['Exited']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
len(X_train), len(X_test), len(y_train), len(y_test)

(8000, 2000, 8000, 2000)

### 5. Feature Scaling

In [76]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

# Builing ANN

### 1. Initializing the ANN

In [77]:
ann = tf.keras.models.Sequential()

### 2. Adding input layer and 1st hidden layer

In [78]:
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

### 3. Adding the 2nd hidden layer

In [79]:
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

### 4. Adding the output layer

for binary classification -> `activation=sigmoid`

for non-binary classification -> `activation=soft-max`

In [80]:
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Training the ANN

### 1. Compiling the ANN

for binary classification -> `loss=binary_crossentropy`

for non-binary classification -> `loss=categorical_crossentropy`

In [83]:
ann.compile(optimizer='adam', metrics=['accuracy'], loss='binary_crossentropy')

### 2. Training the ANN on dataset

In [84]:
ann.fit(X_train, y_train, batch_size=32, epochs=100)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231us/step - accuracy: 0.8632 - loss: 0.3388
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208us/step - accuracy: 0.8527 - loss: 0.3477
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210us/step - accuracy: 0.8553 - loss: 0.3495
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212us/step - accuracy: 0.8527 - loss: 0.3511
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214us/step - accuracy: 0.8664 - loss: 0.3328
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219us/step - accuracy: 0.8619 - loss: 0.3390
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222us/step - accuracy: 0.8574 - loss: 0.3471
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221us/step - accuracy: 0.8590 - loss: 0.3429
Epoch 9/100
[1m

<keras.src.callbacks.history.History at 0x3102aaa90>

# Making Predictions and evaluating the model

In [92]:
from sklearn.metrics import accuracy_score, confusion_matrix

y_preds = ann.predict(X_test)
y_preds = (y_preds > 0.5)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232us/step


In [95]:
cm = confusion_matrix(y_test, y_preds)
print(cm)
accuracy_score(y_test, y_preds)

[[1524   71]
 [ 216  189]]


0.8565