Customer Churn Prediction project using ANN (Artificial Neural Network)

step 1--Imort Libraries

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf

print("TensorFlow Version:", tf.__version__)

TensorFlow Version: 2.20.0


step 2--Data Preprocessing

In [6]:
dataset = pd.read_csv(r'C:\Users\hp\Downloads\Churn_Modelling (1).csv')
X = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values

print("X Sample:\n", X[:3])
print("y Sample:\n", y[:10])

X Sample:
 [[619 'France' 'Female' 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 'Female' 42 8 159660.8 3 1 0 113931.57]]
y Sample:
 [1 0 1 0 0 1 0 1 0 0]


step 3--Encoding categorical data
**label encoding for gender
**onehot encoding for geography

In [9]:
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Label Encoding Gender
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])

# One Hot Encoding Geography
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

print("After Encoding:\n", X[:3])

After Encoding:
 [[1.0 0.0 0.0 619 0 42 2 0.0 1 1 1 101348.88]
 [0.0 0.0 1.0 608 0 41 1 83807.86 1 0 1 112542.58]
 [1.0 0.0 0.0 502 0 42 8 159660.8 3 1 0 113931.57]]


step4--feature scaling

In [12]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X = sc.fit_transform(X)

print("After Scaling:\n", X[:3])

After Scaling:
 [[ 0.99720391 -0.57873591 -0.57380915 -0.32622142 -1.09598752  0.29351742
  -1.04175968 -1.22584767 -0.91158349  0.64609167  0.97024255  0.02188649]
 [-1.00280393 -0.57873591  1.74273971 -0.44003595 -1.09598752  0.19816383
  -1.38753759  0.11735002 -0.91158349 -1.54776799  0.97024255  0.21653375]
 [ 0.99720391 -0.57873591 -0.57380915 -1.53679418 -1.09598752  0.29351742
   1.03290776  1.33305335  2.52705662  0.64609167 -1.03067011  0.2406869 ]]


step 5--splitting data

In [15]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

print("Train Shape:", X_train.shape)
print("Test Shape:", X_test.shape)

Train Shape: (8000, 12)
Test Shape: (2000, 12)


step 6--building ANN

In [19]:
# Initializing the ANN
ann = tf.keras.models.Sequential()

# Input + First Hidden Layer
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

# Second Hidden Layer
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

# Output Layer
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

step 7 --training the ANN
here we are using :
optimizer : Adam
loss function :binary crossentropy because of binary classification


In [22]:
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train ANN
history = ann.fit(X_train, y_train, batch_size = 32, epochs = 100, verbose=1)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5832 - loss: 0.6959
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7960 - loss: 0.4875
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7960 - loss: 0.4620
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7960 - loss: 0.4456
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7962 - loss: 0.4350
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8044 - loss: 0.4273
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8152 - loss: 0.4203
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8246 - loss: 0.4130
Epoch 9/100
[1m250/250[0m [32

step 8 --model evaluation

In [25]:
# Predicting the Test set results
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)

# Compare predictions vs actual
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1)[:10])

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[[0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 1]
 [1 1]]


In [27]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print("Confusion Matrix:\n", cm)
print("Accuracy:", acc)

Confusion Matrix:
 [[1506   89]
 [ 190  215]]
Accuracy: 0.8605


lets add two more hidden layer to compare the performance

In [30]:
# Build deeper ANN
ann_deep = tf.keras.models.Sequential()

# Input + 3 Hidden Layers
ann_deep.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann_deep.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann_deep.add(tf.keras.layers.Dense(units=8, activation='relu'))

# Output Layer
ann_deep.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Compile
ann_deep.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train
history_deep = ann_deep.fit(X_train, y_train, batch_size = 32, epochs = 100, verbose=1)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7943 - loss: 0.5286
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7960 - loss: 0.4674
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7960 - loss: 0.4445
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7969 - loss: 0.4343
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8002 - loss: 0.4286
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8077 - loss: 0.4252
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8121 - loss: 0.4220
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8165 - loss: 0.4188
Epoch 9/100
[1m250/250[0m [32

In [31]:
# Evaluate deeper model
y_pred_deep = ann_deep.predict(X_test)
y_pred_deep = (y_pred_deep > 0.5)

cm_deep = confusion_matrix(y_test, y_pred_deep)
acc_deep = accuracy_score(y_test, y_pred_deep)

print("Confusion Matrix (Deeper ANN):\n", cm_deep)
print("Accuracy (Deeper ANN):", acc_deep)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  
Confusion Matrix (Deeper ANN):
 [[1499   96]
 [ 195  210]]
Accuracy (Deeper ANN): 0.8545
