In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter("ignore")

In [9]:
df = pd.read_csv("Churn_analyzed.csv")
df.drop("Unnamed: 0", axis =1, inplace = True)

In [10]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,6.480741,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,6.403124,1,83807.86,1,0,1,112542.58,0
2,502,France,0,6.480741,8,159660.8,3,1,0,113931.57,1
3,699,France,0,6.244998,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,6.557439,2,125510.82,1,1,1,79084.1,0


In [None]:
df = pd.get_dummies(df, drop_first = True, dtype = int)

In [12]:
X = df.drop("Exited", axis = 1)
y = df["Exited"]

In [5]:
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, mean_absolute_error

In [6]:
from keras.models import Sequential
from keras.layers import Dense

In [14]:
# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 39)

# Scaling the dataset
scaler = StandardScaler()

X_train.loc[:, ["CreditScore", "Age", "Balance", "EstimatedSalary"]] = scaler.fit_transform(X_train.loc[:, ["CreditScore", "Age", "Balance", "EstimatedSalary"]])
X_test.loc[:, ["CreditScore", "Age", "Balance", "EstimatedSalary"]]= scaler.transform(X_test.loc[:, ["CreditScore", "Age", "Balance", "EstimatedSalary"]])

# ***Modelling - Perceptron***

In [19]:
def perceptron():
    model = Sequential()
    model.add(Dense(input_dim = 11, units = 21, activation = "relu", kernel_initializer = "uniform"))
    model.add(Dense(units = 1, activation = "sigmoid", kernel_initializer = "uniform"))
    model.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
    return model

In [20]:
model = perceptron()
model.fit(X_train, y_train, epochs = 100, batch_size = 32)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7711 - loss: 0.5839
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7958 - loss: 0.4444
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8034 - loss: 0.4404
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8127 - loss: 0.4221
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8148 - loss: 0.4243
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8255 - loss: 0.4070
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8252 - loss: 0.4221
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8285 - loss: 0.4104
Epoch 9/100
[1m250/250[0m [32

<keras.src.callbacks.history.History at 0x1a578bb2890>

In [24]:
pred_train = model.predict(X_train)
pred_train = (pred_train > 0.5)
pred_test = model.predict(X_test)
pred_test = (pred_test > 0.5)

print("Training Accuracy :", accuracy_score(y_train, pred_train))
print("Testing Accuracy :", accuracy_score(y_test, pred_test))
print("Mean Absolute Error :", mean_absolute_error(y_test, pred_test))
confusion_matrix(y_test, pred_test)

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Training Accuracy : 0.863
Testing Accuracy : 0.8675
Mean Absolute Error : 0.1325


array([[1553,   64],
       [ 201,  182]], dtype=int64)

In [25]:
print(classification_report(y_test, pred_test))

              precision    recall  f1-score   support

           0       0.89      0.96      0.92      1617
           1       0.74      0.48      0.58       383

    accuracy                           0.87      2000
   macro avg       0.81      0.72      0.75      2000
weighted avg       0.86      0.87      0.86      2000



# ***Adding hidden layer in ANN***

In [27]:
def ann_hidden_layer():
    model = Sequential()
    model.add(Dense(input_dim = 11, units = 21, activation = "relu", kernel_initializer = "uniform"))
    model.add(Dense(units = 21, activation = "relu", kernel_initializer = "uniform"))
    model.add(Dense(units = 1, activation = "sigmoid", kernel_initializer = "uniform"))
    model.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
    return model

In [28]:
model_ann = ann_hidden_layer()
model_ann.fit(X_train, y_train, epochs = 100, batch_size = 32)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7848 - loss: 0.5803
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7982 - loss: 0.4367
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7930 - loss: 0.4269
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8021 - loss: 0.4198
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8221 - loss: 0.4263
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8313 - loss: 0.4056
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8338 - loss: 0.4115
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8339 - loss: 0.4087
Epoch 9/100
[1m250/250[0m [32

<keras.src.callbacks.history.History at 0x1a57900a290>

In [30]:
pred_train = model_ann.predict(X_train)
pred_train = (pred_train > 0.5)
pred_test = model_ann.predict(X_test)
pred_test = (pred_test > 0.5)

print("Training Accuracy :", accuracy_score(y_train, pred_train))
print("Testing Accuracy :", accuracy_score(y_test, pred_test))
print("Mean Absolute Error :", mean_absolute_error(y_test, pred_test))
confusion_matrix(y_test, pred_test)

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Training Accuracy : 0.86725
Testing Accuracy : 0.8665
Mean Absolute Error : 0.1335


array([[1527,   90],
       [ 177,  206]], dtype=int64)

In [31]:
print(classification_report(y_test, pred_test))

              precision    recall  f1-score   support

           0       0.90      0.94      0.92      1617
           1       0.70      0.54      0.61       383

    accuracy                           0.87      2000
   macro avg       0.80      0.74      0.76      2000
weighted avg       0.86      0.87      0.86      2000



# ***ANN with 2 hidden layer***

In [32]:
def ann_hidden_layer_2():
    model = Sequential()
    model.add(Dense(input_dim = 11, units = 21, activation = "relu", kernel_initializer = "uniform"))
    model.add(Dense(units = 21, activation = "relu", kernel_initializer = "uniform"))
    model.add(Dense(units = 21, activation = "relu", kernel_initializer = "uniform"))
    model.add(Dense(units = 1, activation = "sigmoid", kernel_initializer = "uniform"))
    model.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
    return model

In [33]:
model_ann_2 = ann_hidden_layer_2()
model_ann_2.fit(X_train, y_train, epochs = 100, batch_size = 32)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7765 - loss: 0.5852
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7996 - loss: 0.4300
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7864 - loss: 0.4444
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7907 - loss: 0.4228
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8034 - loss: 0.4207
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8141 - loss: 0.4243
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8129 - loss: 0.4261
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8188 - loss: 0.4197
Epoch 9/100
[1m250/250[0m [32

<keras.src.callbacks.history.History at 0x1a57c1d9550>

In [34]:
pred_train = model_ann_2.predict(X_train)
pred_train = (pred_train > 0.5)
pred_test = model_ann_2.predict(X_test)
pred_test = (pred_test > 0.5)

print("Training Accuracy :", accuracy_score(y_train, pred_train))
print("Testing Accuracy :", accuracy_score(y_test, pred_test))
print("Mean Absolute Error :", mean_absolute_error(y_test, pred_test))
confusion_matrix(y_test, pred_test)

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Training Accuracy : 0.866875
Testing Accuracy : 0.8585
Mean Absolute Error : 0.1415


array([[1520,   97],
       [ 186,  197]], dtype=int64)

In [35]:
print(classification_report(y_test, pred_test))

              precision    recall  f1-score   support

           0       0.89      0.94      0.91      1617
           1       0.67      0.51      0.58       383

    accuracy                           0.86      2000
   macro avg       0.78      0.73      0.75      2000
weighted avg       0.85      0.86      0.85      2000



### ***Now when we add 2 layer accuracy is slightly decreasing and MAE is increasing. so, we consider ANN with 1 hidden layer or Perceptron model.***