In [1]:
import tensorflow as tf
import  numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Data Preprocessing

In [2]:
# Load the customer churn dataset from a CSV file (path is relative to the
# notebook's working directory).
data = pd.read_csv("Churn_Modelling.csv")

In [3]:
# Preview the first rows to sanity-check column names and values.
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# (rows, columns) of the raw frame.
data.shape

(10000, 14)

In [5]:
# Column dtypes and non-null counts: a quick check for missing values and
# for which columns are non-numeric.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [6]:
# Feature matrix: every column except the row/customer identifiers and the
# target column 'Exited'.
X = data.drop(
    ['RowNumber', 'Surname', 'CustomerId', 'Exited'],
    axis=1,
)
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [7]:
# Target vector: the 'Exited' column of the raw frame.
y = data.loc[:, 'Exited']
y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

In [8]:
# List the string-typed (object) feature columns; these need encoding before
# they can be fed to the model.
X.select_dtypes(include = 'object').columns

Index(['Geography', 'Gender'], dtype='object')

In [9]:
from sklearn.preprocessing import LabelEncoder

In [10]:
# One-hot encode the categorical columns ('Geography', 'Gender').
# drop_first=True drops one dummy level per column so the remaining dummies
# are not redundant; dtype=int yields 0/1 integers.
X = pd.get_dummies(X,drop_first=True, dtype = int)

## Splitting the dataset

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Feature Scaling

In [13]:
from sklearn.preprocessing import StandardScaler

In [14]:
# Standardise the features. The scaler is fitted on the training split only,
# and the same fitted statistics are then applied to both splits, so no
# information from the test set is used during fitting.
scale = StandardScaler().fit(X_train)
X_train = scale.transform(X_train)
X_test = scale.transform(X_test)

## Building the Model

In [15]:
# (samples, features) of the scaled training matrix; the second value is the
# input width the network has to accept.
X_train.shape

(8000, 11)

In [16]:

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation (and the shuffling done later during training) is repeatable
# on a fresh Restart & Run All.
tf.keras.utils.set_random_seed(0)

# Small fully connected binary classifier: two hidden ReLU layers of 6 units
# each and a single sigmoid output unit.
# The input width is read from the training matrix rather than hard-coded,
# so this cell keeps working if the set of feature columns changes.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])


In [17]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [18]:
# Configure training: Adam optimiser, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy reported as the monitoring metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

## Training the Model

In [19]:
# Train for 20 epochs in mini-batches of 20 samples.
# The returned History object is kept so the per-epoch metrics can be
# inspected afterwards (e.g. history.history['loss']); binding it also stops
# the cell from echoing a bare object repr as its output.
history = model.fit(X_train, y_train.to_numpy(), batch_size=20, epochs=20)

Epoch 1/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 651us/step - accuracy: 0.7429 - loss: 0.6237
Epoch 2/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 627us/step - accuracy: 0.7937 - loss: 0.4645
Epoch 3/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 636us/step - accuracy: 0.8092 - loss: 0.4224
Epoch 4/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 635us/step - accuracy: 0.8125 - loss: 0.4206
Epoch 5/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 592us/step - accuracy: 0.8182 - loss: 0.4121
Epoch 6/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 602us/step - accuracy: 0.8194 - loss: 0.4048
Epoch 7/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 602us/step - accuracy: 0.8144 - loss: 0.4071
Epoch 8/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 626us/step - accuracy: 0.8210 - loss: 0.4041
Epoch 9/20
[1m400/400[

<keras.src.callbacks.history.History at 0x2686d261390>

## Model Evaluation and Prediction

In [20]:
# Loss and accuracy on the held-out test set.
# np.asarray accepts both a pandas Series and a NumPy array, so the cell
# works whichever form `y_test` currently has when it is (re-)run.
test_loss, test_acc = model.evaluate(X_test, np.asarray(y_test))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 640us/step - accuracy: 0.8646 - loss: 0.3352


In [21]:
# The sigmoid output is a score in [0, 1]; threshold it at 0.5 to obtain
# hard 0/1 class labels (one row per test sample).
y_prob = model.predict(X_test)
y_pred = (y_prob > 0.5).astype('int32')

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 970us/step


In [22]:
# Show the predicted labels (NumPy abbreviates the printout of long arrays).
print(y_pred)

[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]


In [23]:
# Show the true labels for comparison; the index carries the original row
# labels from `data`, not positions 0..n-1.
print(y_test)

9394    0
898     1
2398    0
5906    0
2343    0
       ..
1037    0
2899    0
9549    0
2740    0
6690    0
Name: Exited, Length: 2000, dtype: int64


In [24]:
# Spot-check one sample: predicted vs. actual label at position 11 of the
# test split. `.iloc` indexes by position, so `y_test` stays a pandas Series
# and the cell can be re-run safely (plain `y_test[11]` on the Series would
# look up the row *label* 11 instead).
y_pred[11], y_test.iloc[11]


(array([0]), 0)

In [25]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [26]:
# Confusion matrix of true vs. predicted labels. Rows are the true classes
# and columns the predicted classes (scikit-learn convention), so the
# diagonal holds the correctly classified counts.
confusion_matrix(y_test, y_pred)

array([[1515,   80],
       [ 195,  210]], dtype=int64)

In [27]:
# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

0.8625