In [1]:
# Import All Neccessory Libraries

import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score

## Data Description

Content :-

This data set contains details of a bank's customers and the target variable is a binary variable reflecting the fact whether the customer left the bank (closed his account) or he continues to be a customer.

In [2]:
df = pd.read_csv("Churn_Modelling.csv")

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df.shape

(10000, 14)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


## Checking for messing values and duplicate values

In [6]:
df.isna().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [7]:
df.duplicated().sum()

0

In [8]:
df['Exited'].value_counts()

Exited
0    7963
1    2037
Name: count, dtype: int64

In [9]:
df['Geography'].value_counts()

Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

In [10]:
# Drop the first three columns which is not necessory

df.drop(labels=['RowNumber','CustomerId','Surname'],inplace=True,axis=1)

In [11]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Encoding categorical features

In [12]:
df = pd.get_dummies(df,columns = ['Geography','Gender'],drop_first=True)

In [13]:
df.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Germany,Geography_Spain,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,False,False,False
1,608,41,1,83807.86,1,0,1,112542.58,0,False,True,False
2,502,42,8,159660.8,3,1,0,113931.57,1,False,False,False
3,699,39,1,0.0,2,0,0,93826.63,0,False,False,False
4,850,43,2,125510.82,1,1,1,79084.1,0,False,True,False


## Divide dataset into dependent and independent variables

In [14]:
X = df.drop(labels=['Exited'],axis = 1)
y = df['Exited']

In [15]:
#Further split dataset into train and test

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [16]:
#applying scaler 

scaler = StandardScaler()

In [17]:
X_train_scaled = scaler.fit_transform(X_train)

In [18]:
X_test_scaled = scaler.transform(X_test)

In [19]:
model = Sequential()

In [20]:
model.add(Dense(11,activation='relu',input_dim = 11))
model.add(Dense(11,activation='relu'))
model.add(Dense(1,activation = 'sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Here is summary of our trained neuron 

In [21]:
model.summary()

# Compilation of model

In [22]:
model.compile(loss = 'binary_crossentropy',optimizer = 'Adam',metrics=['accuracy'])

In [23]:
model.fit(X_train_scaled,y_train,epochs = 150,validation_split=0.2)

Epoch 1/150
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7168 - loss: 0.5887 - val_accuracy: 0.7978 - val_loss: 0.4880
Epoch 2/150
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7899 - loss: 0.4843 - val_accuracy: 0.8030 - val_loss: 0.4478
Epoch 3/150
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8035 - loss: 0.4478 - val_accuracy: 0.8134 - val_loss: 0.4339
Epoch 4/150
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8244 - loss: 0.4178 - val_accuracy: 0.8134 - val_loss: 0.4284
Epoch 5/150
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8107 - loss: 0.4360 - val_accuracy: 0.8194 - val_loss: 0.4250
Epoch 6/150
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8237 - loss: 0.4262 - val_accuracy: 0.8172 - val_loss: 0.4209
Epoch 7/150
[1m168/16

<keras.src.callbacks.history.History at 0x2a36313fa50>

In [24]:
model.layers[1].get_weights()

[array([[ 3.09559286e-01,  2.50534564e-01,  9.17629823e-02,
          4.86432374e-01, -3.38870406e-01, -6.51822269e-01,
         -2.59031713e-01,  2.35617384e-02,  6.28692269e-01,
         -9.71653044e-01, -6.22922957e-01],
        [-5.82858443e-01, -1.05816364e+00,  4.74676527e-02,
          1.83855832e-01,  7.70660222e-01,  2.32852519e-01,
          2.17807740e-01, -1.08109623e-01,  6.54316321e-02,
          9.96933784e-04, -9.58895028e-01],
        [ 8.29178154e-01,  7.32604563e-01,  2.13181108e-01,
          2.60980576e-01,  1.37849852e-01,  6.52031720e-01,
          5.65621495e-01,  1.66549161e-01,  4.16742891e-01,
         -9.93744507e-02,  2.13106036e-01],
        [-8.60523462e-01, -3.99392515e-01, -2.43895233e-01,
          3.48960936e-01, -8.12987745e-01, -9.36841726e-01,
          1.19177318e+00, -3.07321977e-02,  6.90694392e-01,
          3.12111676e-01,  9.81739629e-03],
        [ 8.52106035e-01,  4.37024385e-01,  2.43297473e-01,
         -9.93149340e-01,  2.99612701e-01,  

In [25]:
y_log = model.predict(X_test_scaled)

[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [26]:
y_pred = np.where(y_log>0.5,1,0)

In [33]:
# Accuracy of our model

testing_accuracy = accuracy_score(y_test,y_pred)*100

In [28]:
X_trained = model.predict(X_train_scaled)

[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 771us/step


In [29]:
X_trainedd = np.where(X_trained>0.5,1,0)

In [34]:
Training_accuracy = accuracy_score(y_train,X_trainedd)*100

In [35]:
Training_accuracy,testing_accuracy

(86.56716417910447, 86.66666666666667)