In [68]:
import pandas as pd
import numpy as np

In [44]:
df = pd.read_csv('Churn_Modelling.csv')

In [45]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [47]:
df.duplicated().sum()

0

In [48]:
df['Exited'].value_counts()

Exited
0    7963
1    2037
Name: count, dtype: int64

In [49]:
df['Geography'].value_counts()

Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

In [50]:
# We will remove first three columns, because we think they are not of our use.
df.drop(columns = ['RowNumber', 'CustomerId', 'Surname'], inplace = True)

In [51]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [52]:
# By sequence our approach must be like this that, we must have performed EDA and then come to know 
# that which columns are of our use and which are not, but our goal here is to see how to train
# neural networks and not to make a perfect model. So we will directly go and build ANN using keras.

# For that first thing which we need to do is to one hot encode these two columns, geography and gender.
df = pd.get_dummies(df, columns = ['Geography','Gender'], drop_first = True)

In [53]:
df.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Germany,Geography_Spain,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,False,False,False
1,608,41,1,83807.86,1,0,1,112542.58,0,False,True,False
2,502,42,8,159660.8,3,1,0,113931.57,1,False,False,False
3,699,39,1,0.0,2,0,0,93826.63,0,False,False,False
4,850,43,2,125510.82,1,1,1,79084.1,0,False,True,False


In [54]:
# We will scale our values before doing model training.
X = df.drop(columns = ['Exited'])
y = df['Exited']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

In [55]:
X_train.shape,X_test.shape

((8000, 11), (2000, 11))

In [56]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [57]:
X_train_scaled

array([[-0.23082038, -0.94449979, -0.70174202, ...,  1.71490137,
        -0.57273139,  0.91509065],
       [-0.25150912, -0.94449979, -0.35520275, ..., -0.58312392,
        -0.57273139, -1.09278791],
       [-0.3963303 ,  0.77498705,  0.33787579, ...,  1.71490137,
        -0.57273139, -1.09278791],
       ...,
       [ 0.22433188,  0.58393295,  1.3774936 , ..., -0.58312392,
        -0.57273139, -1.09278791],
       [ 0.13123255,  0.01077067,  1.03095433, ..., -0.58312392,
        -0.57273139, -1.09278791],
       [ 1.1656695 ,  0.29735181,  0.33787579, ...,  1.71490137,
        -0.57273139,  0.91509065]])

In [58]:
!pip install tensorflow





In [59]:
import tensorflow 
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense




In [60]:
# In keras, we can create two types of models, 1. Sequential. 2. Non Sequential.
model = Sequential()
# we will have input layer, hidden layer and output layer.
model.add(Dense(3, activation = 'sigmoid', input_dim = 11)) # input layer.
model.add(Dense(1, activation = 'sigmoid')) # output layer.
# Above is the architecture of our neural networks.




In [61]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 3)                 36        
                                                                 
 dense_1 (Dense)             (None, 1)                 4         
                                                                 
Total params: 40 (160.00 Byte)
Trainable params: 40 (160.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [62]:
# Here we need to do compilation.
model.compile(loss = 'binary_crossentropy', optimizer = 'Adam')




In [63]:
model.fit(X_train_scaled, y_train, epochs = 10) # this is the stage where our neural network will get trained.

Epoch 1/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1cd5dcdecd0>

In [64]:
# Now, we will see that where the output of this weights and biases are stored.
model.layers[0].get_weights() # We have weight and biases in below two arrays.

[array([[ 2.16226846e-01, -8.41929615e-02, -3.06858290e-02],
        [-2.98229903e-01,  9.67987180e-01,  1.02944124e+00],
        [ 4.13845330e-01,  2.69055454e-04,  1.22349389e-01],
        [-8.54071304e-02,  4.23032016e-01,  1.54000416e-01],
        [ 1.08886790e+00, -8.71080626e-03,  3.45375724e-02],
        [-2.98904300e-01, -1.76487222e-01, -1.01397268e-01],
        [ 1.14436455e-01, -2.98587859e-01, -7.26337373e-01],
        [-1.79310665e-01, -1.07024536e-02,  1.61056772e-01],
        [-6.34643659e-02,  5.04090726e-01,  5.95310509e-01],
        [ 3.25979233e-01,  5.26059940e-02, -1.62620783e-01],
        [-3.40762772e-02, -5.06890237e-01, -2.02399999e-01]], dtype=float32),
 array([ 0.03843655, -0.74709576, -0.8329615 ], dtype=float32)]

In [66]:
y_log = model.predict(X_test_scaled) # Prediction for test set.
y_log



array([[0.20919088],
       [0.23957407],
       [0.2061685 ],
       ...,
       [0.1661587 ],
       [0.2269552 ],
       [0.31218794]], dtype=float32)

In [69]:
# The thing which we need to notice here is that our output is neither 0 nor 1. 
# The reason is we are using sigmoid function and the output of the sigmoid function is a 
# probability between 0 and 1. Now, this is our work to convert this probability between 0 and 1.
# It is easy, we just need to decide a threshold. For threshold, we must see ROC, AUC, AUC curve but,
# here we are just learning so we just take threshold as 0.5.

y_pred = np.where(y_log > 0.5, 1, 0)

In [70]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.7925

In [71]:
# We can do some basic changes and then we can increase the accuracy of our model. Here it is 79%.
# 1. We can increase the number of epochs.
# 2. Generally it is seen as, if you keep your activation function to be ReLu then it gives 
#    a better result.
# 3. We can increase the number of nodes within hidden layer.