## Binary Classification using ANN

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('./Datasets/Churn_Modelling.xls')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)

In [4]:
df.shape

(10000, 11)

In [5]:
df.isnull().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [6]:
df.duplicated().sum()

np.int64(0)

In [7]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [8]:
df['Geography'].value_counts()

Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

In [9]:
df['NumOfProducts'].value_counts()

NumOfProducts
1    5084
2    4590
3     266
4      60
Name: count, dtype: int64

In [10]:
from sklearn.model_selection import train_test_split

X = df.drop(columns="Exited")
y = df["Exited"]
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

transformer = ColumnTransformer(
    transformers = [
        ('scaler', StandardScaler(), ['CreditScore', 'Age', 'EstimatedSalary']),
        ('encoder', OneHotEncoder(sparse_output=False, drop='first'), ['Gender', 'Geography'])
    ],
    remainder='passthrough',
    verbose_feature_names_out=False
).set_output(transform="pandas")

In [12]:
X_train_transformed = transformer.fit_transform(X_train)
X_test_transformed = transformer.transform(X_test)

X_train_transformed.head()

Unnamed: 0,CreditScore,Age,EstimatedSalary,Gender_Male,Geography_Germany,Geography_Spain,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember
569,1.134864,-0.757588,1.156018,1.0,0.0,0.0,7,0.0,3,1,1
3630,0.047405,0.663221,1.614991,1.0,0.0,0.0,9,143267.14,2,0,0
5985,0.554886,-0.568147,-0.61417,1.0,0.0,1.0,7,68423.89,1,1,1
2324,0.306324,-0.94703,-0.837581,0.0,0.0,0.0,2,148143.84,1,1,1
5530,-0.139017,-0.473426,-0.04252,1.0,0.0,0.0,6,114543.27,1,1,1


# NEURAL NETWORK

In [38]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, InputLayer

In [28]:
model = Sequential()

model.add(InputLayer(shape=(11,)))                 # Input Layer (11 Nodes)  - (11, ) means 1 dimensional array 11 columns 
model.add(Dense(units=3, activation='sigmoid'))    # Hidden Layer (3 Nodes)  
model.add(Dense(units=1, activation='sigmoid'))    # Output Layer (1 Node)

In [29]:
model.summary()

In [30]:
model.compile(optimizer="Adam", loss="binary_crossentropy")

In [31]:
model.fit(X_train_transformed, y_train, epochs=10)

Epoch 1/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.0498 
Epoch 2/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 946us/step - loss: 0.7745
Epoch 3/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 917us/step - loss: 0.6411
Epoch 4/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 935us/step - loss: 0.5740
Epoch 5/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 964us/step - loss: 0.5411
Epoch 6/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 935us/step - loss: 0.5214
Epoch 7/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 930us/step - loss: 0.5046
Epoch 8/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 929us/step - loss: 0.4992
Epoch 9/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 899us/step - loss: 0.4995
Epoch 10/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

<keras.src.callbacks.history.History at 0x7fe7307eaed0>

In [32]:
model.layers[0].get_weights()    # weights coming out of L0 (input layer)

[array([[ 0.09557378,  0.57633454, -0.34717935],
        [ 0.5425525 , -1.0761465 ,  0.24841519],
        [-0.06394098, -0.28515834,  0.452155  ],
        [ 0.16745037,  0.75086826, -0.5619638 ],
        [ 0.49998665,  0.601987  ,  0.14456135],
        [ 0.0824817 ,  0.12291656, -0.61195725],
        [-0.05303614,  0.00628385, -0.18174975],
        [ 0.49232578,  0.3537991 , -0.17132264],
        [-1.0462902 ,  0.7935514 , -0.5537071 ],
        [ 0.21760587,  0.4894592 , -0.6755667 ],
        [-0.92215383,  0.21198891,  0.18323056]], dtype=float32),
 array([-0.40865973,  0.2990406 , -0.27090523], dtype=float32)]

In [33]:
model.layers[1].get_weights()    # weights coming out of L1 (hidden layer)

[array([[ 0.1948435 ],
        [-0.5243137 ],
        [-0.02431898]], dtype=float32),
 array([-0.9512189], dtype=float32)]

In [34]:
# since we are using sigmoid we will get the probabilities
y_pred_prob = model.predict(X_test_transformed)
y_pred_prob

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


array([[0.21743295],
       [0.18845417],
       [0.21743295],
       ...,
       [0.21743295],
       [0.18801329],
       [0.19400078]], dtype=float32)

In [35]:
# converting to 0 or 1
y_pred = np.where(y_pred_prob >= 0.5, 1, 0)
y_pred

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]])

In [36]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

print(accuracy_score(y_test, y_pred))
print(precision_score(y_test, y_pred))
print(recall_score(y_test, y_pred))

0.7896
0.0
0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# IMPROVING MODEL PERFORMANCE

In [56]:
# added an additional hidden layer
# increased number of nodes in hidden layer
# changed activation function of hidden layers to relu

model = Sequential()

model.add(InputLayer(shape=(11,)))
model.add(Dense(11, activation="relu"))
model.add(Dense(11, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

In [57]:
model.summary()

In [58]:
model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=["accuracy"])    # metrics to log the accuracy during fit

In [59]:
history = model.fit(X_train_transformed, y_train, epochs=30, validation_split=0.2)    # validation split to add validation set 

Epoch 1/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5219 - loss: 2160.6772 - val_accuracy: 0.8000 - val_loss: 59.8184
Epoch 2/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7304 - loss: 19.2030 - val_accuracy: 0.7980 - val_loss: 39.5465
Epoch 3/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7129 - loss: 29.7491 - val_accuracy: 0.7980 - val_loss: 15.1040
Epoch 4/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7099 - loss: 31.5697 - val_accuracy: 0.7980 - val_loss: 4.6703
Epoch 5/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6989 - loss: 33.2255 - val_accuracy: 0.8000 - val_loss: 6.6569
Epoch 6/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7171 - loss: 29.2439 - val_accuracy: 0.8007 - val_loss: 62.1834
Epoch 7/30
[1m1

- #### Test accuracy higher compared to validation accuracy (72, 80) => Overfitting => Reduce nodes, layers etc and experiment
- #### Training loss fluctuates a lot => Model might be having difficulty converging => reduce learning rate
- #### 