# ARTIFICIAL NEURAL NETWORK

In [38]:
# Importing Libraries

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import tensorflow as tf

In [4]:
tf.__version__

'2.18.0'

In [7]:
# Data preprocessing

# Import the Dataset

dataset = pd.read_csv(r"/Churn_Modelling.csv")

In [8]:
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [9]:
dataset.shape

(10000, 14)

In [12]:
# Inependent Variable

x = dataset.iloc[:,3:-1].values

In [16]:
# Dependent variable

y = dataset.iloc[:,-1].values

In [17]:
print(x)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [18]:
print(y)

[1 0 1 ... 1 1 0]


In [21]:
# converting Categorical text into numerical values using label encoder in Gender column

le = LabelEncoder()

x[:,2] = le.fit_transform(x[:,2])

print(x)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


In [24]:
# converting categorical text into numerical using One Hot Encoder for Geography column
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [1])], remainder='passthrough')

x = np.array(ct.fit_transform(x))

In [26]:
# Feature scaling

sc = StandardScaler()

x = sc.fit_transform(x)

print(x)

[[ 0.57873591 -0.57873591  0.99720391 ...  0.64609167  0.97024255
   0.02188649]
 [ 0.57873591 -0.57873591 -1.00280393 ... -1.54776799  0.97024255
   0.21653375]
 [ 0.57873591 -0.57873591  0.99720391 ...  0.64609167 -1.03067011
   0.2406869 ]
 ...
 [ 0.57873591 -0.57873591  0.99720391 ... -1.54776799  0.97024255
  -1.00864308]
 [-1.72790383  1.72790383 -1.00280393 ...  0.64609167 -1.03067011
  -0.12523071]
 [ 0.57873591 -0.57873591  0.99720391 ...  0.64609167 -1.03067011
  -1.07636976]]


In [28]:
# Splitting the Dataset into training set & testing set

x_train,x_test,y_train,y_test = train_test_split(x,y, test_size= 0.2, random_state=0)

In [29]:
# Creating Artificial Neural Network model

ann = tf.keras.models.Sequential()

In [30]:
# adding layers

ann.add(tf.keras.layers.Dense(units = 6, activation = 'relu'))


# Hidden Layer2
ann.add(tf.keras.layers.Dense(units = 6, activation = 'relu'))

# Hidden Layer3
ann.add(tf.keras.layers.Dense(units = 5, activation = 'relu'))

# Hidden Layer4
ann.add(tf.keras.layers.Dense(units = 4, activation = 'relu'))

# output Layer
ann.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))

In [31]:
# Compile the ANN model

ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [32]:
# After Compiling Train the model

ann.fit(x_train,y_train, batch_size = 32, epochs = 5)

Epoch 1/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.7880 - loss: 0.5776
Epoch 2/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7898 - loss: 0.4593
Epoch 3/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7963 - loss: 0.4358
Epoch 4/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8033 - loss: 0.4186
Epoch 5/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7982 - loss: 0.4166


<keras.src.callbacks.history.History at 0x7ed4229a1d90>

In [34]:
# Making Predictions

y_pred = ann.predict(x_test)

# Applying Threshold (Converts probability predictions into binary class labels 0 & 1 )
y_pred = (y_pred > 0.5)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [35]:
# Compare Predicted values with Actual values by concatinating them side by side

print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test),1)), 1))

[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [37]:
# Making confusion matrix

cm = confusion_matrix(y_test, y_pred)

print(cm)

[[1595    0]
 [ 405    0]]


In [39]:
# Accuracy Score
ac = accuracy_score(y_test,y_pred)

print(ac)

0.7975
