### Import Libraries :

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [3]:
# Load the customer churn table from the CSV file in the working directory.
dataset = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')

In [4]:
# Display the loaded frame to check the columns and a few sample rows.
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [5]:
# The last column (Exited) is the label to predict.
y = dataset.iloc[:, -1].to_numpy()
# The first three columns (RowNumber, CustomerId, Surname) only identify the
# customer, so the features run from the 4th column up to, but not including,
# the label column.
x = dataset.iloc[:, 3:-1].to_numpy()

In [6]:
# Show the feature matrix and the label vector, separated by blank lines.
print(x, "\n", y, sep="\n")

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


[1 0 1 ... 1 1 0]


### Gender is categorical data, so we use LabelEncoder to convert it into numerical labels

In [8]:
from sklearn.preprocessing import LabelEncoder

# Column 2 of x holds Gender: learn its labels, then overwrite the strings
# with their integer codes in place.
le = LabelEncoder().fit(x[:, 2])
x[:, 2] = le.transform(x[:, 2])

In [9]:
x[:, 2]  # Female is encoded as 0 and Male as 1

array([0, 0, 0, ..., 0, 1, 0], dtype=object)

In [11]:
# Check the feature matrix after label encoding: the Gender column now holds 0/1.
print(x)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


In [29]:
# Inspect the raw Geography column before encoding it.
print(dataset["Geography"])

0        France
1         Spain
2        France
3        France
4         Spain
         ...   
9995     France
9996     France
9997     France
9998    Germany
9999     France
Name: Geography, Length: 10000, dtype: object


In [13]:
# List the distinct Geography values to see how many categories need encoding.
dataset["Geography"].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

### "Geography" contains 3 categorical values, so we use OneHotEncoder to convert it into a numerical format

In [14]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode Geography (column 1 of x); every other column is passed
# through unchanged.
# NOTE: x is overwritten with the encoded result, so running this cell a
# second time would encode the already-encoded matrix again.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
)
x = ct.fit_transform(x)
x = np.array(x)

In [16]:
# Check the feature matrix after one-hot encoding Geography.
print(x)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


### Splitting the dataset into the Training set and Test set

In [48]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; random_state is fixed so the
# same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

### Feature Scaling

Standardizes the features in both the training and test sets using StandardScaler. The scaler is fitted on the training set only and then applied to the test set, so both are on the same scale for better model training and evaluation.

In [49]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only, then apply the
# same fitted scaler to both the training and the test set.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

# Now ANN

In [50]:
# Seed the Python, NumPy and TensorFlow random generators before the model is
# created, so that weight initialisation and batch shuffling start from the
# same state on every "Restart & Run All". Without a seed, the accuracy and
# predictions reported below change from run to run.
tf.keras.utils.set_random_seed(42)

ann = tf.keras.models.Sequential()  # start an empty model; layers are added to it one at a time

Adding the layer.

In [51]:
# Hidden layer: 10 fully connected neurons. The ReLU (Rectified Linear Unit)
# activation introduces non-linearity into the model.
ann.add(
    tf.keras.layers.Dense(units=10, activation='relu')
)

Adding output layer.

In [52]:
# Output layer: a single fully connected neuron with a sigmoid activation,
# the usual choice for binary classification.
ann.add(
    tf.keras.layers.Dense(units=1, activation='sigmoid')
)

# Training the ANN

Configures the neural network model for training with the Adam optimizer, binary cross-entropy loss function, and accuracy metric for evaluation.

In [53]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the evaluation metric.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### Now we train the neural network model `ann` on the input data `x_train` and labels `y_train` for 25 epochs, using batches of 32 samples for each update.

The training data is divided into batches of 32 samples each, and the model's weights are updated after each batch is processed.

One epoch is one full pass over the training set, so the training process iterates over the entire training set 25 times to update the model's weights and improve its performance.

In [54]:
# Train for 25 passes over the training set, updating the weights after every
# batch of 32 samples.
ann.fit(x=x_train, y=y_train, epochs=25, batch_size=32)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x7d8b19b03400>

## Making the predictions

In [55]:
# Predict the churn probability for one hand-written customer. The values
# must follow the column order of the encoded feature matrix and go through
# the same fitted scaler as the training data.
print(
    ann.predict(
        sc.transform([[
            1, 0, 0,  # Geography one-hot columns (this pattern is France)
            600,      # CreditScore
            1,        # Gender (1 = Male)
            40,       # Age
            3,        # Tenure
            60000,    # Balance
            2,        # NumOfProducts
            1,        # HasCrCard
            1,        # IsActiveMember
            50000,    # EstimatedSalary
        ]])
    )
)

[[0.03668727]]


## Predicting the Test set results

 If a predicted value is greater than 0.5, it will be set to True, otherwise False

Combines the reshaped predicted values and actual values into a single array for evaluation or comparison purposes.

In [56]:
# Predicted probabilities for the test set, thresholded at 0.5 into
# True/False labels.
y_pred = ann.predict(x_test) > 0.5

# Print predicted and actual labels side by side, one row per test sample.
predicted_column = y_pred.reshape(-1, 1)
actual_column = y_test.reshape(-1, 1)
print(np.concatenate((predicted_column, actual_column), axis=1))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


## Confusion Matrix
### Now Print the confusion matrix based on the true and predicted labels

In [57]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Count test-set outcomes: rows are the true class, columns the predicted class.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[1526   59]
 [ 218  197]]


### Accuracy

In [58]:
# Fraction of test-set rows whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

0.8615