# Part-1 Data PreProcessing

# Importing the Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Display the installed TensorFlow version (shown in the cell output).
tf.__version__

'2.16.1'

In [3]:
# Load the churn table; the relative path is resolved against the current
# working directory.
dataset = pd.read_csv('Churn_Modelling.csv')

# Features: every column from index 3 (CreditScore) up to, but not including,
# the last one. Columns 0-2 (RowNumber, CustomerId, Surname) are left out.
X = dataset.iloc[:, 3:-1].to_numpy()

# Target: the final column (Exited).
y = dataset.iloc[:, -1].to_numpy()

In [4]:
# Preview the feature matrix (NumPy abbreviates the middle rows and columns with "...").
print(X)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [5]:
# Preview the target vector (the last column of the dataset, Exited).
print(y)

[1 0 1 ... 1 1 0]


# Take care of missing data

In [6]:
# List every column with its non-null count and dtype to check for missing values.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [7]:
from sklearn.impute import SimpleImputer

# Mean-impute every numeric feature column of X. At this point the columns are:
#   0 CreditScore, 1 Geography, 2 Gender, 3 Age, 4 Tenure, 5 Balance,
#   6 NumOfProducts, 7 HasCrCard, 8 IsActiveMember, 9 EstimatedSalary
# A plain slice X[:, 3:-1] would leave out CreditScore (0) and EstimatedSalary (9),
# so a NaN in either of them would go unhandled. Select all numeric columns
# explicitly instead (everything except the two text columns 1 and 2).
# NOTE(review): the imputer is fitted on the full data set, i.e. before the
# train/test split; fitting on the training rows only would avoid leaking
# test-set statistics.
numeric_cols = [0] + list(range(3, X.shape[1]))
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X[:, numeric_cols])
X[:, numeric_cols] = imputer.transform(X[:, numeric_cols])

In [8]:
# Inspect columns 3 up to (not including) the last one after imputation.
print(X[:, 3:-1])

[[42.0 2.0 0.0 1.0 1.0 1.0]
 [41.0 1.0 83807.86 1.0 0.0 1.0]
 [42.0 8.0 159660.8 3.0 1.0 0.0]
 ...
 [36.0 7.0 0.0 1.0 0.0 1.0]
 [42.0 3.0 75075.31 2.0 1.0 0.0]
 [28.0 4.0 130142.79 1.0 1.0 0.0]]


# Encode Categorical Data

# One Hot Encoding (Geography)

In [9]:
# Column 2 of X is Gender. Geography, the column that is one-hot encoded in
# the next cell, sits at index 1.
print(X[:, 2])

['Female' 'Female' 'Female' ... 'Female' 'Male' 'Female']


In [10]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 1 (Geography). All remaining columns are passed through
# unchanged and end up after the newly created dummy columns.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

In [11]:
from sklearn.preprocessing import LabelEncoder

# After the one-hot step the Gender strings sit in column 4 (the Geography
# dummy columns and CreditScore come before it). Replace them with integer codes.
le = LabelEncoder()
le.fit(X[:, 4])
X[:, 4] = le.transform(X[:, 4])

# LabelEncoding 'Gender'

In [12]:
# Gender is now numeric; compared with the earlier print of this column,
# 'Female' became 0 and 'Male' became 1.
print(X[:,4])

[0 0 0 ... 0 1 0]


In [13]:
# Show only the text (object-dtype) columns of the raw DataFrame.
dataset.select_dtypes(include=['object'])

Unnamed: 0,Surname,Geography,Gender
0,Hargrave,France,Female
1,Hill,Spain,Female
2,Onio,France,Female
3,Boni,France,Female
4,Mitchell,Spain,Female
...,...,...,...
9995,Obijiaku,France,Male
9996,Johnstone,France,Male
9997,Liu,France,Female
9998,Sabbatini,Germany,Male


# Splitting Data into Training and Test Set 

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [15]:
# Preview the training features before scaling.
print(X_train)

[[0.0 0.0 1.0 ... 1.0 0.0 163830.64]
 [0.0 1.0 0.0 ... 1.0 1.0 57098.0]
 [1.0 0.0 0.0 ... 1.0 0.0 185630.76]
 ...
 [1.0 0.0 0.0 ... 1.0 0.0 181429.87]
 [0.0 0.0 1.0 ... 1.0 1.0 148750.16]
 [0.0 1.0 0.0 ... 1.0 0.0 118855.26]]


X_test, y_train and y_test can be printed in the same way.

# Feature Scaling (While building an ANN we will apply feature scaling on all the features of X_train and X_test)

In [16]:
from sklearn.preprocessing import StandardScaler

# Learn each column's mean and standard deviation from the training rows only,
# then apply those same statistics to both splits so the test set stays unseen.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [17]:
# Preview the standardised training features.
print(X_train)

[[-1.01460667 -0.5698444   1.74309049 ...  0.64259497 -1.03227043
   1.10643166]
 [-1.01460667  1.75486502 -0.57369368 ...  0.64259497  0.9687384
  -0.74866447]
 [ 0.98560362 -0.5698444  -0.57369368 ...  0.64259497 -1.03227043
   1.48533467]
 ...
 [ 0.98560362 -0.5698444  -0.57369368 ...  0.64259497 -1.03227043
   1.41231994]
 [-1.01460667 -0.5698444   1.74309049 ...  0.64259497  0.9687384
   0.84432121]
 [-1.01460667  1.75486502 -0.57369368 ...  0.64259497 -1.03227043
   0.32472465]]


In [18]:
# Preview the test features, scaled with the statistics learned from the training set.
print(X_test)

[[-1.01460667  1.75486502 -0.57369368 ...  0.64259497  0.9687384
   1.61085707]
 [ 0.98560362 -0.5698444  -0.57369368 ...  0.64259497 -1.03227043
   0.49587037]
 [-1.01460667 -0.5698444   1.74309049 ...  0.64259497  0.9687384
  -0.42478674]
 ...
 [-1.01460667 -0.5698444   1.74309049 ...  0.64259497 -1.03227043
   0.71888467]
 [-1.01460667  1.75486502 -0.57369368 ...  0.64259497  0.9687384
  -1.54507805]
 [-1.01460667  1.75486502 -0.57369368 ...  0.64259497 -1.03227043
   1.61255917]]


# Part-2 Building ANN

In [19]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so that weight initialisation and batch shuffling are repeatable
# when the notebook is re-run from a fresh kernel.
tf.keras.utils.set_random_seed(0)

# Keras ships as part of TensorFlow (tf.keras).
# tf.keras.models is the module; Sequential is the class that builds a neural
# network by stacking layers one after another.
ann = tf.keras.models.Sequential()

Add input layer and first hidden layer 

In [20]:
# Dense = fully connected layer: every neuron is connected to every neuron of
# the preceding layer.
# First argument (units) = number of neurons in the layer. There is no fixed
# rule for choosing it; the value is found by experimenting.
# activation='relu' = rectifier activation function, used in the hidden layers.
ann.add(tf.keras.layers.Dense(6, activation='relu'))


Add second hidden layer

In [21]:
# Second hidden layer: same configuration as the first one (6 neurons, ReLU).
ann.add(tf.keras.layers.Dense(6, activation='relu'))

Add Output Layer

In [22]:
# Output layer: a single neuron, because the outcome is binary (0/1). With
# several classes (e.g. A, B, C) the output layer would need one neuron per
# class, i.e. 3 units.
# The sigmoid activation squashes the output into the range 0..1, so it can be
# read as the probability of the positive class.
#
# IMPORTANT:
# for non-binary classification (more than two categories in the outcome) use
# the 'softmax' activation instead of 'sigmoid'.
ann.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Part-3 Training the ANN

Compiling ANN

In [23]:
# optimizer: 'adam' updates the weights with an adaptive form of stochastic
#            gradient descent.
# loss:      'binary_crossentropy' for a binary outcome; for non-binary
#            classification use 'categorical_crossentropy'.
# metrics:   several metrics may be listed at once; only the most important
#            one here, 'accuracy', is tracked.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Training the ANN on training-set

In [24]:
# Batch learning: the weights are updated after each batch of 32 rows (32 is
# also the default batch size; change it as you see fit). The full training
# set is passed through the network 100 times (epochs).
ann.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 362us/step - accuracy: 0.6171 - loss: 0.6561
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322us/step - accuracy: 0.7921 - loss: 0.4855
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 330us/step - accuracy: 0.7951 - loss: 0.4554
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 309us/step - accuracy: 0.8013 - loss: 0.4387
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300us/step - accuracy: 0.7909 - loss: 0.4416
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 301us/step - accuracy: 0.7968 - loss: 0.4318
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300us/step - accuracy: 0.7931 - loss: 0.4279
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300us/step - accuracy: 0.7904 - loss: 0.4280
Epoch 9/100
[1m

<keras.src.callbacks.history.History at 0x165479900>

# Part-4 Making Predictions and Evaluating the Model

Predicting the result of a single observation

In [25]:
# Predict the exit probability for one hand-written customer. The values follow
# the column order of the preprocessed feature matrix, with the categorical
# fields entered as their dummy/encoded values, and are scaled with the already
# fitted scaler before being fed to the network.
print(
    ann.predict(
        sc.transform([[
            1, 0, 0,   # Geography dummy variables (presumably France - verify against the encoder's categories)
            600,       # CreditScore
            1,         # Gender (1 = Male)
            40,        # Age
            3,         # Tenure
            60000,     # Balance
            2,         # NumOfProducts
            1,         # HasCrCard
            1,         # IsActiveMember
            50000,     # EstimatedSalary
        ]])
    )
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[[0.02410218]]


In [26]:
# Since the predicted probability of 'Exited' is ≈ 0.02 (well below 0.5), the model predicts that this customer stays with the bank.

Predicting the results for whole test set

In [27]:
# Turn the predicted probabilities into hard True/False labels with a 0.5 cut-off.
y_pred = ann.predict(X_test) > 0.5

# Print predictions (left column) next to the actual labels (right column).
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 260us/step
[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


Making the Confusion Matrix

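Structure (as returned by sklearn's `confusion_matrix`: rows are the actual classes, columns the predicted classes, both in sorted label order, i.e. 0 = Negative first, then 1 = Positive) :-

                      Predicted
                  Negative   Positive

       Negative   35 (TN)     5 (FP)
Actual

       Positive   10 (FN)    50 (TP)

In the output below, 1522 = true negatives, 73 = false positives, 198 = false negatives and 207 = true positives.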


In [28]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Rows of the matrix are the actual classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Fraction of test rows classified correctly; kept as the last expression so
# the notebook displays it.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[1522   73]
 [ 198  207]]


0.8645