In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Display the installed TensorFlow version so the environment used for this run is visible.
tf.__version__

In [None]:
# Read the churn data set from the current working directory.
dataset = pd.read_csv('Churn_Modelling.csv')

# Features: every column from position 3 up to (but not including) the last one.
feature_columns = dataset.iloc[:, 3:-1]
X = feature_columns.values

# Target: the last column of the data set.
target_column = dataset.iloc[:, -1]
y = target_column.values

In [None]:
# Sanity check: show the feature matrix taken from the data set.
print(X)

In [None]:
# Sanity check: show the target vector taken from the data set.
print(y)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the values of feature column 2 with integer codes.
# NOTE(review): column 2 is presumably the Gender column -- confirm against the CSV header.
le = LabelEncoder()
encoded_column = le.fit_transform(X[:, 2])
X[:, 2] = encoded_column

print(X)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode feature column 1 and pass every other column through unchanged.
# ColumnTransformer places the encoded (dummy) columns FIRST in its output, which is
# why the 1.0 0.0 0.0 style columns come before the credit score column.
#
# sparse_threshold=0 forces a dense result. OneHotEncoder emits a sparse matrix, and
# ColumnTransformer hands back a sparse matrix whenever the overall density drops
# below its threshold; np.array() on a SciPy sparse matrix would then produce a 0-d
# object array instead of a 2-D feature matrix.
#
# Run this cell only once per kernel session: X is overwritten here, so a second run
# would try to encode the already-transformed matrix.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = np.array(ct.fit_transform(X))
print(X)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state=0 keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.preprocessing import StandardScaler

# ALWAYS apply feature scaling to all columns for an ANN, irrespective of the
# values a column holds.
# The scaler learns its statistics from the training rows only; the test rows are
# then scaled with those same statistics.
# Note: both arrays are overwritten, so re-running this cell would scale data that
# is already scaled.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Create an empty Sequential model; layers are appended to it with ann.add(...).
ann = tf.keras.models.Sequential()

In [None]:
# First hidden layer: 6 fully connected units with ReLU activation.
hidden_layer_1 = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_layer_1)

In [None]:
# Second hidden layer: 6 fully connected units with ReLU activation.
hidden_layer_2 = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_layer_2)

In [None]:
# Output layer: one sigmoid unit, because the target is binary.
# For a non-binary target we would need one unit per class and one-hot encoded labels.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

In [None]:
# Training configuration: Adam optimizer, binary cross-entropy loss (binary target),
# and accuracy reported as the metric.
compile_options = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
ann.compile(**compile_options)

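Whenever the outcome is binary (Yes/No), we use loss = 'binary_crossentropy' together with a single sigmoid output unit.
If there were more than two possible output classes, we would use loss = 'categorical_crossentropy' instead (with one-hot encoded labels).
In that case the output layer also needs one unit per class and a softmax activation, e.g. for three classes: ann.add(tf.keras.layers.Dense(units=3, activation='softmax'))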

In [None]:
# Train on the scaled training data: mini-batches of 32 rows, 100 passes over the data.
fit_options = {'batch_size': 32, 'epochs': 100}
ann.fit(X_train, y_train, **fit_options)

__Predicting the result of a single observation:__

Using the ANN model to predict if the customer with the following information will leave the bank:

Geography: France
Credit Score: 600
Gender: Male
Age: 40 years old
Tenure: 3 years
Balance: 60000
Number of Products: 2
Does this customer have a credit card? Yes
Is this customer an Active Member: Yes
Estimated Salary: $ 50000
So, should we say goodbye to that customer?

In [None]:
# The predict method always needs a 2-D array, hence the double brackets [[...]].
# Do not enter "France" here: enter its one-hot encoded value, and put it in the
# first three columns, because the dummy variables are always created in the first
# columns. The remaining values follow in the same order as the training features.
customer_row = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]

# The network was trained on scaled features, so the same fitted scaler has to be
# applied to the new observation before predicting.
scaled_customer_row = sc.transform(customer_row)
prediction = ann.predict(scaled_customer_row)
temp = prediction[0][0]

# A probability above 0.5 is read as "Yes" here; adjust the threshold as needed.
print(temp > 0.5)
print(
    "Customer is likely to leave this bank"
    if temp > 0.5
    else "Customer is likely to stay in this bank"
)

In [None]:
# Same customer as in the previous cell: one-hot geography first, then the other features.
single_customer = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]
prediction = ann.predict(sc.transform(single_customer))

# Turn the model output for that customer into a percentage with two decimals.
probability = prediction[0][0] * 100
rounded_probability = round(probability, 2)

print(
    "The probability that this customer will leave the bank is:",
    rounded_probability,
    "%",
)


__Predicting The Test Results:__

In [None]:
# Model outputs for the test rows, then thresholded at 0.5 to get a
# True/False prediction per row.
test_probabilities = ann.predict(X_test)
y_pred = test_probabilities > 0.5

# Print predictions and true labels side by side: column 0 = predicted, column 1 = actual.
predicted_column = y_pred.reshape(len(y_pred), 1)
actual_column = y_test.reshape(len(y_test), 1)
print(np.concatenate((predicted_column, actual_column), axis=1))

__Making the Confusion Matrix:__

In [33]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true labels against predicted labels
# (scikit-learn puts the true classes on the rows and the predicted classes on the columns).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[1515   80]
 [ 198  207]]


__Checking Final Accuracy:__

In [36]:
# Overall accuracy on the test set as a percentage. The value is rounded to two
# decimals -- the same way the single-customer probability is reported -- so that
# binary floating-point noise (e.g. 86.05000000000001) never shows up in the output.
accuracy_percent = round(accuracy_score(y_test, y_pred) * 100, 2)
print(accuracy_percent, "%")

86.1 %
