In [1]:
# importing the packages
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix, accuracy_score
import keras
from keras.models import Sequential, save_model, load_model
from keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier
from joblib import dump, load
import matplotlib.pyplot as plt
from warnings import filterwarnings

# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# imported above — confirm that blanket suppression is intended.
filterwarnings("ignore")



In [3]:
# load the churn dataset from disk into a dataframe
data = pd.read_csv(filepath_or_buffer="data/Churn_Modelling.csv")
# preview the first ten rows
data.head(n=10)

Unnamed: 0,RowNumber,CustomerId,name,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,roshan,619,Chennai,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,lodha,608,Bangalore,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,garcia,502,Chennai,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,paud,699,Chennai,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,kalbhor,850,Bangalore,Female,43,2,125510.82,1,1,1,79084.1,0
5,6,15574012,zahaldar,645,Bangalore,Male,44,8,113755.78,2,1,0,149756.71,1
6,7,15592531,tamhankar,822,Chennai,Male,50,7,0.0,2,1,1,10062.8,0
7,8,15656148,vichare,376,Mumbai,Female,29,4,115046.74,4,1,0,119346.88,1
8,9,15792365,girdhar,501,Chennai,Male,44,4,142051.07,2,0,1,74940.5,0
9,10,15592389,shetty,684,Chennai,Male,27,2,134603.88,1,1,1,71725.73,0


In [4]:
# reading the data into a dataframe
data = pd.read_csv("data/Churn_Modelling.csv")

# Select the model inputs and the label by column name rather than by
# position, so that an added, removed or reordered column in the CSV cannot
# silently shift the wrong data into X or y.
# The order of FEATURE_COLUMNS matters: the encoding cell addresses
# Geography as column 1 and Gender as column 2 of X.
FEATURE_COLUMNS = [
    "CreditScore",
    "Geography",
    "Gender",
    "Age",
    "Tenure",
    "Balance",
    "NumOfProducts",
    "HasCrCard",
    "IsActiveMember",
    "EstimatedSalary",
]
TARGET_COLUMN = "Exited"

# fail early, with a readable message, if the file does not have the expected schema
missing_columns = set(FEATURE_COLUMNS + [TARGET_COLUMN]) - set(data.columns)
assert not missing_columns, f"Churn_Modelling.csv is missing columns: {sorted(missing_columns)}"

# split the dataframe into independent (X) and dependent (y) variables
X = data[FEATURE_COLUMNS].values
y = data[TARGET_COLUMN].values

In [5]:
# Gender lives in column 2 of X: replace its labels with integer codes
label_encoder_gender = LabelEncoder()
gender_codes = label_encoder_gender.fit_transform(X[:, 2])
X[:, 2] = gender_codes

# Geography lives in column 1 of X: one-hot encode it, dropping the first
# category, and pass every other column through unchanged
geography_encoder = OneHotEncoder(drop='first')
ct = ColumnTransformer(
    transformers=[('encoder', geography_encoder, [1])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

## recorded information
# female = 0, male = 1
# Chennai = [1,0], Bangalore = [0,0], Mumbai = [0,1]

In [6]:
# hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical between runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=101
)

# learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# persist the fitted scaler so it can be reused at prediction time
dump(scaler, "model/scaler_instance.joblib")

['model/scaler_instance.joblib']

In [7]:
# Initializing the ANN
# The optimal parameters for this model were chosen using grid search CV

# Seed the random number generators Keras relies on, so that weight
# initialisation, dropout masks and batch shuffling are repeatable
# across a Restart & Run All.
keras.utils.set_random_seed(101)

classifier = Sequential()

# Input layer: take the feature count from the training matrix instead of
# hard-coding it, so a change in preprocessing cannot silently desynchronise
# the network from its data
classifier.add(keras.Input(shape=(X_train.shape[1],)))

# Adding the first hidden layer
classifier.add(Dense(units=6, activation='relu'))
classifier.add(Dropout(rate=0.1))

# Adding the second hidden layer
classifier.add(Dense(units=6, activation='relu'))
classifier.add(Dropout(rate=0.1))

# Adding the output layer
classifier.add(Dense(units=1, activation='sigmoid'))

# compiling the ANN
classifier.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# Stop automatically once the validation loss has not improved for 20 epochs
# and roll back to the best weights seen. Without this the cell runs all 500
# epochs; if it is interrupted by hand, the save statements below never
# execute and the reload cell ends up reading a stale file.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=20,
    restore_best_weights=True,
)

# Fit the ANN to the training set; 10% of the training rows are held out to
# drive the early-stopping decision (the test set stays untouched)
classifier.fit(
    X_train,
    y_train,
    batch_size=25,
    epochs=500,
    validation_split=0.1,
    callbacks=[early_stopping],
)

# save the keras model as an instance
classifier.save("model/my_model.keras", overwrite=True)
# NOTE(review): this second artifact pickles the whole Keras model through
# joblib; the reload cell reads the native .keras file written above —
# confirm whether the joblib copy is still needed.
dump(classifier, "model/classifier.joblib")


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

KeyboardInterrupt: 

In [16]:
# Export the in-memory classifier to the path "saved_model"; the recorded
# output shows TensorFlow writing an assets directory under that path.
# NOTE(review): this cell's execution count (16) is out of sequence with the
# cells around it, and the training cell already saves the same model to
# "model/my_model.keras" — confirm whether this extra export is still wanted.
classifier.save("saved_model")

INFO:tensorflow:Assets written to: saved_model\assets


INFO:tensorflow:Assets written to: saved_model\assets


In [8]:
# score the held-out rows and turn the sigmoid outputs into class decisions
# by thresholding at 0.5
y_pred = classifier.predict(X_test) > 0.5

# summarise the predictions: confusion matrix plus overall accuracy
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("The confusion Matrix is: \n", conf_matrix,"\nThe accuracy is: ", accuracy)

The confusion Matrix is: 
 [[1510   67]
 [ 198  225]] 
The accuracy is:  0.8675


In [9]:
# Reload the persisted scaler and network from disk.
# `load` (joblib) and `load_model` (keras.models) are already imported in the
# first cell, so they are not imported again here.
# This rebinds `scaler` and `classifier`: every cell below works with the
# objects read from disk, not with the ones built earlier in this session.
scaler = load("model/scaler_instance.joblib")
classifier = load_model("model/my_model.keras")

In [10]:
# Predicting a single customer
#
# The feature vector must follow the column order produced by the
# preprocessing cells: the two Geography dummies first, then the remaining
# features in their original order.
# Encodings: Chennai = [1,0], Bangalore = [0,0], Mumbai = [0,1];
#            female = 0, male = 1.
# Each value is annotated in place so the description cannot drift away from
# what the model actually receives.
new_customer = np.array([[
    1, 0,       # Geography = Chennai
    502,        # Credit Score
    0,          # Gender = Female
    42,         # Age
    8,          # Tenure
    159660.8,   # Balance
    3,          # Number of Products
    1,          # Has Credit Card = Yes
    0,          # Is Active Member = No
    113931.6,   # Estimated Salary
]])

# scale with the fitted scaler before predicting, exactly as the training data was
new_prediction = classifier.predict(scaler.transform(new_customer))
# class decision at the 0.5 threshold (True = predicted to exit)
new_pred = (new_prediction > 0.5)
# show the raw churn probability
new_prediction[0][0]



0.6752013