<a href="https://colab.research.google.com/github/Tanayshah29/Churn_Modelling/blob/main/Churn_Modelling_Hyperparameter_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Importing the libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Mount Google Drive in the Colab runtime, then load the churn dataset.
# NOTE(review): the CSV path below lives under /content/sample_data, not under
# the mounted Drive (/content/drive) — confirm which location is intended.
from google.colab import drive

drive.mount('/content/drive')
csv_path = '/content/sample_data/Churn_Modelling.csv'
data = pd.read_csv(csv_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Preview the first five rows of the raw dataset.
data.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# The first three columns (RowNumber, CustomerId, Surname) are treated as
# non-predictive identifiers and are left out of the feature set.
# Features are the columns at positions 3..12; the target is position 13 (Exited).
feature_columns = slice(3, 13)
target_column = 13
X = data.iloc[:, feature_columns]
y = data.iloc[:, target_column]

In [5]:
# Preview the selected feature columns (categoricals not yet encoded).
X.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [6]:
# Preview the target series (Exited).
y.head(n=5)

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

In [7]:
# Geography and Gender are categorical, so one-hot encode each of them.
# drop_first=True omits the first level of each column, leaving
# Germany/Spain for Geography and Male for Gender.
geography, gender = (
    pd.get_dummies(X[column], drop_first=True)
    for column in ('Geography', 'Gender')
)

In [8]:
# Append the dummy columns to the feature frame, side by side.
X = pd.concat([X, geography, gender], axis='columns')

In [9]:
# Check that the dummy columns were appended (the original Geography and
# Gender columns are still present at this point).
X.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,France,Female,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,1,0


In [10]:
# The original Geography and Gender columns are now represented by their
# dummy columns, so remove them from the feature frame.
raw_categoricals = ['Geography', 'Gender']
X = X.drop(columns=raw_categoricals)

In [11]:
# Check the final feature frame: 11 numeric columns, no raw categoricals.
X.head(n=5)

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,1,0


In [12]:
# Split the dataset into training and test sets: 20% is held out for
# testing, and random_state is fixed so the split is reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [13]:
# Shape of the training feature matrix as (rows, columns).
X_train.shape

(8000, 11)

In [14]:
# Shape of the held-out test feature matrix as (rows, columns).
X_test.shape

(2000, 11)

In [15]:
# Number of training labels; should match the row count of X_train.
y_train.shape

(8000,)

In [16]:
# Number of test labels; should match the row count of X_test.
y_test.shape

(2000,)

In [17]:
# Feature scaling: learn the scaling statistics from the training set only,
# then apply the same fitted scaler to both the training and the test set.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [18]:
# Now it's time to make our ANN and so these are the required libraries
import keras
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, Flatten, LeakyReLU, BatchNormalization
from keras.layers import LeakyReLU, ELU, PReLU
from keras.activations import relu, sigmoid
from keras.layers import Dropout

In [19]:
def create_model(layers, activation, dropout_rate=0.3, input_dim=None):
  """Build and compile a feed-forward binary classifier.

  Args:
    layers: Iterable of ints; one hidden Dense layer is added per entry,
      with that many units.
    activation: Activation passed to the Activation layer that follows
      every hidden Dense layer.
    dropout_rate: Rate passed to the Dropout layer that follows every
      hidden activation. Defaults to 0.3.
    input_dim: Number of input features for the first hidden layer. When
      None (the default), the notebook-level X_train.shape[1] is used.

  Returns:
    A compiled Sequential model ending in a single sigmoid unit, using the
    'adam' optimizer, 'binary_crossentropy' loss and the 'accuracy' metric.
  """
  model = Sequential()
  for i, nodes in enumerate(layers):
    if i == 0:
      # Only the first hidden layer is told the input width; fall back to
      # the global training matrix when the caller did not supply one.
      first_dim = X_train.shape[1] if input_dim is None else input_dim
      model.add(Dense(nodes, input_dim=first_dim))
    else:
      model.add(Dense(nodes))
    # Every hidden layer gets the same activation and dropout treatment.
    model.add(Activation(activation))
    model.add(Dropout(dropout_rate))

  model.add(Dense(units=1, kernel_initializer='glorot_uniform', activation='sigmoid'))
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  return model

In [20]:
# Wrap the create_model factory as an estimator so it can be handed to
# GridSearchCV. verbose=0 is forwarded to the wrapper (presumably to silence
# per-epoch training logs — confirm against the KerasClassifier docs).
model = KerasClassifier(
    build_fn=create_model,
    verbose=0,
)

In [24]:
# Search space: 3 hidden-layer layouts x 2 activations x 2 batch sizes at a
# fixed 30 epochs = 12 candidate configurations, each scored with 5-fold CV.
layers = [(20,), (40, 20), (45, 30, 15)]
activations = ['sigmoid', 'relu']
param_grid = {
    'layers': layers,
    'activation': activations,
    'batch_size': [128, 256],
    'epochs': [30],
}
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
)

In [25]:
# Run the grid search on the scaled training data (the expensive cell:
# every candidate configuration is trained once per CV fold).
grid_result = grid.fit(X=X_train, y=y_train)

In [26]:
# Report the best mean cross-validation score and the parameters that produced it.
best_score = grid_result.best_score_
best_params = grid_result.best_params_
print(best_score, best_params)

0.8550000071525574 {'activation': 'relu', 'batch_size': 128, 'epochs': 30, 'layers': (40, 20)}


In [30]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Predict on the held-out test set using the fitted grid search object.
y_pred = grid.predict(X_test)
# Convert the predictions to booleans with a 0.5 cut-off.
# NOTE(review): if grid.predict already returns 0/1 class labels this is only
# a dtype conversion — confirm against the KerasClassifier.predict docs.
y_pred = (y_pred > 0.5)

# Confusion matrix of true labels against predicted labels.
cm = confusion_matrix(y_test, y_pred)

# Accuracy on the test set. Arguments follow scikit-learn's (y_true, y_pred)
# order, matching the confusion_matrix call above.
score = accuracy_score(y_test, y_pred)

In [31]:
# Display the confusion matrix computed from y_test and y_pred.
cm

array([[1548,   47],
       [ 220,  185]])

In [32]:
# Display the test-set accuracy.
score

0.8665