In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

Predict if customer has exited or not

In [4]:
dataset = pd.read_csv("../datasets/Churn_Modelling.csv")

dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


First 3 columns i.e row number , customer id , surname are unnecessary

In [3]:
X = dataset.iloc[: , 3 : 13]
y = dataset.iloc[: , 13]

Convert categorical features to dummy variables drop first variable to avoid dummy variable trap

In [5]:
geography = pd.get_dummies(X['Geography'] , drop_first = True)
gender = pd.get_dummies(X['Gender'] , drop_first = True)

Concatenate table and dummy variables

In [6]:
X = pd.concat([X , geography , gender] , axis = 1)

Now drop the categorical columns which have been converted to dummy variables

In [7]:
X = X.drop(['Geography' , 'Gender'] , axis = 1)

Split dataset into test and train split

In [8]:
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split(X , y , test_size = 0.2 , random_state = 0)

**Feature Scaling** very important

In [9]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## DEEP LEARNING hyperparameter optimization

In [18]:
%pip install scikeras

Note: you may need to restart the kernel to use updated packages.


In [32]:
from tensorflow import keras

from keras.wrappers.scikit_learn import KerasClassifier

#from scikeras.wrappers import KerasClassifier

from sklearn.model_selection import GridSearchCV

from keras.models import Sequential
from keras.layers import Dense , Activation , Embedding , Flatten , LeakyReLU , BatchNormalization , Dropout
from keras.activations import relu , sigmoid

In [33]:
def create_model(layers , activation) :
  model = Sequential()
  for i , nodes in enumerate(layers) :
    # Add number of required neurons which will be mentioned in the layers parameter
    # Apply the activation function that is specified as parameter
    # Add dropout layer
    
    # If first layer
    if i == 0 :
      # input dimensions is number of input features that is obtained from X_train.shape[1]
      model.add(Dense(units = nodes , input_dim = X_train.shape[1]))
      model.add(Activation(activation))
      model.add(Dropout(0.3))
      
    else :
      model.add(Dense(units = nodes))
      model.add(Activation(activation))
      model.add(Dropout(0.3))
      
  # As binary classification problem add sigmoid activation for final layer and use glorot_uniform to initialize weights for final layer which works well with sigmoid
  
  # In last layer as 1 output initialize units = 1
  model.add(Dense(units = 1 , kernel_initializer = 'glorot_uniform' , activation = 'sigmoid'))
  
  # Use adam optimizer and since binary classification use binary_crossentropy as loss function
  model.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics = ['accuracy'])
  
  return model 

In [34]:
# model = KerasClassifier(build_fn = create_model , verbose = 0)

# Build function is function we use to create model i.e create_model this case
model = KerasClassifier(build_fn = create_model , verbose = 0)

  model = KerasClassifier(build_fn = create_model , verbose = 0)


In [35]:
# Combinations to test the model with i.e diff number of layers and diff activation function
layers = [[20] , [40 , 20] , [45 , 30 , 15]]
activations = ['sigmoid' , 'relu']

In [36]:
param_grid = dict(layers = layers , activation = activations , batch_size = [128 , 256] , epochs = [30])

grid = GridSearchCV(estimator = model , param_grid = param_grid , cv = 5)

In [37]:
grid_result = grid.fit(X_train , y_train)

In [38]:
print(grid_result.best_score_ , grid_result.best_params_)

0.856499993801117 {'activation': 'relu', 'batch_size': 128, 'epochs': 30, 'layers': [40, 20]}


In [39]:
pred_y = grid.predict(X_test)



Evaluate the model

In [40]:
y_pred = (pred_y > 0.5)

In [43]:
from sklearn.metrics import confusion_matrix , accuracy_score

cm = confusion_matrix(y_pred , y_test)

cm

array([[1533,  212],
       [  62,  193]], dtype=int64)

In [45]:
score = accuracy_score(y_pred , y_test)

score

0.863