In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [2]:
# Read the Churn_Modelling CSV into a DataFrame; the bare `data` expression on the
# last line renders the frame as the cell output.
data = pd.read_csv(filepath_or_buffer="../Data/Churn_Modelling.csv")
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [3]:
## Preprocess the data
# NOTE: this cell changes `data` in place (column drop + Gender overwrite), so it is
# meant to run once per load of `data`; reload `data` before running it again.

# Drop the columns treated as irrelevant features.
data.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)

# Label encode gender: the Gender column is overwritten with the fitted encoder's output.
label_encoder = LabelEncoder()
data['Gender'] = label_encoder.fit_transform(data['Gender'])

# One hot encode geography: fit on the single Geography column, then densify the
# encoder output into a DataFrame labelled with the encoder's feature names.
ohe_encoder = OneHotEncoder()
geo_encoder = ohe_encoder.fit_transform(data[['Geography']])
geo_encoded_df = pd.DataFrame(
    data=geo_encoder.toarray(),
    columns=ohe_encoder.get_feature_names_out(['Geography']),
)

# Put the encoded columns next to the remaining features and remove the original
# Geography column they replace.
df = pd.concat([data, geo_encoded_df], axis='columns').drop(columns='Geography')
df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


In [4]:
# Separate the dependent feature (y, the 'Exited' column) from the independent
# features (X, every other column of df).
y = df['Exited']
X = df.drop(columns=['Exited'])

In [5]:
# Train/test split: 25% of the rows are held out for testing, with a fixed
# random_state for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=33,
)

# Standard scaling: the scaler is fitted on the training rows only, then the same
# fitted scaler transforms both the training and the test rows.
scalar = StandardScaler()
X_train_scaled = scalar.fit(X_train).transform(X_train)
X_test_scaled = scalar.transform(X_test)

In [6]:
# Saving the fitted label encoder, one-hot encoder and scaler, each to its own
# pickle file (written in the order listed below).
for _pickle_path, _fitted_object in (
    ('hypt_label_encoder_gender.pkl', label_encoder),
    ('hypt_ohe.pkl', ohe_encoder),
    ('hypt_scalar.pkl', scalar),
):
    with open(_pickle_path, 'wb') as file:
        pickle.dump(_fitted_object, file)

In [7]:
## Define a function to create a model and try different parameters
def create_model(neurons=32, layers=1, input_dim=None):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int, default 32
        Number of units in every hidden layer.
    layers : int, default 1
        Number of hidden layers. The first hidden layer is always added, so
        any value below 1 still yields exactly one hidden layer.
    input_dim : int or None, default None
        Number of input features. When None, the column count of the
        notebook-level ``X_train`` is used.

    Returns
    -------
    Sequential
        Model compiled with the 'adam' optimizer, 'binary_crossentropy' loss
        and the 'accuracy' metric.
    """
    if input_dim is None:
        # Fall back to the notebook-level training frame.
        input_dim = X_train.shape[1]

    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # `input_shape` to the first Dense layer: Keras emits a UserWarning for the
    # latter on every model build, which floods the grid-search output.
    model.add(tf.keras.Input(shape=(input_dim,)))
    model.add(Dense(neurons, activation='relu'))

    # Remaining hidden layers (none when layers == 1).
    for _ in range(layers - 1):
        model.add(Dense(neurons, activation='relu'))

    # Single sigmoid output unit, paired with the binary cross-entropy loss.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [8]:
## Create a keras classifier
# Wrap the `create_model` builder in a KerasClassifier; `neurons` and `layers`
# are given the same values as the builder's own defaults.
model = KerasClassifier(
    model=create_model,
    neurons=32,
    layers=1,
    verbose=1,
)

In [None]:
# Define gridsearch parameters
# Keys that target arguments of `create_model` need the `model__` prefix with a
# DOUBLE underscore; a single underscore ('model_neurons') is not a valid
# parameter name for the wrapped estimator. `epochs` has no prefix because it is
# a parameter of the KerasClassifier wrapper itself.
param_grid = {
    'model__neurons': [16, 32, 64, 128],
    'model__layers': [1, 2, 3],
    'epochs': [50, 100],
}

In [13]:
# perform gridsearchcv
# 3-fold cross-validated search over every combination in `param_grid`;
# n_jobs=-1 asks scikit-learn to use all available processors.
grid = GridSearchCV(model, param_grid, cv=3, n_jobs=-1)
grid_result = grid.fit(X_train_scaled, y_train)

# Print the best parameters
print(
    "Best: %f and %s"
    % (grid_result.best_score_, grid_result.best_params_)
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

Best: 0.857867 and {'epochs': 50, 'model__layers': 1, 'model__neurons': 64}
