# Hyperparameter Tuning

In [3]:
import pickle

import pandas as pd
import tensorflow as tf
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [2]:
# Load the churn dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")

In [4]:
# Preprocessing: encode categoricals, split, scale, and persist the fitted
# transformers so they can be reloaded later.
#
# NOTE: this cell overwrites `data`. Re-running it without reloading the CSV
# raises a KeyError because the dropped columns are already gone.

# Remove the RowNumber, CustomerId and Surname columns; they are not model inputs.
data = data.drop(columns=['RowNumber','CustomerId','Surname'])

# Gender -> integer labels.
label_gender = LabelEncoder()
data['Gender'] = label_gender.fit_transform(data['Gender'])

# Geography -> one dense indicator column per category.
one_geo = OneHotEncoder(sparse_output=False)
geo_encode = one_geo.fit_transform(data[['Geography']])
geo_encoded_df = pd.DataFrame(
    geo_encode,
    columns=one_geo.get_feature_names_out(['Geography']),
    # pd.concat(axis=1) aligns rows by index label. Reusing data's index keeps
    # the encoded rows attached to the right customers even if `data` does not
    # carry a default 0..n-1 index (e.g. after filtering rows).
    index=data.index,
)

data = pd.concat([data.drop(columns='Geography'), geo_encoded_df], axis=1)

# Separate the features from the 'Exited' target column.
X = data.drop(columns='Exited')
y = data['Exited']

# 80/20 train/test split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply it to both splits.
# After this point x_train / x_test are NumPy arrays, not DataFrames.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Persist the fitted encoders and scaler.
with open('gender_encoder.pkl','wb') as file:
    pickle.dump(label_gender,file)

with open('geo_encoder.pkl','wb') as file:
    pickle.dump(one_geo,file)

with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)

In [13]:
# Model factory: KerasClassifier calls this so the search can vary the architecture.

def create_model(neurons = 32,layers =1,meta=None):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int
        Number of units in every hidden layer.
    layers : int
        Number of hidden layers. Values below 1 still produce one hidden
        layer (same as the original behaviour).
    meta : dict, optional
        Metadata dict that scikeras passes to model-building functions which
        accept a ``meta`` argument. When given, its ``"n_features_in_"`` entry
        sets the input width. When omitted (direct call), the width falls back
        to the notebook-level ``x_train`` array.

    Returns
    -------
    A compiled ``Sequential`` model with ReLU hidden layers and a single
    sigmoid output unit, using the Adam optimizer, binary cross-entropy
    loss and accuracy as the tracked metric.
    """
    # Prefer the feature count reported by the wrapper over the global array,
    # so the factory does not depend on hidden notebook state during fitting.
    if meta is not None:
        n_features = meta["n_features_in_"]
    else:
        n_features = x_train.shape[1]

    model = Sequential()
    # Declare the input explicitly; passing `input_shape=` to the first Dense
    # layer makes Keras emit a UserWarning.
    model.add(tf.keras.Input(shape=(n_features,)))

    # One hidden layer is always present, matching the original
    # "first layer + (layers - 1) extra layers" construction.
    for _ in range(max(layers, 1)):
        model.add(Dense(neurons,activation ='relu'))

    model.add(Dense(1,activation ='sigmoid'))
    model.compile(optimizer = 'adam',loss = 'binary_crossentropy',metrics = ['accuracy'])

    return model

In [14]:
# Create a scikit-learn compatible Keras classifier around the model factory.
# `model=` replaces the deprecated `build_fn=` argument (which emits a warning).
# neurons/layers are forwarded to create_model; epochs and batch_size here are
# defaults that the grid search overrides.
model = KerasClassifier(
    model=create_model,
    neurons=32,
    layers=1,
    epochs=100,
    batch_size=10,
    verbose=0,
)

In [20]:
# Search space for the grid search: every combination of these values is tried.
param_grid = dict(
    neurons=[16, 32, 64, 128],
    layers=[1, 2, 3],
    batch_size=[20],
    epochs=[50, 100],
)

In [22]:
# Perform an exhaustive grid search: 5-fold cross-validation for every
# combination in param_grid, with n_jobs=-1 to run fits in parallel.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=5,
    verbose=1,
)
grid_result = grid.fit(x_train,y_train)

# Print the best mean cross-validated score and the parameters that achieved it.
print(f"Best: {grid_result.best_score_:f} using {grid_result.best_params_} ")

Fitting 5 folds for each of 24 candidates, totalling 120 fits


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best: 0.859875 using {'batch_size': 20, 'epochs': 100, 'layers': 1, 'neurons': 16} 
