In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder,LabelEncoder
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasClassifier
import pickle

In [3]:
# Load the raw churn dataset from the project-relative data directory.
data = pd.read_csv(filepath_or_buffer='data/Churn_Modelling.csv')

In [4]:
# Preview the first five rows to check the columns that were loaded.
data.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
# Drop the row number, customer id and surname columns so they are not used
# as model features.
# NOTE: this cell overwrites `data`; re-running it without reloading the CSV
# fails because the dropped columns no longer exist.
data=data.drop(['RowNumber','CustomerId','Surname'],axis=1)

# Encoding categorical variables: 'Gender' is replaced by integer labels.
# The fitted encoder is kept so the same mapping can be reused later.
label_encoder_gender=LabelEncoder()
data['Gender']=label_encoder_gender.fit_transform(data['Gender'])

# One-hot encoding for 'Geography'
onehot_encoder_geo=OneHotEncoder(handle_unknown='ignore')
geo_encoded=onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_df=pd.DataFrame(
    geo_encoded,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
    # Reuse the source index: pd.concat(axis=1) aligns on index labels, so a
    # fresh 0..n-1 index would misalign rows (and insert NaNs) whenever `data`
    # does not carry a default RangeIndex.
    index=data.index,
)

# Concatenate the one-hot encoded columns back to the original dataframe,
# then remove the raw text column they replace.
data=pd.concat([data,geo_df],axis=1)
data=data.drop('Geography',axis=1)

# Splitting features and target variable
X=data.drop('Exited',axis=1)
y=data['Exited']

In [8]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler learns its statistics from the
# training rows only and then applies them, unchanged, to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Saving the scaler and the encoders is skipped here because it has already been done.
  

In [9]:
# define a function to create the model and try different hyperparameters (KerasClassifier)
def create_model(neurons=32, layers=1, meta=None):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int, default 32
        Number of units in every hidden layer.
    layers : int, default 1
        Number of hidden layers. The first hidden layer is always created,
        so values below 1 still yield a single hidden layer.
    meta : dict or None, default None
        Fit-time metadata that scikeras passes to model-building functions
        which declare a ``meta`` parameter. When it contains
        ``"n_features_in_"``, that value sets the input width, so the network
        matches the data actually given to ``fit``. When absent (for example
        on a direct call), the width falls back to the notebook-level
        ``X_train`` as before.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, binary cross-entropy loss
        and accuracy as the reported metric.
    """
    if meta is not None and "n_features_in_" in meta:
        n_features = int(meta["n_features_in_"])
    else:
        # Backward-compatible fallback: relies on X_train existing in the
        # notebook namespace.
        n_features = X_train.shape[1]

    model=Sequential()
    model.add(Dense(neurons,activation='relu',input_shape=(n_features,))) # input layer + first hidden layer

    for _ in range(layers-1): # add additional hidden layers if layers > 1
        model.add(Dense(neurons,activation='relu')) # add hidden layer

    model.add(Dense(1,activation='sigmoid')) # output layer: single sigmoid unit
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy']) # compile the model
    return model

In [10]:
# Wrap the Keras model factory in a scikeras KerasClassifier so it can be
# passed to GridSearchCV as the estimator. These epochs/batch_size values are
# the starting settings; verbose=0 turns off per-epoch logging.
model = KerasClassifier(
    model=create_model,
    epochs=50,
    batch_size=10,
    verbose=0,
)

In [12]:
# Hyperparameter search space (4 x 3 x 2 = 24 combinations).
# Keys prefixed with `model__` are meant for create_model's arguments
# (scikeras routing convention); `epochs` is set on the wrapper itself.
param_grid = dict(
    model__neurons=[16, 32, 64, 128],
    model__layers=[1, 2, 3],
    epochs=[50, 100],
)

In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation on the
# training data; n_jobs=-1 asks for all available cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
)

# Keep whatever fit() returns under `grid_result` for the reporting cell.
grid_result = grid.fit(X_train, y_train)


In [None]:
# Report the winning parameter combination and its score from the search.
best_params = grid_result.best_params_
best_score = grid_result.best_score_
print(f"Best Hyperparameters: {best_params}")
print(f"Best Accuracy: {best_score}")