In [11]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [8]:
# Read the churn dataset from a CSV file in the current working directory.
data = pd.read_csv('Churn_Modelling.csv')

In [9]:
# Remove the RowNumber, CustomerId and Surname columns before encoding.
data=data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# Replace the Gender column with integer labels; the fitted encoder is kept
# in label_encoder_gender so the same mapping can be reapplied later.
label_encoder_gender=LabelEncoder()
data['Gender']=label_encoder_gender.fit_transform(data['Gender'])

# One-hot encode Geography into a dense 2-D array (one column per category).
# The array is only bound to geo_encoded: writing a multi-column array back
# into the single 'Geography' column is invalid and the column is dropped below.
one_hot_encoder_geo=OneHotEncoder()
geo_encoded=one_hot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoded_df=pd.DataFrame(
    geo_encoded,
    columns=one_hot_encoder_geo.get_feature_names_out(['Geography']),
    index=data.index,  # share data's index so the column-wise concat lines rows up
)

# Swap the original Geography column for its one-hot encoded columns.
data=pd.concat([data.drop('Geography', axis=1), geo_encoded_df], axis=1)

# Split into the feature matrix and the 'Exited' target column.
X=data.drop('Exited', axis=1)
y=data['Exited']

In [10]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
## Persist the fitted encoders and scaler as pickle files
artifacts = (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder.pkl', one_hot_encoder_geo),
    ('scaler.pkl', scaler),
)

# Each (filename, object) pair is written to its own file, in the order listed.
for filename, fitted_obj in artifacts:
    with open(filename, 'wb') as file:
        pickle.dump(fitted_obj, file)

In [15]:
## Define a function to create the model and try different parameters(KerasClassifier)

def create_model(neurons=32,layers=1):
    """Build and compile a fully connected binary classifier.

    Args:
        neurons: Number of units in every hidden Dense layer.
        layers: Number of hidden Dense layers. One hidden layer is always
            added, so values below 1 produce the same model as 1.

    Returns:
        A compiled Sequential model ending in a single sigmoid unit, using the
        'adam' optimizer, 'binary_crossentropy' loss and an 'accuracy' metric.
    """
    model=Sequential()
    # Declare the input width with an explicit Input object instead of passing
    # input_shape to the first Dense layer, which current Keras discourages for
    # Sequential models. The width is read from the module-level X_train.
    model.add(tf.keras.Input(shape=(X_train.shape[1],)))
    model.add(Dense(neurons,activation='relu'))

    # Remaining hidden layers (none when layers <= 1).
    for _ in range(layers-1):
        model.add(Dense(neurons, activation='relu'))
    model.add(Dense(1,activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [19]:
## Create Keras classifier
# The builder is passed as `model`: scikeras deprecates the older `build_fn`
# name and warns that it will eventually raise an error. The neurons/layers
# keyword arguments are the defaults forwarded to create_model.
model=KerasClassifier(model=create_model,neurons=32,layers=1,verbose=0)

In [20]:
# Candidate values to search: hidden-layer width, hidden-layer count, training epochs.
param_grid = dict(
    neurons=[16, 32, 64, 128],
    layers=[1, 2],
    epochs=[50, 100],
)

In [21]:
# Exhaustive search over param_grid with 3-fold cross-validation; n_jobs=-1
# asks for all available processors.
grid = GridSearchCV(model, param_grid, cv=3, n_jobs=-1)
grid_result = grid.fit(X_train, y_train)

# Report the best mean cross-validated score and the parameters that produced it.
best_score = grid_result.best_score_
best_params = grid_result.best_params_
print("Best: %f using %s" % (best_score, best_params))

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best: 0.857374 using {'epochs': 100, 'layers': 1, 'neurons': 16}
