In [56]:
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

In [57]:
# Load the raw dataset from a CSV file given by a relative path.
data=pd.read_csv('Churn_Modelling.csv')

In [58]:
# RowNumber, CustomerId and Surname are treated as irrelevant features
# and removed from the frame.
irrelevant_columns = ["RowNumber", "CustomerId", "Surname"]
data = data.drop(columns=irrelevant_columns)

In [59]:
# Categorical variables (Gender and Geography) need to be encoded.
# Gender: fit a label encoder on the column, then replace the column
# with its integer labels.
label_encoder_gender = LabelEncoder()
label_encoder_gender.fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])

In [60]:
# One-hot encode the Geography column.
# OneHotEncoder comes from the imports cell at the top of the notebook.
onehot_encoder_geo = OneHotEncoder()
# The encoder output is converted to a dense array with .toarray() so it can
# be wrapped in a DataFrame afterwards.
geo_encoded = onehot_encoder_geo.fit_transform(data[["Geography"]]).toarray()
geo_encoded


array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [61]:
# Display the generated one-hot column names, using "Geography" as the input feature name.
onehot_encoder_geo.get_feature_names_out(["Geography"])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [62]:
# Wrap the dense one-hot array in a DataFrame labelled with the encoder's feature names.
geo_feature_names = onehot_encoder_geo.get_feature_names_out(["Geography"])
geo_encoded_df = pd.DataFrame(data=geo_encoded, columns=geo_feature_names)

In [63]:
# Replace the raw Geography column with its one-hot encoded columns.
# drop() returns a new frame rather than mutating `data` in place, so `data`
# is only rebound once the concatenation has succeeded; a failure here leaves
# the original frame (still containing Geography) intact.
data = pd.concat([data.drop(columns="Geography"), geo_encoded_df], axis=1)
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [64]:
import pickle

# Persist both fitted encoders to disk as pickle files.
fitted_encoders = {
    'label_encoder_gender.pkl': label_encoder_gender,
    'onehot_encoder_geo.pkl': onehot_encoder_geo,
}
for pickle_path, encoder in fitted_encoders.items():
    with open(pickle_path, 'wb') as file:
        pickle.dump(encoder, file)

In [65]:
# Separate the dependent feature (Exited) from the independent features
y = data['Exited']
X = data.drop(columns='Exited')

# Hold out 20% of the rows for testing; random_state fixes the split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: the scaler is fitted on the training set only and then
# applied to both the training and the test set
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [66]:
def create_model(neurons=32,layers=1,meta=None):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int
        Number of units in every hidden ``Dense`` layer.
    layers : int
        Number of hidden layers. One hidden layer is always created, so
        values below 1 behave like 1.
    meta : dict, optional
        Wrapper metadata. SciKeras passes this automatically when the build
        function declares a ``meta`` parameter; its ``"n_features_in_"`` entry
        is used as the input width. When omitted, the width falls back to the
        notebook-level ``X_train``.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, binary cross-entropy loss and
        an accuracy metric.
    """
    # Prefer the feature count reported by the wrapper so the function does not
    # depend on notebook state; keep the global lookup as a fallback for direct
    # calls such as create_model().
    if meta is not None:
        n_features=meta["n_features_in_"]
    else:
        n_features=X_train.shape[1]

    model=Sequential()
    # Explicit Input object instead of passing input_shape to the first Dense layer
    model.add(tf.keras.Input(shape=(n_features,)))
    # First hidden layer
    model.add(Dense(neurons,activation='relu'))
    # Additional hidden layers
    for _ in range(layers-1):
        model.add(Dense(neurons,activation='relu'))
    # Output layer: a single sigmoid unit
    model.add(Dense(1,activation="sigmoid"))
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
    return model

In [67]:
# Wrap the Keras build function as a scikit-learn compatible classifier.
# `model=` is the current SciKeras argument name; `build_fn=` is deprecated.
# neurons and layers are forwarded to create_model.
model=KerasClassifier(model=create_model,neurons=32,layers=1,verbose=1)

In [68]:
# Candidate values for the grid search: hidden-layer width, number of hidden
# layers, and number of training epochs.
param_grid=dict(
    neurons=[16,32,64,128],
    layers=[1,2],
    epochs=[50,100],
)

In [None]:
# Cross-validated (cv=3) search over every combination in param_grid;
# n_jobs=-1 asks scikit-learn to use all available processors.
search_options = dict(param_grid=param_grid, cv=3, n_jobs=-1, verbose=1)
grid = GridSearchCV(model, **search_options)

# Run the search on the scaled training data
grid_result = grid.fit(X_train, y_train)

# Report the best mean cross-validated score and the parameters that produced it
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)
