In [87]:
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import pickle

In [51]:
## Read the churn dataset from the working directory; the bare `df` renders a preview.
csv_path = "Churn_Modelling.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [52]:
## Remove the RowNumber, CustomerId and Surname columns from `df` in place.
unused_columns = ['RowNumber', 'CustomerId', 'Surname']
df.drop(columns=unused_columns, inplace=True)

In [53]:
## Load the scaler and the two encoders that the cells below apply via `.transform`.
## NOTE(review): pickle.load can execute arbitrary code -- only load files from a trusted source.
def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


scaler = _load_pickle('res/scaler.pkl')
geo_encoder = _load_pickle('res/geo_encoder.pkl')
gender_encoder = _load_pickle('res/gender_encoder.pkl')

In [54]:
## Replace the Gender column with the output of the loaded gender encoder.
encoded_gender = gender_encoder.transform(df['Gender'])
df['Gender'] = encoded_gender

In [55]:
## Run the loaded geography encoder on the Geography column (passed as a one-column frame).
geography_frame = df[['Geography']]
data = geo_encoder.transform(geography_frame)

In [56]:
## Ask the encoder for the output column names it derives from 'Geography', then show them.
onehot_names = geo_encoder.get_feature_names_out(input_features=['Geography'])
onehot_names

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [57]:
## Convert the encoder output to a dense array and store it in `df` under the
## column names held in `onehot_names`.
df[onehot_names] = data.toarray()

In [59]:
## The original Geography column is no longer needed once its encoded columns exist.
df.drop(columns='Geography', inplace=True)

In [60]:
## Preview `df` after the Gender and Geography columns have been encoded.
df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


In [61]:
## Separate the target column ('Exited') from the remaining feature columns.
target_column = 'Exited'
y = df[target_column]
X = df.drop(columns=[target_column])

In [62]:
## Apply the loaded scaler to the features. `X` is overwritten with the result, so
## re-running this cell on its own would apply the transform a second time.
## NOTE(review): presumably the scaler was fitted on these columns in this order -- confirm.
X = scaler.transform(X)

In [63]:
## Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [74]:
## Number of feature columns; create_model uses this same value as its input width.
X_train.shape[1]

12

In [88]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

In [115]:
def create_model(neurons=32, layer=1, meta=None):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int
        Number of units in each hidden Dense layer.
    layer : int
        Number of hidden Dense (ReLU) layers; 0 leaves only the sigmoid output layer.
    meta : dict, optional
        Metadata dict that scikeras passes to model-building functions declaring a
        ``meta`` parameter. When it contains ``n_features_in_`` that value is used
        as the input width, so the function no longer has to rely on the
        notebook-level ``X_train``. When absent (e.g. a direct call), the input
        width falls back to ``X_train.shape[1]`` as before.

    Returns
    -------
    A compiled ``Sequential`` model using binary cross-entropy loss, the Adam
    optimizer and the accuracy metric.
    """
    if meta is not None and "n_features_in_" in meta:
        n_features = meta["n_features_in_"]
    else:
        # Fallback keeps the original behaviour for callers that do not pass `meta`.
        n_features = X_train.shape[1]

    network = Sequential()
    network.add(layers.Input(shape=(n_features,)))

    # Stack `layer` identical hidden layers.
    for _ in range(layer):
        network.add(layers.Dense(neurons, activation='relu'))

    # Single sigmoid unit for the binary target.
    network.add(layers.Dense(1, activation='sigmoid'))

    network.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])
    return network

In [119]:
## Create Keras Classifier
## `model=` is the current scikeras argument for the model-building function;
## the older `build_fn=` spelling is deprecated and triggers a warning.
## `layer` and `neurons` are the defaults forwarded to create_model.
model = KerasClassifier(model=create_model, layer=1, neurons=32, verbose=0)

In [122]:
## Define the Grid Search parameters
param_grid = dict(
    neurons=[16, 32, 64, 128],
    layer=[2, 3],
    epochs=[20, 30],
)

In [123]:
## Perform Grid Search
## 3-fold cross-validation over every combination in `param_grid`, using all CPU cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
)
grid_search = grid.fit(X_train, y_train)

  X, y = self._initialize(X, y)


In [124]:
## Show the best cross-validated score and the parameter combination that achieved it.
grid_search.best_score_, grid_search.best_params_

(0.8578750893197614, {'epochs': 20, 'layer': 2, 'neurons': 32})