In [2]:
import pickle
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasClassifier

In [3]:
# Load the pre-transformed churn table and preview its first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index written out by an earlier to_csv call; it is kept here because
# later cells may rely on the current column layout -- confirm before dropping.
df = pd.read_csv(
    filepath_or_buffer=r'../datasets/transformed_churn_data.csv',
)
df.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [4]:
# Separate the target ('Exited') from the feature columns, then hold out
# 33% of the rows for testing with a fixed seed so the split is repeatable.
y = df['Exited']
X = df.drop(columns='Exited')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=32,
)

In [5]:
# Load the previously fitted scaler and apply it to both splits.
#
# The path uses forward slashes so it resolves on Windows, Linux and macOS
# alike (a backslash-separated path only works on Windows).
#
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load scaler.pkl if it was produced by a trusted source.
with open(r'../pickle_files/scaler.pkl','rb') as file_obj:
    scaler = pickle.load(file_obj)

# NOTE: X_train / X_test are overwritten in place, so this cell is not
# idempotent -- re-running it without re-running the split cell would scale
# already-scaled data.
X_test = scaler.transform(X_test)
X_train = scaler.transform(X_train)


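**Note on loading pickled models:** unpickling a file can execute arbitrary code, and a pickled estimator may not load correctly under a different scikit-learn version. Only load pickle files you trust. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations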


In [6]:
# to create different models and try different params
import math

def create_model(neurons=32,layers=2):
    """Build and compile a funnel-shaped dense network for binary classification.

    The first hidden layer has ``neurons`` units and every additional hidden
    layer has half the width of the previous one. The number of additional
    layers is capped at ``floor(log2(neurons)) - 1`` so the narrowest hidden
    layer never shrinks below 2 units.

    Parameters
    ----------
    neurons : int, default 32
        Width of the first hidden layer. Must be at least 1.
    layers : int, default 2
        Requested number of hidden layers. Values larger than the cap above
        are silently reduced to the cap; values below 1 still yield the first
        hidden layer.

    Returns
    -------
    A ``Sequential`` model ending in a single sigmoid unit, compiled with the
    adam optimizer, binary cross-entropy loss and the accuracy metric.

    Raises
    ------
    ValueError
        If ``neurons`` is smaller than 1.

    Notes
    -----
    The input width is read from the notebook-level ``X_train``, so the cell
    that defines ``X_train`` must have been run before this function is called.
    """
    if neurons < 1:
        raise ValueError("neurons must be a positive integer, got %r" % (neurons,))

    # math.log2 returns a float; floor it so the value handed to range() is
    # always an int (range() rejects floats), including for widths that are
    # not powers of two.
    max_halvings = math.floor(math.log2(neurons)) - 1
    # Clamp at 0 so layers <= 1 or neurons == 1 simply add no extra layers.
    extra_layers = max(0, min(layers - 1, max_halvings))

    model = Sequential()
    model.add(Dense(neurons,activation=LeakyReLU(alpha=0.01),input_shape=(X_train.shape[1],)))

    width = neurons
    for _ in range(extra_layers):
        width = width // 2
        model.add(Dense(width,activation=LeakyReLU(alpha=0.01)))

    model.add(Dense(1,activation='sigmoid'))
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

    return model

In [7]:
# Wrap the Keras builder in a scikit-learn compatible classifier.
# `model=` replaces the deprecated `build_fn=` argument, and the builder's own
# arguments are passed with the `model__` routing prefix -- the same names the
# hyper-parameter grid uses -- so there is a single source for each value.
model = KerasClassifier(
    model=create_model,
    model__layers=2,
    model__neurons=32,
    epochs=50,
    batch_size=100,
    verbose=0,
)

In [8]:
# Record the library versions this notebook was run with.
import tensorflow as tf
import sklearn
import scikeras

for _label, _lib in (
    ("TensorFlow:", tf),
    ("Scikit-Learn:", sklearn),
    ("SciKeras:", scikeras),
):
    print(_label, _lib.__version__)


TensorFlow: 2.15.0
Scikit-Learn: 1.5.2
SciKeras: 0.12.0


In [9]:
# Search space: the `model__` entries are routed to create_model, while
# batch_size and epochs configure the KerasClassifier fit itself.
param_grid = dict(
    model__neurons=[16, 32, 64],
    model__layers=[1, 2, 3, 4],
    batch_size=[100, 200],
    epochs=[50, 100],
)

# Exhaustive 3-fold cross-validated search over every combination,
# using all available CPU cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
)
grid_res = grid.fit(X_train, y_train)

# Report the best mean cross-validated score and the parameters behind it.
print("Best %f using %s" % (grid_res.best_score_,grid_res.best_params_))

  X, y = self._initialize(X, y)






Best 0.860598 using {'batch_size': 200, 'epochs': 100, 'model__layers': 4, 'model__neurons': 16}
