In [2]:
!pip install scikeras


Collecting scikeras
  Using cached scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Using cached scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [4]:
# Load the churn dataset and drop the three columns that are not used as features.
data = pd.read_csv("Churn_Modelling.csv").drop(
    columns=["RowNumber", "CustomerId", "Surname"]
)

# Encode the "Gender" column as integer labels.
label_encoder_gender = LabelEncoder()
data["Gender"] = label_encoder_gender.fit_transform(data["Gender"])

# One-hot encode the "Geography" column and densify the result so it can be
# wrapped in a DataFrame with one named column per category.
onehot_encoder_geo = OneHotEncoder(handle_unknown="ignore")
geo_encoded = onehot_encoder_geo.fit_transform(data[["Geography"]]).toarray()
geo_encoded_df = pd.DataFrame(
    geo_encoded,
    columns=onehot_encoder_geo.get_feature_names_out(["Geography"]),
)

# Swap the raw "Geography" column for its one-hot encoded columns.
data = pd.concat([data.drop(columns="Geography"), geo_encoded_df], axis=1)

# Separate the independent features (X) from the dependent target (y).
X = data.drop(columns="Exited")
y = data["Exited"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted encoders and the scaler, one pickle file each.
for pickle_path, fitted_obj in (
    ("label_encoder_gender.pkl", label_encoder_gender),
    ("onehot_encoder_geo.pkl", onehot_encoder_geo),
    ("scaler.pkl", scaler),
):
    with open(pickle_path, "wb") as file:
        pickle.dump(fitted_obj, file)

In [5]:
# Define a function to create the model so different parameters can be tried (KerasClassifier)
def create_model(neurons=32, layers=1, meta=None):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int, default 32
        Number of units in every hidden ``Dense`` layer.
    layers : int, default 1
        Number of hidden layers. Values below 1 behave like 1, because the
        first hidden layer is always added.
    meta : dict or None, default None
        Fit-time metadata. SciKeras supplies this argument when the build
        function declares a ``meta`` parameter; its ``"n_features_in_"``
        entry gives the input width. When ``None`` (e.g. a direct call),
        the width falls back to the module-level ``X_train``.

    Returns
    -------
    Sequential
        Model compiled with the "adam" optimizer, "binary_crossentropy"
        loss and the "accuracy" metric.
    """
    # Prefer the width reported by the wrapper over the notebook-global array,
    # so the model does not silently depend on hidden kernel state.
    n_features = X_train.shape[1] if meta is None else meta["n_features_in_"]

    model = Sequential()
    # An explicit Input replaces `input_shape=` on the first Dense layer,
    # which Keras warns about for Sequential models.
    model.add(tf.keras.Input(shape=(n_features,)))

    # One hidden layer is always present; `layers` adds any further ones.
    for _ in range(max(layers, 1)):
        model.add(Dense(neurons, activation="relu"))

    # Single sigmoid unit for the binary target.
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

    return model

In [6]:
## Create a Keras Classifier
# `model=` is the current SciKeras keyword for the build function; the older
# `build_fn=` spelling is deprecated and emits a warning on every fit.
# `neurons` and `layers` are defaults for create_model's parameters of the
# same name, which the grid search can then override.
model = KerasClassifier(model=create_model, neurons=32, layers=1, verbose=1)

In [7]:
# Define the grid search parameters: 4 neuron counts x 2 depths x 2 epoch
# settings, i.e. 16 candidate combinations.
param_grid = dict(
    neurons=[16, 32, 64, 128],
    layers=[1, 2],
    epochs=[50, 100],
)

In [8]:
# Perform grid search: every candidate in param_grid is evaluated with
# 3-fold cross-validation, and n_jobs=-1 requests all available processors.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=1,
)
grid_result = grid.fit(X_train, y_train)

# Print the best parameters together with their mean cross-validated score
print(f"Best: {grid_result.best_score_:f} using {grid_result.best_params_}")

Fitting 3 folds for each of 16 candidates, totalling 48 fits
Epoch 1/100


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 764us/step - accuracy: 0.6289 - loss: 0.6422
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 721us/step - accuracy: 0.8116 - loss: 0.4380
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 812us/step - accuracy: 0.8316 - loss: 0.4016
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 707us/step - accuracy: 0.8424 - loss: 0.3869
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 764us/step - accuracy: 0.8537 - loss: 0.3606
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 673us/step - accuracy: 0.8525 - loss: 0.3598
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 690us/step - accuracy: 0.8529 - loss: 0.3638
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 706us/step - accuracy: 0.8589 - loss: 0.3533
Epoch 9/100
[1m250/250[0m 