In [1]:
import pandas as pd

In [3]:
data = pd.read_csv('refined_bank_churn.csv', index_col=[0])
data.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain,Male,Exited
0,619,42,2,0.0,1,1,1,101348.88,1.0,0.0,0.0,0,1
1,608,41,1,83807.86,1,0,1,112542.58,0.0,0.0,1.0,0,0
2,502,42,8,159660.8,3,1,0,113931.57,1.0,0.0,0.0,0,1
3,699,39,1,0.0,2,0,0,93826.63,1.0,0.0,0.0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0.0,0.0,1.0,0,0


In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [5]:
X = data.drop(['Exited'], axis=1)
y = data['Exited']

In [6]:
sc = StandardScaler()
X = sc.fit_transform(X)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

In [11]:
import keras
from keras.models import Sequential
from keras import Input
from keras.layers import Dense

In [19]:
def build_model(hp):
    model = Sequential()
    for i in range(hp.Int("num_layers", 2, 20)):
        model.add(Dense(
            units=hp.Int(f"units_{i}", min_value=32, max_value=512, step=32),
            activation='relu'
            )
        )
    model.add(Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid'))
    model.compile(
        optimizer=keras.optimizers.Adamax(hp.Choice('learning_rate', [2e-3, 1e-3, 1e-2])),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

In [20]:
from keras_tuner import RandomSearch

In [21]:
tuner = RandomSearch(
    hypermodel=build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    overwrite=True,
    directory="my_dir",
    project_name="Bank_Churn",
)

In [22]:
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.002, 'conditions': [], 'values': [0.002, 0.001, 0.01], 'ordered': True}


In [23]:
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Trial 5 Complete [00h 00m 58s]
val_accuracy: 0.859333316485087

Best val_accuracy So Far: 0.859333316485087
Total elapsed time: 00h 05m 28s


In [24]:
# Getting the best model
model = tuner.get_best_models(num_models=1)
best_model = model[0]
best_model.summary()

  trackable.load_own_variables(weights_store.get(inner_path))


## Retraining the model on entire dataset

In [27]:
import numpy as np
x_all = np.concatenate((X_train, X_test))
y_all = np.concatenate((y_train, y_test))

In [28]:
best_hps = tuner.get_best_hyperparameters(1)
model = build_model(best_hps[0])
model.fit(x=x_all, y=y_all, epochs=1)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.8038 - loss: 0.4841


<keras.src.callbacks.history.History at 0x15066f32990>

In [30]:
y_test_pred = model.predict(X_test)
y_test_pred = (y_test_pred > 0.5)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [31]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_test_pred)
ac = accuracy_score(y_test, y_test_pred)
print(f"Confusion Matrix = {cm}")
print(f"Accuracy Score = {ac}")

Confusion Matrix = [[1532   37]
 [ 258  173]]
Accuracy Score = 0.8525
