In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow import keras
import pickle

In [10]:
# Location of the pickled train / validation / test subsets.
# NOTE(review): absolute, machine-specific path — this cell only runs where
# exactly this path exists; consider a configurable data directory.
pickle_file_path = "C:/Users/Hauke/OneDrive - ucp.pt/04_Thesis/00_GitHub/Thesis/data/interim/subsets_data.pickle"

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files that come from a trusted source.
with open(pickle_file_path, 'rb') as f:
    subsets = pickle.load(f)

# Pull the scaled feature sets and the label sets out of the loaded mapping.
X_train_scaled, X_test_scaled, X_val_scaled = (
    subsets[key] for key in ('X_train_scaled', 'X_test_scaled', 'X_val_scaled')
)
y_train, y_test, y_val = (
    subsets[key] for key in ('y_train', 'y_test', 'y_val')
)

# Column labels applied to the scaled feature sets, one per feature column.
# NOTE(review): 'debt_to_income_ratio_30%-<36%-41%%' looks unusual next to its
# neighbours — confirm it matches the column name produced upstream.
X_column_names = [
    'county_code',
    'interest_rate',
    'loan_to_value_ratio',
    'applicant_sex_Female',
    'applicant_race-1_White',
    'loan_type_FHA',
    'loan_type_VA',
    'loan_type_FSA/RHS',
    'debt_to_income_ratio_20%-<30%',
    'debt_to_income_ratio_30%-<36%-41%%',
    'debt_to_income_ratio_36%-41%',
    'debt_to_income_ratio_41%-45%',
    'debt_to_income_ratio_46%-49%',
    'debt_to_income_ratio_50%-60%',
    'debt_to_income_ratio_<20%',
    'debt_to_income_ratio_>60%',
    'debt_to_income_ratio_missing',
    'lien_status_Subordinate lien',
]

# Same labels with the 'applicant_race-1_White' column left out.
X_column_names_trans = [
    name for name in X_column_names if name != 'applicant_race-1_White'
]

# Labelled DataFrame views of the three scaled feature sets.
X_train_scaled_df, X_test_scaled_df, X_val_scaled_df = (
    pd.DataFrame(features, columns=X_column_names)
    for features in (X_train_scaled, X_test_scaled, X_val_scaled)
)

In [11]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score
from keras.wrappers.scikit_learn import KerasClassifier

def create_model(optimizer='adam', dropout_rate=0.1, neurons_layer1=32, neurons_layer2=64, neurons_layer3=128, l2_penalty=0.001):
    """Build and compile a feed-forward binary classifier.

    The network consists of four ReLU hidden layers followed by a single
    sigmoid output unit. The input width is read from the module-level
    ``X_train_scaled`` (its second dimension).

    Parameters
    ----------
    optimizer : optimizer passed straight to ``model.compile``.
    dropout_rate : rate used by each of the three Dropout layers.
    neurons_layer1 : units in the first hidden layer.
    neurons_layer2 : units in the second AND the fourth hidden layer.
    neurons_layer3 : units in the third hidden layer.
    l2_penalty : L2 factor for the kernel regularizer on hidden layers
        one, two and four (the third hidden layer is not regularized).

    Returns
    -------
    The compiled ``keras.Sequential`` model (binary cross-entropy loss,
    accuracy metric).
    """
    n_features = X_train_scaled.shape[1]

    def relu_dense(units, regularized=True, **extra):
        # One ReLU Dense layer; a fresh L2 regularizer is attached unless disabled.
        penalty = keras.regularizers.l2(l2_penalty) if regularized else None
        return keras.layers.Dense(units, activation="relu", kernel_regularizer=penalty, **extra)

    # Layers are created in the same order in which they are stacked.
    stack = [
        relu_dense(neurons_layer1, input_shape=(n_features,)),
        relu_dense(neurons_layer2),
        keras.layers.Dropout(dropout_rate),
        relu_dense(neurons_layer3, regularized=False),
        keras.layers.Dropout(dropout_rate),
        relu_dense(neurons_layer2),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(1, activation="sigmoid"),
    ]

    network = keras.Sequential(stack)
    network.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
    return network

# Wrap the Keras builder as a scikit-learn style estimator: each fit runs for
# 30 epochs with batches of 48 samples and per-epoch logging switched off.
# NOTE(review): the keras.wrappers.scikit_learn wrapper is reported as
# deprecated in favour of SciKeras in recent TensorFlow releases — confirm
# against the installed version before upgrading.
model = KerasClassifier(
    build_fn=create_model,
    epochs=30,
    batch_size=48,
    verbose=0,
)

# Candidate values for every create_model argument the search may sample.
param_grid = {
    'optimizer': ['adam', 'rmsprop'],
    'dropout_rate': [0.1, 0.25, 0.5],
    'neurons_layer1': [32, 64, 128],
    'neurons_layer2': [64, 128, 256],
    'neurons_layer3': [32, 64, 128],
    'l2_penalty': [0.001, 0.01, 0.1],
}

# Randomized search: 10 sampled configurations, each scored with 3-fold
# cross-validation (30 fits in total); random_state fixes which
# configurations get sampled.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=10,
    cv=3,
    verbose=2,
    random_state=42,
)

# validation_data is handed through as an extra fit argument for the wrapped
# estimator, in addition to the cross-validation folds.
random_search.fit(
    X_train_scaled,
    y_train,
    validation_data=(X_val_scaled, y_val),
)

print("Best parameters found:", random_search.best_params_)

# Score the winning estimator on the separate validation split.
best_model = random_search.best_estimator_
validation_accuracy = best_model.score(X_val_scaled, y_val)
print("Validation Accuracy of the best model:", validation_accuracy)

  model = KerasClassifier(build_fn=create_model, epochs=30, batch_size=48, verbose=0)


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END dropout_rate=0.5, l2_penalty=0.01, neurons_layer1=128, neurons_layer2=128, neurons_layer3=32, optimizer=adam; total time=10.9min
[CV] END dropout_rate=0.5, l2_penalty=0.01, neurons_layer1=128, neurons_layer2=128, neurons_layer3=32, optimizer=adam; total time=12.4min
[CV] END dropout_rate=0.5, l2_penalty=0.01, neurons_layer1=128, neurons_layer2=128, neurons_layer3=32, optimizer=adam; total time=10.2min
[CV] END dropout_rate=0.25, l2_penalty=0.01, neurons_layer1=128, neurons_layer2=128, neurons_layer3=128, optimizer=adam; total time=10.5min
[CV] END dropout_rate=0.25, l2_penalty=0.01, neurons_layer1=128, neurons_layer2=128, neurons_layer3=128, optimizer=adam; total time=10.8min
[CV] END dropout_rate=0.25, l2_penalty=0.01, neurons_layer1=128, neurons_layer2=128, neurons_layer3=128, optimizer=adam; total time=11.9min
[CV] END dropout_rate=0.5, l2_penalty=0.01, neurons_layer1=128, neurons_layer2=256, neurons_layer3=128, o