### Imports

In [None]:
import os
import time
import warnings

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

### Data Preprocessing

In [None]:
# Load the labelled training table and the unlabelled hold-out table.
path = './drive/MyDrive/Projects/data/DS_2/'
train = pd.read_csv(path + 'bank_data_train.csv')
test = pd.read_csv(path + 'bank_data_test.csv')

# Target column
target = 'TARGET'

# Basic info
print(train.shape, test.shape)
print(train[target].value_counts(normalize=True))

# Separate features and target
X = train.drop(columns=target)
y = train[target]

# Split train into train + validation, stratified on the label
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

# Work on explicit copies: the split results are slices of `X`, and the
# column assignments below would otherwise trigger SettingWithCopyWarning.
X_train = X_train.copy()
X_val = X_val.copy()
# Keep exactly the training feature columns, in training order, so the fitted
# imputers and scaler can be applied to the hold-out frame.
test = test[X_train.columns].copy()

# Identify numeric and categorical columns
num_cols = X_train.select_dtypes(exclude='object').columns.tolist()
cat_cols = X_train.select_dtypes(include='object').columns.tolist()

print("Numerical columns:", num_cols)
print("Categorical columns:", cat_cols)

# Impute missing values; every statistic is fitted on the training split only.
num_imputer = SimpleImputer(strategy='median')
cat_imputer = SimpleImputer(strategy='most_frequent')

X_train[num_cols] = num_imputer.fit_transform(X_train[num_cols])
X_val[num_cols] = num_imputer.transform(X_val[num_cols])
test[num_cols] = num_imputer.transform(test[num_cols])

# Skip the categorical imputer when there is nothing to impute; fitting it on
# a zero-column frame would raise.
if cat_cols:
    X_train[cat_cols] = cat_imputer.fit_transform(X_train[cat_cols])
    X_val[cat_cols] = cat_imputer.transform(X_val[cat_cols])
    test[cat_cols] = cat_imputer.transform(test[cat_cols])

# Frequency-encode categorical features; categories unseen in training map to 0.
for col in cat_cols:
    freq = X_train[col].value_counts(normalize=True)
    X_train[col] = X_train[col].map(freq)
    X_val[col] = X_val[col].map(freq).fillna(0)
    test[col] = test[col].map(freq).fillna(0)

# Clip outliers to the 1.5 * IQR fences computed on the training split
for col in num_cols:
    Q1 = X_train[col].quantile(0.25)
    Q3 = X_train[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    X_train[col] = X_train[col].clip(lower, upper)
    X_val[col] = X_val[col].clip(lower, upper)
    test[col] = test[col].clip(lower, upper)

# Scale all features. The original row index is preserved so each scaled frame
# stays aligned with its label Series (y_train / y_val keep their original index).
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_val = pd.DataFrame(scaler.transform(X_val), columns=X_val.columns, index=X_val.index)
test_scaled = pd.DataFrame(scaler.transform(test), columns=test.columns, index=test.index)

print("Preprocessing complete.")

In [None]:
# Directory prefix that is concatenated with the CSV file names when loading data.
path = './drive/MyDrive/Projects/data/DS_2/'

In [None]:
# Read the training and hold-out CSV files from `path`; this rebinds `train`
# and `test`, discarding any in-memory changes made to frames with those names.
train = pd.read_csv(path + 'bank_data_train.csv')
test = pd.read_csv(path + 'bank_data_test.csv')

Filling missing values

In [None]:
# Derive the column lists from the features only. Including 'TARGET' would
# (a) median-impute the label and (b) break any later loop that indexes a
# feature frame built with train.drop(columns='TARGET') by these names.
feature_frame = train.drop(columns='TARGET')
num_cols = feature_frame.select_dtypes(exclude='object').columns
cat_cols = feature_frame.select_dtypes(include='object').columns

# NOTE(review): the imputers are fitted on the full training table, i.e. before
# the train/test split performed further down, so the held-out rows influence
# the fill values. Move the fit after the split if that leakage matters.
num_imputer = SimpleImputer(strategy='median')
train[num_cols] = num_imputer.fit_transform(train[num_cols])
test[num_cols] = num_imputer.transform(test[num_cols])

cat_imputer = SimpleImputer(strategy='most_frequent')
# Nothing to impute (and the imputer cannot be fitted) when there are no
# categorical columns.
if len(cat_cols) > 0:
    train[cat_cols] = cat_imputer.fit_transform(train[cat_cols])
    test[cat_cols] = cat_imputer.transform(test[cat_cols])

Splitting features and target

In [None]:
# Label vector and feature matrix taken from the training table.
y = train['TARGET']
X = train.drop(columns='TARGET')

# 80/20 split, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Report the shape of every piece, in the order train X, train y, test X, test y.
for split_part in (X_train, y_train, X_test, y_test):
    print(split_part.shape)

(284152, 115)
(284152,)
(71038, 115)
(71038,)


Encoding Categorical Variables

In [None]:
# Replace each category with its relative frequency in the training split.
for col in cat_cols:
    category_share = X_train[col].value_counts(normalize=True)
    # Categories that never occur in training have no frequency; encode them as 0.
    X_test[col] = X_test[col].map(category_share).fillna(0)
    X_train[col] = X_train[col].map(category_share)

Handling Outliers

In [None]:
# Clip every numeric column to the 1.5 * IQR fences computed on the training split.
for col in num_cols:
    q_low, q_high = X_train[col].quantile([0.25, 0.75])
    spread = q_high - q_low
    lower = q_low - 1.5 * spread
    upper = q_high + 1.5 * spread
    X_train[col] = X_train[col].clip(lower, upper)
    X_test[col] = X_test[col].clip(lower, upper)

Check Class Distribution (note: no feature scaling is applied in this section)

In [None]:
from collections import Counter

# Count the labels in each split to confirm the stratified split kept the class ratio.
train_label_counts = Counter(y_train)
test_label_counts = Counter(y_test)

print("Train class distribution:", train_label_counts)
print("Test class distribution:", test_label_counts)

Train class distribution: Counter({0.0: 261012, 1.0: 23140})
Test class distribution: Counter({0.0: 65253, 1.0: 5785})


### Naive Classifier

In [None]:
# Majority-class baseline: always predicts the most frequent training label.
dummy = DummyClassifier(strategy='most_frequent')
dummy.fit(X_train, y_train)

# Hard predictions and positive-class probabilities on the validation split.
y_proba_dummy = dummy.predict_proba(X_val)[:, 1]
y_pred_dummy = dummy.predict(X_val)

dummy_accuracy = accuracy_score(y_val, y_pred_dummy)
dummy_auc = roc_auc_score(y_val, y_proba_dummy)

print("Dummy Classifier metrics:")
print("Accuracy:", dummy_accuracy)
print("ROC AUC:", dummy_auc)
print(classification_report(y_val, y_pred_dummy))


In [None]:
# Suppress scikit-learn's UndefinedMetricWarning for the rest of the session.
# NOTE(review): presumably aimed at the zero-precision warning produced when a
# model never predicts the positive class — confirm before hiding it globally.
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

In [None]:
# Majority-class baseline evaluated on the held-out test split.
dummy = DummyClassifier(strategy="most_frequent")
dummy.fit(X_train, y_train)

y_pred_dummy = dummy.predict(X_test)
baseline_accuracy = accuracy_score(y_test, y_pred_dummy)
baseline_report = classification_report(y_test, y_pred_dummy)

print("Accuracy:", baseline_accuracy)
print(baseline_report)

Accuracy: 0.918564711844365
              precision    recall  f1-score   support

         0.0       0.92      1.00      0.96     65253
         1.0       0.00      0.00      0.00      5785

    accuracy                           0.92     71038
   macro avg       0.46      0.50      0.48     71038
weighted avg       0.84      0.92      0.88     71038



### Random Forest

In [None]:
# Hyper-parameter grid: 2 x 3 x 2 = 12 candidate forests.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
)

rf = RandomForestClassifier(n_jobs=-1, random_state=42)
# 5-fold cross-validated search, ranking candidates by ROC AUC.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    verbose=2,
)

# Wall-clock timing of the whole search.
start_time = time.time()
grid_search.fit(X_train, y_train)
end_time = time.time()

print(f"Random Forest GridSearch done in {end_time - start_time:.2f}s")
print("Best params:", grid_search.best_params_)

# best_estimator_ is the winning configuration exposed by the fitted search.
best_rf = grid_search.best_estimator_
y_proba_rf = best_rf.predict_proba(X_val)[:, 1]
y_pred_rf = best_rf.predict(X_val)

rf_accuracy = accuracy_score(y_val, y_pred_rf)
rf_auc = roc_auc_score(y_val, y_proba_rf)

print("Random Forest metrics:")
print("Accuracy:", rf_accuracy)
print("ROC AUC:", rf_auc)
print(classification_report(y_val, y_pred_rf))


In [None]:
# Hyper-parameter grid: 2 x 3 x 2 = 12 candidate forests.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5]
}

# n_jobs=-1 uses every available core when fitting each forest.
rf = RandomForestClassifier(random_state=42, n_jobs=-1)
# Rank candidates by ROC AUC rather than accuracy: the classes are heavily
# imbalanced, so accuracy is dominated by the majority class (the
# always-majority dummy baseline reports 0.9186 accuracy) and cannot separate
# useful models from trivial ones.
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='roc_auc')

# Wall-clock timing of the whole search.
start_time = time.time()
grid_search.fit(X_train, y_train)
end_time = time.time()

training_time = end_time - start_time
print(f"Grid search training time: {training_time:.2f} seconds")

# best_estimator_ is the winning configuration exposed by the fitted search.
best_rf = grid_search.best_estimator_
print("Best hyperparameters:", grid_search.best_params_)

y_pred_rf = best_rf.predict(X_test)

print("Accuracy on test set:", accuracy_score(y_test, y_pred_rf))
print("Classification Report:\n", classification_report(y_test, y_pred_rf))

### Scikit-learn MLPClassifier

In [None]:
# Two hidden layers (64, 32) trained with Adam; early_stopping=True makes
# scikit-learn hold out part of the training data and stop once the validation
# score stops improving.
# `class_weight` was removed: MLPClassifier does not accept that argument, so
# passing it raises TypeError before any training happens.
mlp = MLPClassifier(hidden_layer_sizes=(64, 32),
                    activation='relu',
                    solver='adam',
                    max_iter=200,
                    random_state=42,
                    early_stopping=True)

# Wall-clock timing of the fit.
start_time = time.time()
mlp.fit(X_train, y_train)
end_time = time.time()

print(f"MLPClassifier training time: {end_time - start_time:.2f}s")

# Hard predictions and positive-class probabilities on the validation split.
y_pred_mlp = mlp.predict(X_val)
y_proba_mlp = mlp.predict_proba(X_val)[:, 1]

print("MLPClassifier metrics:")
print("Accuracy:", accuracy_score(y_val, y_pred_mlp))
print("ROC AUC:", roc_auc_score(y_val, y_proba_mlp))
print(classification_report(y_val, y_pred_mlp))


In [None]:
# Two hidden layers (64, 32) trained with Adam.
# `class_weight` was removed: MLPClassifier does not accept that argument, so
# passing it raises TypeError before any training happens.
mlp = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu',
                    solver='adam', max_iter=200, random_state=42)

# Wall-clock timing of the fit.
start_time = time.time()
mlp.fit(X_train, y_train)
end_time = time.time()

training_time = end_time - start_time
print(f"MLP training time: {training_time:.2f} seconds")

y_pred_mlp = mlp.predict(X_test)
print("Accuracy on test set:", accuracy_score(y_test, y_pred_mlp))
print("Classification Report:\n", classification_report(y_test, y_pred_mlp))

# ROC AUC is computed from the positive-class probability, not the hard label.
y_proba_mlp = mlp.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_proba_mlp)
print("ROC AUC on test set:", roc_auc)

MLP training time: 269.10 seconds
Accuracy on test set: 0.918564711844365
Classification Report:
               precision    recall  f1-score   support

         0.0       0.92      1.00      0.96     65253
         1.0       0.00      0.00      0.00      5785

    accuracy                           0.92     71038
   macro avg       0.46      0.50      0.48     71038
weighted avg       0.84      0.92      0.88     71038



### Keras (TensorFlow High-Level API)

In [None]:
# Feed-forward network: 64 -> 32 ReLU units with 30% dropout after each hidden
# layer, and a single sigmoid unit for the binary label.
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(n_features,)))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(0.001),
    metrics=['accuracy'],
)

# Stop after 5 epochs without a better validation loss and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop],
    verbose=2,
)

# Flatten the (n, 1) probability output and threshold at 0.5 for hard labels.
y_proba_keras = model.predict(X_val).flatten()
y_pred_keras = (y_proba_keras > 0.5).astype(int)

keras_accuracy = accuracy_score(y_val, y_pred_keras)
keras_auc = roc_auc_score(y_val, y_proba_keras)

print("Keras MLP metrics:")
print("Accuracy:", keras_accuracy)
print("ROC AUC:", keras_auc)
print(classification_report(y_val, y_pred_keras))


In [None]:
# Plain 64 -> 32 ReLU network with a sigmoid output for binary classification.
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(n_features,)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(0.001),
    metrics=['accuracy'],
)

# Time the fit; validation_split=0.2 lets Keras hold out 20% of the training
# data for validation.
start_time = time.time()
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=32,
    verbose=2,
)
end_time = time.time()

training_time = end_time - start_time
print(f"Keras model training time: {training_time:.2f} seconds")

# Threshold the predicted probabilities at 0.5 to obtain hard labels.
y_pred_proba = model.predict(X_test)
y_pred_keras = (y_pred_proba > 0.5).astype("int32")

keras_test_accuracy = accuracy_score(y_test, y_pred_keras)
keras_test_report = classification_report(y_test, y_pred_keras)

print("Accuracy on test set:", keras_test_accuracy)
print("Classification Report:\n", keras_test_report)

### TensorFlow (Low-Level API)

In [None]:
class SimpleMLP(tf.keras.Model):
    """Subclassed Keras model: Dense(64) -> Dropout -> Dense(32) -> Dropout -> Dense(1, sigmoid)."""

    def __init__(self):
        super().__init__()
        layers = tf.keras.layers
        self.dense1 = layers.Dense(64, activation='relu')
        self.dropout1 = layers.Dropout(0.3)
        self.dense2 = layers.Dense(32, activation='relu')
        self.dropout2 = layers.Dropout(0.3)
        self.out = layers.Dense(1, activation='sigmoid')

    def call(self, inputs, training=False):
        """Run the forward pass; `training` is forwarded to both dropout layers."""
        hidden = self.dropout1(self.dense1(inputs), training=training)
        hidden = self.dropout2(self.dense2(hidden), training=training)
        return self.out(hidden)

# Prepare datasets.
# Shuffle individual rows *before* batching: `.batch(64).shuffle(...)` would
# only reorder whole batches, so the same 64 rows would always train together.
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train.values, y_train.values))
    .shuffle(10000)
    .batch(64)
)
val_ds = tf.data.Dataset.from_tensor_slices((X_val.values, y_val.values)).batch(64)

model = SimpleMLP()
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Training loop with manual early stopping on the validation loss.
epochs = 50
patience = 5
best_val_loss = np.inf
best_weights = None  # in-memory snapshot of the best epoch's weights
wait = 0

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    # Training: one optimizer step per mini-batch, with dropout active.
    for x_batch, y_batch in train_ds:
        with tf.GradientTape() as tape:
            # The output layer is a sigmoid, so these are probabilities.
            probs = model(x_batch, training=True)
            loss = loss_fn(y_batch, probs)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Validation: dropout disabled; collect per-batch loss and accuracy.
    val_losses = []
    val_accuracies = []
    for x_batch, y_batch in val_ds:
        val_probs = model(x_batch, training=False)
        val_loss = loss_fn(y_batch, val_probs)
        val_losses.append(val_loss.numpy())

        preds = tf.cast(val_probs > 0.5, tf.int32)
        acc = tf.reduce_mean(tf.cast(preds[:, 0] == tf.cast(y_batch, tf.int32), tf.float32))
        val_accuracies.append(acc.numpy())

    # Unweighted mean over batches (a smaller final batch counts as much as a full one).
    val_loss_avg = np.mean(val_losses)
    val_acc_avg = np.mean(val_accuracies)

    print(f"Validation loss: {val_loss_avg:.4f}, Accuracy: {val_acc_avg:.4f}")

    # Early stopping
    if val_loss_avg < best_val_loss:
        best_val_loss = val_loss_avg
        wait = 0
        # Keep the snapshot in memory instead of writing "best_weights.tf":
        # it avoids disk I/O and the checkpoint-filename rules that differ
        # between Keras versions.
        best_weights = model.get_weights()
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping triggered.")
            break

# Restore the best weights (skipped if no epoch ever improved, e.g. NaN losses).
if best_weights is not None:
    model.set_weights(best_weights)

# Evaluate on val set
y_proba_tf = model.predict(X_val).flatten()
y_pred_tf = (y_proba_tf > 0.5).astype(int)

print("TensorFlow MLP metrics:")
print("Accuracy:", accuracy_score(y_val, y_pred_tf))
print("ROC AUC:", roc_auc_score(y_val, y_proba_tf))
print(classification_report(y_val, y_pred_tf))


In [None]:
class SimpleMLP(tf.keras.Model):
    """Subclassed Keras model: Dense(64, relu) -> Dense(32, relu) -> Dense(1, sigmoid)."""

    def __init__(self):
        super().__init__()
        layers = tf.keras.layers
        self.hidden1 = layers.Dense(64, activation='relu')
        self.hidden2 = layers.Dense(32, activation='relu')
        self.out = layers.Dense(1, activation='sigmoid')

    def call(self, x):
        """Apply the two hidden layers and the sigmoid output layer in sequence."""
        return self.out(self.hidden2(self.hidden1(x)))

model = SimpleMLP()
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Whole training set as float32 tensors; labels reshaped to a column to match
# the model's (n, 1) output.
X_train_tf = tf.convert_to_tensor(X_train, dtype=tf.float32)
y_train_tf = tf.convert_to_tensor(y_train, dtype=tf.float32)
y_train_tf = tf.reshape(y_train_tf, (-1, 1))

start_time = time.time()
epochs = 20

# Each iteration runs the model on the full training tensor and applies a
# single optimizer step, i.e. one gradient update per epoch.
for epoch in range(epochs):
    with tf.GradientTape() as tape:
        logits = model(X_train_tf)
        loss = loss_fn(y_train_tf, logits)
    weights = model.trainable_weights
    grads = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.numpy():.4f}")

end_time = time.time()
training_time = end_time - start_time
print(f"TensorFlow custom training time: {training_time:.2f} seconds")

# Predict on the test split and threshold the probabilities at 0.5.
X_test_tf = tf.convert_to_tensor(X_test, dtype=tf.float32)
y_pred_probs = model(X_test_tf).numpy()
y_pred = (y_pred_probs > 0.5).astype(int).reshape(-1)

tf_test_accuracy = accuracy_score(y_test, y_pred)
tf_test_report = classification_report(y_test, y_pred)

print("Accuracy on test set:", tf_test_accuracy)
print("Classification Report:\n", tf_test_report)

### NumPy MLP

In [None]:
class NumpyMLP:
    """Two-hidden-layer ReLU network with a sigmoid output, trained by mini-batch SGD.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    hidden1, hidden2 : int
        Widths of the first and second hidden layers.
    lr : float
        Learning rate applied to every parameter update.
    """

    def __init__(self, input_dim, hidden1=64, hidden2=32, lr=0.001):
        # Reseeds NumPy's global RNG, so both the weight draw below and the
        # permutations drawn in `train` are reproducible.
        np.random.seed(42)
        self.lr = lr
        self.W1 = np.random.randn(input_dim, hidden1) * 0.01
        self.b1 = np.zeros((1, hidden1))
        self.W2 = np.random.randn(hidden1, hidden2) * 0.01
        self.b2 = np.zeros((1, hidden2))
        self.W3 = np.random.randn(hidden2, 1) * 0.01
        self.b3 = np.zeros((1, 1))

    def relu(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def relu_deriv(self, x):
        """Derivative of ReLU: 1.0 where x > 0, else 0.0."""
        return (x > 0).astype(float)

    def sigmoid(self, x):
        """Numerically stable logistic function.

        exp() is only ever evaluated on non-positive values, so large-magnitude
        inputs cannot overflow (the naive 1 / (1 + exp(-x)) overflows for very
        negative x).
        """
        z = np.exp(-np.abs(x))
        return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    def forward(self, X):
        """Compute class-1 probabilities of shape (n, 1), caching activations for `backward`."""
        self.Z1 = X @ self.W1 + self.b1
        self.A1 = self.relu(self.Z1)
        self.Z2 = self.A1 @ self.W2 + self.b2
        self.A2 = self.relu(self.Z2)
        self.Z3 = self.A2 @ self.W3 + self.b3
        self.A3 = self.sigmoid(self.Z3)
        return self.A3

    def backward(self, X, y, output):
        """Back-propagate the cross-entropy loss for one batch and update all parameters.

        Relies on the activations cached by the immediately preceding
        `forward(X)` call; `output` is that call's return value.
        """
        y = np.asarray(y).reshape(-1, 1)
        m = y.shape[0]
        # Sigmoid + binary cross-entropy: dL/dZ3 simplifies to (p - y).
        dZ3 = output - y
        dW3 = (self.A2.T @ dZ3) / m
        db3 = np.sum(dZ3, axis=0, keepdims=True) / m

        dA2 = dZ3 @ self.W3.T
        dZ2 = dA2 * self.relu_deriv(self.Z2)
        dW2 = (self.A1.T @ dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * self.relu_deriv(self.Z1)
        dW1 = (X.T @ dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # Update weights
        self.W3 -= self.lr * dW3
        self.b3 -= self.lr * db3
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def train(self, X, y, epochs=100, batch_size=64):
        """Train with shuffled mini-batches, printing the full-data loss every 10 epochs.

        `X` is an (n, d) array-like and `y` holds n binary labels, either 1-D
        or as a column.
        """
        X = np.asarray(X)
        # Work with a column vector throughout: combining a 1-D `y` with the
        # (n, 1) predictions would broadcast to an (n, n) matrix, which yields
        # a wrong loss and exhausts memory on large inputs.
        y = np.asarray(y).reshape(-1, 1)
        n = X.shape[0]
        for epoch in range(epochs):
            perm = np.random.permutation(n)
            X_shuffled = X[perm]
            y_shuffled = y[perm]
            for i in range(0, n, batch_size):
                X_batch = X_shuffled[i:i + batch_size]
                y_batch = y_shuffled[i:i + batch_size]
                output = self.forward(X_batch)
                self.backward(X_batch, y_batch, output)
            if (epoch + 1) % 10 == 0:
                pred = self.forward(X)
                # The 1e-8 offset keeps log() finite when a probability saturates.
                loss = -np.mean(y * np.log(pred + 1e-8) + (1 - y) * np.log(1 - pred + 1e-8))
                print(f"Epoch {epoch + 1}/{epochs} - loss: {loss:.4f}")

    def predict(self, X):
        """Return `(labels, probabilities)`, both of shape (n, 1); labels use a 0.5 threshold."""
        prob = self.forward(X)
        return (prob > 0.5).astype(int), prob

# Convert the pandas inputs to plain arrays up front; NumpyMLP indexes them
# with integer permutations.
X_train_np = X_train.values
y_train_np = y_train.values
X_val_np = X_val.values

# Build and train the hand-written network.
np_mlp = NumpyMLP(input_dim=X_train.shape[1], hidden1=64, hidden2=32, lr=0.001)
np_mlp.train(X_train_np, y_train_np, epochs=50, batch_size=128)

# predict() returns the hard labels together with the underlying probabilities.
y_pred_np, y_proba_np = np_mlp.predict(X_val_np)

numpy_accuracy = accuracy_score(y_val, y_pred_np)
numpy_auc = roc_auc_score(y_val, y_proba_np)

print("NumPy MLP metrics:")
print("Accuracy:", numpy_accuracy)
print("ROC AUC:", numpy_auc)
print(classification_report(y_val, y_pred_np))


In [None]:
import numpy as np

# Activation functions and derivatives
def sigmoid(x):
    """Numerically stable logistic function 1 / (1 + exp(-x)).

    exp() is only ever evaluated on non-positive values, so large-magnitude
    inputs cannot overflow (the naive formula overflows for very negative x).
    """
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

def sigmoid_derivative(x):
    """Derivative of the logistic function at `x`, computed as s * (1 - s) with s = sigmoid(x)."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and `x`."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU: 1.0 where `x` is strictly positive, 0.0 elsewhere."""
    positive_mask = x > 0
    return positive_mask.astype(float)

# Binary cross-entropy loss and derivative
def binary_cross_entropy(y_true, y_pred):
    """Mean binary cross-entropy between labels `y_true` and probabilities `y_pred`.

    Probabilities are clipped to [1e-10, 1 - 1e-10] so the logarithms stay finite.
    """
    clipped = np.clip(y_pred, 1e-10, 1 - 1e-10)
    positive_term = y_true * np.log(clipped)
    negative_term = (1 - y_true) * np.log(1 - clipped)
    return -np.mean(positive_term + negative_term)

def binary_cross_entropy_derivative(y_true, y_pred):
    """Gradient of the mean binary cross-entropy with respect to `y_pred`.

    Probabilities are clipped to [1e-10, 1 - 1e-10] first; the result is divided
    by `y_true.shape[0]` to account for the mean over samples.
    """
    clipped = np.clip(y_pred, 1e-10, 1 - 1e-10)
    n_rows = y_true.shape[0]
    denominator = clipped * (1 - clipped) * n_rows
    return (clipped - y_true) / denominator

# Initialize parameters
input_dim = X_train.shape[1]
hidden1_size = 64
hidden2_size = 32
output_size = 1

np.random.seed(42)
# He initialization (std = sqrt(2 / fan_in)), the usual choice for ReLU layers.
W1 = np.random.randn(input_dim, hidden1_size) * np.sqrt(2 / input_dim)
b1 = np.zeros((1, hidden1_size))

W2 = np.random.randn(hidden1_size, hidden2_size) * np.sqrt(2 / hidden1_size)
b2 = np.zeros((1, hidden2_size))

W3 = np.random.randn(hidden2_size, output_size) * np.sqrt(2 / hidden2_size)
b3 = np.zeros((1, output_size))

# Training parameters
learning_rate = 0.001
epochs = 100
batch_size = 64

# Prepare data as numpy arrays; labels as a column to match the (n, 1) output.
X = X_train.values.astype(np.float32)
y = y_train.values.reshape(-1, 1).astype(np.float32)

n_samples = X.shape[0]

for epoch in range(epochs):
    # Shuffle data each epoch (features and labels with the same permutation)
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    X = X[indices]
    y = y[indices]

    epoch_loss = 0

    for start in range(0, n_samples, batch_size):
        end = start + batch_size
        X_batch = X[start:end]
        y_batch = y[start:end]

        # Forward pass
        z1 = np.dot(X_batch, W1) + b1
        a1 = relu(z1)

        z2 = np.dot(a1, W2) + b2
        a2 = relu(z2)

        z3 = np.dot(a2, W3) + b3
        a3 = sigmoid(z3)  # output predictions

        loss = binary_cross_entropy(y_batch, a3)
        # Weight by batch size so the epoch average is exact even when the
        # last batch is smaller.
        epoch_loss += loss * X_batch.shape[0]

        # Backpropagation.
        # For a sigmoid output with binary cross-entropy the chain rule
        # collapses to (p - y) / m. Using the closed form avoids multiplying a
        # clipped 1 / (p * (1 - p)) by an unclipped p * (1 - p), which drives
        # the gradient to zero once the sigmoid saturates.
        dz3 = (a3 - y_batch) / X_batch.shape[0]
        dW3 = np.dot(a2.T, dz3)
        db3 = np.sum(dz3, axis=0, keepdims=True)

        da2 = np.dot(dz3, W3.T)
        dz2 = da2 * relu_derivative(z2)
        dW2 = np.dot(a1.T, dz2)
        db2 = np.sum(dz2, axis=0, keepdims=True)

        da1 = np.dot(dz2, W2.T)
        dz1 = da1 * relu_derivative(z1)
        dW1 = np.dot(X_batch.T, dz1)
        db1 = np.sum(dz1, axis=0, keepdims=True)

        # Update weights and biases
        W3 -= learning_rate * dW3
        b3 -= learning_rate * db3

        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1

    epoch_loss /= n_samples
    # Report the first epoch and every tenth one.
    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

# Inference function
def predict(X):
    """Forward `X` through the module-level weights and return 0/1 labels (0.5 threshold)."""
    hidden1 = relu(np.dot(X, W1) + b1)
    hidden2 = relu(np.dot(hidden1, W2) + b2)
    probability = sigmoid(np.dot(hidden2, W3) + b3)
    return (probability > 0.5).astype(int)

# Evaluate on test set
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Cast the test split to the dtypes used during training / label comparison.
y_test_np = y_test.values.astype(np.int32)
X_test_np = X_test.values.astype(np.float32)

y_pred_np = predict(X_test_np)

numpy_test_accuracy = accuracy_score(y_test_np, y_pred_np)
numpy_test_report = classification_report(y_test_np, y_pred_np)
numpy_test_confusion = confusion_matrix(y_test_np, y_pred_np)

print("Test accuracy:", numpy_test_accuracy)
print("Classification Report:\n", numpy_test_report)
print("Confusion Matrix:\n", numpy_test_confusion)


### Save Final Predictions on Test Set (Using best model)

In [None]:
# Score the preprocessed hold-out set and write a submission file.
# NOTE(review): `model` is whichever network the most recently executed cell
# bound to that name (several cells assign `model`) — confirm it is the
# intended one before trusting this file.

test_proba = model.predict(test_scaled).flatten()

# One row per hold-out record: its index as the ID and the predicted probability.
submission_columns = {
    'ID': test.index,
    'TARGET': test_proba,
}
submission = pd.DataFrame(submission_columns)

submission.to_csv('final_predictions.csv', index=False)
print("Saved final predictions to final_predictions.csv")


### Summary table format

In [None]:
# (library, hyperparameters, notes, hard predictions, positive-class scores) per model.
model_outputs = [
    ('DummyClassifier', 'Most Frequent', '-', y_pred_dummy, y_proba_dummy),
    ('RandomForest', str(grid_search.best_params_), '-', y_pred_rf, y_proba_rf),
    ('MLPClassifier', 'hidden_layer_sizes=(64,32)', '-', y_pred_mlp, y_proba_mlp),
    ('Keras', '64,32 + dropout', 'Adam lr=0.001', y_pred_keras, y_proba_keras),
    ('TensorFlow', '64,32 + dropout', 'Adam lr=0.001', y_pred_tf, y_proba_tf),
    ('NumPy', '64,32 (manual)', 'lr=0.001', y_pred_np, y_proba_np),
]

# Score every model against the same validation labels.
results = pd.DataFrame(
    [
        [library, hyperparams, notes, accuracy_score(y_val, labels), roc_auc_score(y_val, scores)]
        for library, hyperparams, notes, labels, scores in model_outputs
    ],
    columns=['Library', 'Hyperparameters', 'Notes', 'Accuracy', 'ROC_AUC'],
)

print(results)
