### Imports

In [1]:
import os
import time
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.utils import class_weight
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold

from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

### Data Preprocessing

Data Loading & Initial Exploration

In [53]:
path = './drive/MyDrive/Projects/data/DS_2/'
train = pd.read_csv(path + 'bank_data_train.csv')
test = pd.read_csv(path + 'bank_data_test.csv')

In [54]:
target = 'TARGET'

In [55]:
print(train.shape, test.shape)
print(train[target].value_counts(normalize=True))

(355190, 116) (88798, 116)
TARGET
0    0.918565
1    0.081435
Name: proportion, dtype: float64


 Train-Test Split with Stratification

In [56]:
X = train.drop(columns=target)
y = train[target]
X_test = test.drop(columns=target)
y_test = test[target]

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

Imputation

In [57]:
num_cols = X_train.select_dtypes(exclude='object').columns.tolist()
cat_cols = X_train.select_dtypes(include='object').columns.tolist()

num_imputer = SimpleImputer(strategy='median')
cat_imputer = SimpleImputer(strategy='most_frequent')

for df in [X_train, X_val, X_test]:
    df[num_cols] = num_imputer.fit_transform(X_train[num_cols]) if df is X_train else num_imputer.transform(df[num_cols])
    df[cat_cols] = cat_imputer.fit_transform(X_train[cat_cols]) if df is X_train else cat_imputer.transform(df[cat_cols])

Frequency encoding categorical columns

In [58]:
for col in cat_cols:
    freq = X_train[col].value_counts(normalize=True)
    X_train[col] = X_train[col].map(freq)
    X_val[col] = X_val[col].map(freq).fillna(0)
    X_test[col] = test[col].map(freq).fillna(0)

Outlier clipping by IQR

In [59]:
for col in num_cols:
    Q1 = X_train[col].quantile(0.25)
    Q3 = X_train[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    for df in [X_train, X_val, X_test]:
        df[col] = df[col].clip(lower, upper)

Scaling

In [60]:
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_val = pd.DataFrame(scaler.transform(X_val), columns=X_val.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

Feature selection via L1 LogisticRegression

In [61]:
model = LogisticRegression(penalty='l1', solver='liblinear')
model.fit(X_train, y_train)

selector = SelectFromModel(model, prefit=True)

X_train_selected = selector.transform(X_train)
X_val_selected = selector.transform(X_val)
X_test_selected = selector.transform(X_test)

X_train.columns[selector.get_support()]

Index(['ID', 'AMOUNT_RUB_CLO_PRC', 'AMOUNT_RUB_SUP_PRC', 'CLNT_TRUST_RELATION',
       'APP_MARITAL_STATUS', 'REST_AVG_CUR', 'APP_KIND_OF_PROP_HABITATION',
       'CLNT_JOB_POSITION_TYPE', 'AMOUNT_RUB_NAS_PRC', 'CLNT_JOB_POSITION',
       'APP_DRIVING_LICENSE', 'TRANS_COUNT_SUP_PRC', 'APP_EDUCATION',
       'TRANS_COUNT_NAS_PRC', 'APP_TRAVEL_PASS', 'CR_PROD_CNT_TOVR', 'APP_CAR',
       'APP_POSITION_TYPE', 'TRANS_COUNT_ATM_PRC', 'AMOUNT_RUB_ATM_PRC', 'AGE',
       'APP_EMP_TYPE', 'REST_DYNAMIC_CUR_1M', 'REST_DYNAMIC_CUR_3M',
       'CNT_TRAN_SUP_TENDENCY3M', 'TURNOVER_DYNAMIC_CUR_1M',
       'SUM_TRAN_SUP_TENDENCY3M', 'CNT_TRAN_ATM_TENDENCY3M',
       'CNT_TRAN_ATM_TENDENCY1M', 'SUM_TRAN_ATM_TENDENCY3M',
       'SUM_TRAN_ATM_TENDENCY1M', 'TURNOVER_DYNAMIC_CUR_3M', 'PACK',
       'CLNT_SETUP_TENOR', 'TRANS_AMOUNT_TENDENCY3M', 'TRANS_CNT_TENDENCY3M'],
      dtype='object')

### Training Models

#### Naive Classifier

In [11]:
dummy = DummyClassifier(strategy='most_frequent')
dummy.fit(X_train_selected, y_train)
y_pred_dummy = dummy.predict(X_val_selected)
y_proba_dummy = dummy.predict_proba(X_val_selected)[:, 1]

print("Dummy Classifier metrics:")
print("Accuracy:", accuracy_score(y_val, y_pred_dummy))
print("ROC AUC:", roc_auc_score(y_val, y_proba_dummy))
print(classification_report(y_val, y_pred_dummy))

Dummy Classifier metrics:
Accuracy: 0.918564711844365
ROC AUC: 0.5
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     65253
           1       0.00      0.00      0.00      5785

    accuracy                           0.92     71038
   macro avg       0.46      0.50      0.48     71038
weighted avg       0.84      0.92      0.88     71038



#### Random Forest

In [12]:
rf = RandomForestClassifier(random_state=42, n_jobs=-1)

param_grid = {
    'n_estimators': [20, 30],
    'max_depth': [5, 10],
    'class_weight': ['balanced', None]
}

grid_rf = GridSearchCV(rf, param_grid, cv=2, scoring='roc_auc', verbose=1)

start_time = time.time()
grid_rf.fit(X_train_selected, y_train)
end_time = time.time()

print(f"Random Forest GridSearch training time: {end_time - start_time:.2f}s")
print("Best params:", grid_rf.best_params_)

best_rf = grid_rf.best_estimator_
y_pred_rf = best_rf.predict(X_val_selected)
y_proba_rf = best_rf.predict_proba(X_val_selected)[:, 1]

print("Random Forest metrics:")
print("Accuracy:", accuracy_score(y_val, y_pred_rf))
print("ROC AUC:", roc_auc_score(y_val, y_proba_rf))
print(classification_report(y_val, y_pred_rf))

Fitting 2 folds for each of 8 candidates, totalling 16 fits
Random Forest GridSearch training time: 158.58s
Best params: {'class_weight': None, 'max_depth': 10, 'n_estimators': 30}
Random Forest metrics:
Accuracy: 0.9185787888172527
ROC AUC: 0.8050296140727214
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     65253
           1       0.67      0.00      0.00      5785

    accuracy                           0.92     71038
   macro avg       0.79      0.50      0.48     71038
weighted avg       0.90      0.92      0.88     71038



| Metric              | Value  | What it means                                       |
| ------------------- | ------ | --------------------------------------------------- |
| Accuracy            | 0.9186 | \~92% overall correct predictions                   |
| ROC AUC             | 0.8050 | Good discrimination ability (0.5=chance, 1=perfect) |
| Precision (class 0) | 0.92   | Of predicted non-churn, 92% correct                 |
| Recall (class 0)    | 1.00   | Model found almost all non-churn cases              |
| Precision (class 1) | 0.67   | Of predicted churn, 67% correct                     |
| Recall (class 1)    | 0.00   | Model detected almost **no churn cases** (bad)      |


#### Scikit-learn MLPClassifier

In [14]:
param_grid = {
    'hidden_layer_sizes': [(64,), (128,)],
    'alpha': [1e-4, 1e-3],
    'learning_rate': ['adaptive'],
    'learning_rate_init': [0.001],
    'activation': ['relu'],
    'solver': ['adam']
}

cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

grid = GridSearchCV(
    MLPClassifier(max_iter=300, early_stopping=True, random_state=42),
    param_grid,
    cv=cv,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=1
)

start_time = time.time()
grid.fit(X_train_selected, y_train)
end_time = time.time()

print(f"MLPClassifier training time: {end_time - start_time:.2f}s")

best_mlp = grid.best_estimator_
y_pred_sklearn_mlp = best_mlp.predict(X_val_selected)
y_proba_sklearn_mlp = best_mlp.predict_proba(X_val_selected)[:, 1]

print("MLPClassifier metrics:")
print("Accuracy:", accuracy_score(y_val, y_pred_sklearn_mlp))
print("ROC AUC:", roc_auc_score(y_val, y_proba_sklearn_mlp))
print(classification_report(y_val, y_pred_sklearn_mlp))

Fitting 2 folds for each of 4 candidates, totalling 8 fits
MLPClassifier training time: 75.60s
MLPClassifier metrics:
Accuracy: 0.9185787888172527
ROC AUC: 0.7371770890938549
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     65253
           1       0.60      0.00      0.00      5785

    accuracy                           0.92     71038
   macro avg       0.76      0.50      0.48     71038
weighted avg       0.89      0.92      0.88     71038



#### Keras MLP

In [16]:
input_dim = X_train_selected.shape[1]

model = Sequential([
    Dense(32, activation='relu', input_shape=(input_dim,)),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

start_time = time.time()
history = model.fit(
    X_train_selected, y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_val_selected, y_val),
    validation_freq=3,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)
end_time = time.time()

print(f"Keras model training time: {end_time - start_time:.2f} seconds")

y_proba_keras_mlp = model.predict(X_val_selected).flatten()
y_pred_keras_mlp = (y_proba_keras_mlp > 0.5).astype(int)

print("Keras MLP metrics:")
print("Accuracy:", accuracy_score(y_val, y_pred_keras_mlp))
print("ROC AUC:", roc_auc_score(y_val, y_proba_keras_mlp))
print(classification_report(y_val, y_pred_keras_mlp))

Epoch 1/50
[1m4440/4440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.9148 - loss: 0.2941 - learning_rate: 0.0010
Epoch 2/50
[1m4440/4440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - accuracy: 0.9192 - loss: 0.2591 - learning_rate: 0.0010
Epoch 3/50
[1m4440/4440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.9184 - loss: 0.2576 - val_accuracy: 0.9186 - val_loss: 0.2514 - learning_rate: 0.0010
Epoch 4/50
[1m4440/4440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - accuracy: 0.9186 - loss: 0.2534 - learning_rate: 0.0010
Epoch 5/50
[1m4440/4440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.9187 - loss: 0.2522 - learning_rate: 0.0010
Epoch 6/50
[1m4440/4440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 3ms/step - accuracy: 0.9177 - loss: 0.2523 - val_accuracy: 0.9186 - val_loss: 0.2460 - learning_rate: 0.0010
Epoch 7/50
[1m4440/4440[0m [32m

#### TensorFlow MLP

In [18]:
def safe_convert_X(X):
    if hasattr(X, "to_numpy"):
        return X.to_numpy().astype(np.float32)
    else:
        return X.astype(np.float32)

def safe_convert_y(y):
    if hasattr(y, "to_numpy"):
        return y.to_numpy().astype(np.float32).reshape(-1, 1)
    else:
        return y.astype(np.float32).reshape(-1, 1)

In [19]:
X_train = safe_convert_X(X_train)
y_train = safe_convert_y(y_train)
X_val = safe_convert_X(X_val)
y_val = safe_convert_y(y_val)

In [20]:
batch_size = 64

train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)) \
    .shuffle(buffer_size=10000) \
    .batch(batch_size) \
    .prefetch(tf.data.AUTOTUNE)

val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val)) \
    .batch(batch_size) \
    .prefetch(tf.data.AUTOTUNE)

In [21]:
class SimpleMLP(tf.Module):
    def __init__(self, input_dim):
        super().__init__()
        init_w1 = tf.random.truncated_normal([input_dim, 64], stddev=tf.math.sqrt(2. / input_dim))
        init_w2 = tf.random.truncated_normal([64, 32], stddev=tf.math.sqrt(2. / 64))
        init_w3 = tf.random.truncated_normal([32, 1], stddev=tf.math.sqrt(2. / 32))

        self.W1 = tf.Variable(init_w1, name="W1")
        self.b1 = tf.Variable(tf.zeros([64]), name="b1")
        self.W2 = tf.Variable(init_w2, name="W2")
        self.b2 = tf.Variable(tf.zeros([32]), name="b2")
        self.W3 = tf.Variable(init_w3, name="W3")
        self.b3 = tf.Variable(tf.zeros([1]), name="b3")

    def __call__(self, x, training=False):
        x = tf.matmul(x, self.W1) + self.b1
        x = tf.nn.relu(x)
        if training:
            x = tf.nn.dropout(x, rate=0.3)
        x = tf.matmul(x, self.W2) + self.b2
        x = tf.nn.relu(x)
        if training:
            x = tf.nn.dropout(x, rate=0.3)
        logits = tf.matmul(x, self.W3) + self.b3
        return tf.sigmoid(logits)

In [22]:
model = SimpleMLP(input_dim=X_train.shape[1])

# Compute class weights for balanced loss
y_train_1d = y_train.flatten().astype(int)
weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train_1d), y=y_train_1d)
class_weights_dict = {int(cls): float(w) for cls, w in zip(np.unique(y_train_1d), weights)}
print("Class weights:", class_weights_dict)

def loss_fn(y_true, y_pred):
    y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)
    weights = tf.where(tf.equal(y_true, 1), class_weights_dict[1], class_weights_dict[0])
    loss = -(weights * (y_true * tf.math.log(y_pred) + (1 - y_true) * tf.math.log(1 - y_pred)))
    return tf.reduce_mean(loss)

optimizer = tf.optimizers.Adam(learning_rate=0.001)

@tf.function
def train_step(x_batch, y_batch):
    with tf.GradientTape() as tape:
        y_pred = model(x_batch, training=True)
        loss = loss_fn(y_batch, y_pred)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

@tf.function
def val_step(x_batch, y_batch):
    y_pred = model(x_batch, training=False)
    loss = loss_fn(y_batch, y_pred)
    return loss, y_pred

def validate():
    val_losses = []
    all_preds = []
    all_labels = []
    for x_batch, y_batch in val_ds:
        loss, y_pred = val_step(x_batch, y_batch)
        val_losses.append(loss.numpy())
        all_preds.append(y_pred.numpy())
        all_labels.append(y_batch.numpy())
    val_loss = np.mean(val_losses)
    all_preds = np.vstack(all_preds).flatten()
    all_labels = np.vstack(all_labels).flatten()
    return val_loss, all_labels, all_preds

Class weights: {0: 0.5443274638713929, 1: 6.139844425237683}


In [26]:
epochs = 50
patience = 5
lr_patience = 3
best_val_loss = np.inf
wait = 0
lr_wait = 0

start_time = time.time()

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    train_losses = []
    for x_batch, y_batch in train_ds:
        loss = train_step(x_batch, y_batch)
        train_losses.append(loss.numpy())
    train_loss = np.mean(train_losses)

    val_loss, y_val_true, y_val_pred = validate()
    val_auc = roc_auc_score(y_val_true, y_val_pred)
    val_acc = accuracy_score(y_val_true, y_val_pred > 0.5)

    print(f"Train loss: {train_loss:.4f} | Val loss: {val_loss:.4f} | Val ROC AUC: {val_auc:.4f} | Val Acc: {val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        wait = 0
        lr_wait = 0
        best_weights = [v.numpy() for v in model.trainable_variables]
    else:
        wait += 1
        lr_wait += 1
        if lr_wait >= lr_patience:
            old_lr = optimizer.learning_rate.numpy()
            new_lr = max(old_lr * 0.5, 1e-6)
            optimizer.learning_rate.assign(new_lr)
            print(f"Learning rate reduced from {old_lr:.6f} to {new_lr:.6f}")
            lr_wait = 0
        if wait >= patience:
            print("Early stopping triggered")
            break

end_time = time.time()
print(f"Training finished in {end_time - start_time:.2f} seconds")

# Restore best weights
for var, val in zip(model.trainable_variables, best_weights):
    var.assign(val)

# Final evaluation
val_loss, y_val_true, y_proba_tf_mlp = validate()
y_pred_tf_mlp = (y_proba_tf_mlp > 0.5).astype(int)

print("Final evaluation:")
print("Accuracy:", accuracy_score(y_val_true, y_pred_tf_mlp))
print("ROC AUC:", roc_auc_score(y_val_true, y_proba_tf_mlp))
print(classification_report(y_val_true, y_pred_tf_mlp))

Final evaluation:
Accuracy: 0.630465384723669
ROC AUC: 0.8040117343409611
              precision    recall  f1-score   support

         0.0       0.98      0.61      0.75     65253
         1.0       0.16      0.85      0.27      5785

    accuracy                           0.63     71038
   macro avg       0.57      0.73      0.51     71038
weighted avg       0.91      0.63      0.71     71038



#### NumPy MLP

In [65]:
def safe_convert_X(X):
    return X.to_numpy().astype(np.float32) if hasattr(X, "to_numpy") else X.astype(np.float32)

def safe_convert_y(y):
    return y.to_numpy().astype(np.float32).reshape(-1, 1) if hasattr(y, "to_numpy") else y.astype(np.float32).reshape(-1, 1)

X_train = safe_convert_X(X_train_selected)
X_val = safe_convert_X(X_val_selected)
X_test = safe_convert_X(X_test_selected)
y_train = safe_convert_y(y_train)
y_val = safe_convert_y(y_val)
y_test = safe_convert_y(y_test)

In [66]:
def relu(x):
    return np.maximum(0, x)

def relu_deriv(x):
    return (x > 0).astype(np.float32)

def sigmoid(x):
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def sigmoid_deriv(x):
    s = sigmoid(x)
    return s * (1 - s)

def binary_cross_entropy(y_true, y_pred):
    eps = 1e-10
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_deriv(y_true, y_pred):
    eps = 1e-10
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return (y_pred - y_true) / (y_pred * (1 - y_pred))

In [67]:
class MLP:
    def __init__(self, input_size, hidden_sizes=(64, 32), output_size=1, lr=0.01):
        h1, h2 = hidden_sizes
        self.lr = lr

        # Xavier initialization
        self.W1 = np.random.randn(input_size, h1) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((1, h1))
        self.W2 = np.random.randn(h1, h2) * np.sqrt(2. / h1)
        self.b2 = np.zeros((1, h2))
        self.W3 = np.random.randn(h2, output_size) * np.sqrt(2. / h2)
        self.b3 = np.zeros((1, output_size))

    def forward(self, X):
        self.z1 = X @ self.W1 + self.b1
        self.a1 = relu(self.z1)
        self.z2 = self.a1 @ self.W2 + self.b2
        self.a2 = relu(self.z2)
        self.z3 = self.a2 @ self.W3 + self.b3
        self.a3 = sigmoid(self.z3)
        return self.a3

    def backward(self, X, y_true):
        m = X.shape[0]
        y_pred = self.a3

        dz3 = binary_cross_entropy_deriv(y_true, y_pred) * sigmoid_deriv(self.z3)
        dW3 = self.a2.T @ dz3 / m
        db3 = np.sum(dz3, axis=0, keepdims=True) / m

        dz2 = dz3 @ self.W3.T * relu_deriv(self.z2)
        dW2 = self.a1.T @ dz2 / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        dz1 = dz2 @ self.W2.T * relu_deriv(self.z1)
        dW1 = X.T @ dz1 / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update weights
        self.W3 -= self.lr * dW3
        self.b3 -= self.lr * db3
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def train(self, X, y, X_val=None, y_val=None, epochs=100, batch_size=64, patience=5, verbose=True):
        n = X.shape[0]
        best_val_loss = np.inf
        wait = 0

        for epoch in range(epochs):
            # Shuffle training data at each epoch
            perm = np.random.permutation(n)
            X_shuffled = X[perm]
            y_shuffled = y[perm]

            # Mini-batch training
            for i in range(0, n, batch_size):
                X_batch = X_shuffled[i:i + batch_size]
                y_batch = y_shuffled[i:i + batch_size]
                self.forward(X_batch)
                self.backward(X_batch, y_batch)

            # Compute training loss
            train_pred = self.forward(X)
            train_loss = binary_cross_entropy(y, train_pred)

            if X_val is not None and y_val is not None:
                val_pred = self.forward(X_val)
                val_loss = binary_cross_entropy(y_val, val_pred)

            if verbose and epoch % 10 == 0:
                print(f"Epoch {epoch:03d} | Train Loss: {train_loss:.4f}", end="")
                if X_val is not None and y_val is not None:
                    print(f" | Val Loss: {val_loss:.4f}")
                else:
                    print()

            # Early stopping check
            if X_val is not None and y_val is not None:
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    wait = 0
                    # Save best weights
                    best_weights = (self.W1.copy(), self.b1.copy(),
                                    self.W2.copy(), self.b2.copy(),
                                    self.W3.copy(), self.b3.copy())
                else:
                    wait += 1
                    if wait >= patience:
                        if verbose:
                            print(f"Early stopping at epoch {epoch}")
                        # Restore best weights
                        self.W1, self.b1, self.W2, self.b2, self.W3, self.b3 = best_weights
                        break

    def predict(self, X):
        return (self.forward(X) > 0.5).astype(int)

    def predict_proba(self, X):
        return self.forward(X)

In [68]:
numpy_MLP = MLP(input_size=X_train.shape[1], hidden_sizes=(64, 32), lr=0.01)

numpy_MLP.train(X_train, y_train, X_val, y_val, epochs=100, batch_size=128, patience=10, verbose=True)

y_pred_np_mlp = numpy_MLP.predict(X_val)
y_proba_np_mlp = numpy_MLP.predict_proba(X_val)

print("Accuracy:", accuracy_score(y_val, y_pred_np_mlp))
print("ROC AUC:", roc_auc_score(y_val, y_proba_np_mlp))
print(classification_report(y_val, y_pred_np_mlp))

Epoch 000 | Train Loss: 0.2719 | Val Loss: 0.2720
Epoch 010 | Train Loss: 0.2539 | Val Loss: 0.2560
Epoch 020 | Train Loss: 0.2504 | Val Loss: 0.2535
Epoch 030 | Train Loss: 0.2476 | Val Loss: 0.2515
Epoch 040 | Train Loss: 0.2456 | Val Loss: 0.2502
Epoch 050 | Train Loss: 0.2437 | Val Loss: 0.2487
Epoch 060 | Train Loss: 0.2416 | Val Loss: 0.2471
Epoch 070 | Train Loss: 0.2395 | Val Loss: 0.2453
Epoch 080 | Train Loss: 0.2379 | Val Loss: 0.2442
Epoch 090 | Train Loss: 0.2364 | Val Loss: 0.2432
Accuracy: 0.9184661730341508
ROC AUC: 0.7823424550788758
              precision    recall  f1-score   support

         0.0       0.92      1.00      0.96     65253
         1.0       0.46      0.01      0.01      5785

    accuracy                           0.92     71038
   macro avg       0.69      0.50      0.49     71038
weighted avg       0.88      0.92      0.88     71038



### Results Summary Table

In [69]:
results = pd.DataFrame({
    "Library": [
        "Dummy",
        "Ensemble",
        "Neural network",
        "Keras",
        "TensorFlow",
        "NumPy"
    ],
    "Algorithm": [
        "DummyClassifier",
        "RandomForestCLassifier",
        "MLPClassifier",
        "Keras MLP",
        "TensorFlow MLP",
        "NumPy MLP",
    ],
    "Accuracy": [
        accuracy_score(y_val, y_pred_dummy),
        accuracy_score(y_val, y_pred_rf),
        accuracy_score(y_val, y_pred_sklearn_mlp),
        accuracy_score(y_val, y_pred_keras_mlp),
        accuracy_score(y_val, y_pred_tf_mlp),
        accuracy_score(y_val, y_pred_np_mlp)
    ],
    "ROC AUC": [
        roc_auc_score(y_val, y_proba_dummy),
        roc_auc_score(y_val, y_proba_rf),
        roc_auc_score(y_val, y_proba_sklearn_mlp),
        roc_auc_score(y_val, y_proba_keras_mlp),
        roc_auc_score(y_val, y_proba_tf_mlp),
        roc_auc_score(y_val, y_proba_np_mlp)
    ]
})

results

Unnamed: 0,Library,Algorithm,Accuracy,ROC AUC
0,Dummy,DummyClassifier,0.918565,0.5
1,Ensemble,RandomForestCLassifier,0.918579,0.80503
2,Neural network,MLPClassifier,0.918579,0.737177
3,Keras,Keras MLP,0.918565,0.792814
4,TensorFlow,TensorFlow MLP,0.630465,0.804012
5,NumPy,NumPy MLP,0.918466,0.782342


### Submission

In [70]:
y_pred_proba = numpy_MLP.predict_proba(X_test_selected).flatten()

submission = pd.DataFrame({
    'ID': test['ID'],
    'TARGET': y_pred_proba
})

submission.to_csv('submission.csv', index=False)