# 06 - Neural Network (Refactored)

**Goal:** Train the Focal Loss NN on the new dataset.
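**Output:** Metrics saved to `results/new_dataset_analysis/metrics/06_neural_network_results.json`; the best checkpoint (by validation PR-AUC) is saved to `models/best_nn_model_refactored.keras`.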

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from pathlib import Path
import json
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_curve, auc

# Locations, relative to the notebook: pre-processed inputs, and where the
# metrics JSON and model checkpoint are written.
PROCESSED_DIR = Path('../data/processed/new_analysis')
METRICS_DIR = Path('../results/new_dataset_analysis/metrics')
MODELS_DIR = Path('../models')

# Make sure both output folders exist before anything tries to write to them.
for out_dir in (METRICS_DIR, MODELS_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

print("Loading Data...")
# .squeeze() collapses a single-column target frame into a Series.
X_train = pd.read_csv(PROCESSED_DIR / 'X_train_scaled.csv')
y_train = pd.read_csv(PROCESSED_DIR / 'y_train.csv').squeeze()
X_test = pd.read_csv(PROCESSED_DIR / 'X_test_scaled.csv')
y_test = pd.read_csv(PROCESSED_DIR / 'y_test.csv').squeeze()

# Validation split: the first 10% of the training rows become the validation
# set and the remaining rows are used for fitting.
# NOTE(review): the split is positional (no shuffle, no stratification) --
# confirm the rows were already shuffled upstream, otherwise the class balance
# of the validation slice may differ from the rest of the training data.
val_split = int(0.1 * len(X_train))
X_val, X_train_partial = X_train.iloc[:val_split], X_train.iloc[val_split:]
y_val, y_train_partial = y_train.iloc[:val_split], y_train.iloc[val_split:]

print("Data Loaded.")

Loading Data...
Data Loaded.


In [3]:
def binary_focal_loss(gamma=2.0, alpha=0.25):
    """Build a binary focal-loss function usable as a Keras ``loss``.

    The returned callable computes, element-wise,
    ``alpha_t * (1 - p_t) ** gamma * BCE(y_true, y_pred)`` and averages the
    result over every element.

    Args:
        gamma: Exponent applied to ``(1 - p_t)``; larger values shrink the
            contribution of examples whose true class already gets a high
            predicted probability.
        alpha: Weight applied where ``y_true`` is 1; ``1 - alpha`` is applied
            where ``y_true`` is 0.

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` returning the mean
        focal loss as a scalar tensor.
    """
    def focal_loss_fixed(y_true, y_pred):
        labels = tf.cast(y_true, tf.float32)
        probs = tf.cast(y_pred, tf.float32)

        # Keep probabilities strictly inside (0, 1) so both logs stay finite.
        eps = tf.keras.backend.epsilon()
        probs = tf.clip_by_value(probs, eps, 1.0 - eps)

        # Plain binary cross-entropy, per element.
        cross_entropy = -labels * tf.math.log(probs) - (1 - labels) * tf.math.log(1 - probs)

        # Probability the model assigned to the true class of each element.
        prob_true_class = (labels * probs) + ((1 - labels) * (1 - probs))

        # Class-balancing weight: alpha where label is 1, (1 - alpha) where it is 0.
        class_weight = labels * alpha + (1 - labels) * (1 - alpha)

        # Down-weight elements that are already well classified.
        modulating_factor = tf.math.pow((1 - prob_true_class), gamma)

        return tf.reduce_mean(class_weight * modulating_factor * cross_entropy)
    return focal_loss_fixed

# Seed the Python, NumPy and TensorFlow RNGs right before the model is built so
# that weight initialisation, dropout masks and batch shuffling are repeatable
# every time this cell is re-run (some GPU ops may still be nondeterministic).
SEED = 42
keras.utils.set_random_seed(SEED)

# Small fully-connected binary classifier: two L2-regularised ReLU layers, each
# followed by dropout, and a single sigmoid output unit.
model = keras.Sequential([
    # Declare the input explicitly; passing `input_shape=` to the first Dense
    # layer is discouraged by Keras and emits a UserWarning.
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(32, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(1, activation='sigmoid')
])

# Optimise the focal loss with its default gamma/alpha; track ROC-AUC and
# PR-AUC during training.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss=binary_focal_loss(),
              metrics=[keras.metrics.AUC(name='auc'), keras.metrics.AUC(curve='PR', name='pr_auc')])

# Both callbacks watch validation PR-AUC (higher is better): the checkpoint
# keeps only the best model on disk, and early stopping halts after 10 epochs
# without improvement and restores the best weights.
checkpoint = keras.callbacks.ModelCheckpoint(MODELS_DIR / 'best_nn_model_refactored.keras', save_best_only=True, monitor='val_pr_auc', mode='max')
early_stop = keras.callbacks.EarlyStopping(monitor='val_pr_auc', patience=10, mode='max', restore_best_weights=True)

history = model.fit(X_train_partial, y_train_partial, validation_data=(X_val, y_val), epochs=50, batch_size=2048, callbacks=[checkpoint, early_stop], verbose=1)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - auc: 0.2110 - loss: 0.0824 - pr_auc: 0.0019 - val_auc: 0.5681 - val_loss: 0.0544 - val_pr_auc: 0.0886
Epoch 2/50
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - auc: 0.6693 - loss: 0.0414 - pr_auc: 0.1855 - val_auc: 0.8330 - val_loss: 0.0284 - val_pr_auc: 0.5820
Epoch 3/50
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - auc: 0.8299 - loss: 0.0216 - pr_auc: 0.3933 - val_auc: 0.8763 - val_loss: 0.0148 - val_pr_auc: 0.6523
Epoch 4/50
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - auc: 0.8972 - loss: 0.0114 - pr_auc: 0.5340 - val_auc: 0.9220 - val_loss: 0.0080 - val_pr_auc: 0.6808
Epoch 5/50
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - auc: 0.9150 - loss: 0.0063 - pr_auc: 0.5910 - val_auc: 0.9278 - val_loss: 0.0046 - val_pr_auc: 0.7073
Epoch 6/50
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [4]:
# Predicted probabilities for the held-out test set.
preds = model.predict(X_test)

# Threshold-free metrics: ROC-AUC, and the area under the precision-recall
# curve (computed with recall on the x-axis).
roc = roc_auc_score(y_test, preds)
prec, rec, _ = precision_recall_curve(y_test, preds)
pr_auc = auc(rec, prec)

# Hard class labels at a 0.5 probability cut-off feed the per-class report.
predicted_labels = (preds > 0.5).astype(int)
nn_report = classification_report(y_test, predicted_labels, output_dict=True)

results = {
    'NeuralNetwork': {
        'roc_auc': roc,
        'pr_auc': pr_auc,
        'report': nn_report,
    }
}

# Persist everything as pretty-printed JSON in the metrics folder.
results_path = METRICS_DIR / '06_neural_network_results.json'
with results_path.open('w') as f:
    json.dump(results, f, indent=4)

print("NN Results Saved.")

[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 743us/step
NN Results Saved.
