In [21]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from art.estimators.classification import TensorFlowV2Classifier
from art.attacks.evasion import FastGradientMethod
from art.defences.trainer import AdversarialTrainer
from sklearn.metrics import accuracy_score, roc_auc_score

In [22]:
# Load Data
data_dir = 'ml/StandardScalerOneHotEncoder'
X_train = pd.read_csv(os.path.join(data_dir, 'X_train.csv')).values
y_train = pd.read_csv(os.path.join(data_dir, 'y_train.csv'))['is_default'].values
X_val   = pd.read_csv(os.path.join(data_dir, 'X_val.csv')).values
y_val   = pd.read_csv(os.path.join(data_dir, 'y_val.csv'))['is_default'].values
X_test  = pd.read_csv(os.path.join(data_dir, 'X_test.csv')).values
y_test  = pd.read_csv(os.path.join(data_dir, 'y_test.csv'))['is_default'].values

### Model Creation Function

-   **Architecture**: A `Sequential` model, which is a linear stack of layers.
    -   It contains two hidden `Dense` (fully-connected) layers with 128 and 64 units respectively.
    -   Each hidden layer uses the **`relu`** (Rectified Linear Unit) activation function. ReLU is chosen for its computational efficiency and its effectiveness in mitigating the vanishing gradient problem in deeper networks.

-   **Regularization**: `Dropout` is used as a regularization technique to prevent overfitting.
    -   `Dropout(0.3)` randomly sets 30% of the input units to 0 at each update during training. This prevents neurons from co-adapting too much and forces the model to learn more robust features.

-   **Output Layer**: The final layer is configured for classification.
    -   It is a `Dense` layer with 2 units, corresponding to the two output classes (defaulted vs not defaulted)
    -   It uses the **`softmax`** activation function, which converts the raw output into a probability distribution. The output vector's components sum to 1, representing the model's predicted probability for each class.

-   **Compiler**: This step configures the model's learning process.
    -   **Optimizer (`adam`)**: The model uses the Adam (Adaptive Moment Estimation) optimizer. It's an efficient stochastic gradient descent algorithm that computes adaptive learning rates for each parameter by storing an exponentially decaying average of past squared gradients (like RMSprop) and past gradients (like momentum). It is a robust and widely used default optimizer.
    -   **Loss Function (`sparse_categorical_crossentropy`)**: The loss function measures the divergence between the true and predicted probability distributions. The `sparse` version of categorical crossentropy is used here because the ground-truth labels are provided as integers (e.g., `0`, `1`), not as one-hot encoded vectors.
   
The Dense layer and Dropout rates are common starting points, not chosen through experimentation.

In [23]:
def create_model(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(2, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

### Training Configuration Parameters

-   **`epochs=50`**: An **epoch** is one complete forward and backward pass of the entire training dataset. Setting this to 50 establishes the *maximum* number of training cycles. In practice, the training will likely end sooner due to the `EarlyStopping` callback.

-   **`batch_size=256`**: The training data is not processed all at once. Instead, it is broken down into smaller subsets called **batches**. The model's internal weights are updated after processing each batch. This method, mini-batch gradient descent, is more memory-efficient and often leads to faster convergence and better generalization than processing the entire dataset in one go. A batch size of 256 is a common choice, as powers of two can optimize memory allocation on GPUs.

-   **`callbacks=[tf.keras.callbacks.EarlyStopping(...)]`**: **Callbacks** are objects that can perform actions at various stages of training, such as at the end of an epoch. They are used here to implement a crucial control mechanism.
    -   **`EarlyStopping`**: This callback is a form of regularization that prevents **overfitting** by halting the training process once the model's performance stops improving on a validation set.
        -   `monitor='val_loss'`: The specific metric being monitored is the loss calculated on the validation data (`X_val`, `y_val`). This is the most important indicator of how well the model generalizes to unseen data. If this value stops decreasing, the model is no longer learning useful patterns.
        -   `patience=5`: This parameter defines the number of epochs to wait for an improvement before stopping. A `patience` of 5 allows the training to continue for 5 epochs even if `val_loss` is not improving, which helps to avoid stopping prematurely due to random fluctuations in the validation loss.

In [24]:
# Train the Baseline Model
input_dim = X_train.shape[1]
baseline_model = create_model(input_dim)
baseline_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=256,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)],
    verbose=0
)

<keras.src.callbacks.history.History at 0x40fba5e20>

In [25]:
# Evaluate Baseline Model on CLEAN Data
baseline_preds_clean_probs = baseline_model.predict(X_test, verbose=0)[:, 1]
baseline_preds_clean_labels = (baseline_preds_clean_probs >= 0.5).astype(int)

auc_baseline_clean = roc_auc_score(y_test, baseline_preds_clean_probs)
acc_baseline_clean = accuracy_score(y_test, baseline_preds_clean_labels)

print(f"AUC on clean data: {auc_baseline_clean:.4f}")
print(f"Accuracy on clean data: {acc_baseline_clean:.4f}")

AUC on clean data: 0.6686
Accuracy on clean data: 0.6135


In [26]:
# Create the Adversarial Attack

# Wrap the Keras model in an ART classifier - The clip_values are important to bound the perturbations
classifier_art = TensorFlowV2Classifier(
    model=baseline_model, 
    nb_classes=2, 
    input_shape=(X_train.shape[1],), 
    loss_object=tf.keras.losses.BinaryCrossentropy(),
    clip_values=(np.min(X_train), np.max(X_train))
)

# Create the attack instance (Fast Gradient Sign Method)
attack = FastGradientMethod(estimator=classifier_art, eps=0.1)

# Generate adversarial examples from the original test set
x_test_adv = attack.generate(x=X_test)
print("Adversarial examples created.")

Adversarial examples created.


In [27]:
# Evaluate Baseline Model on ADVERSARIAL Data
print("\n--- Evaluating Baseline Model on ADVERSARIAL Test Data ---")

baseline_preds_adv_probs = classifier_art.predict(x_test_adv, verbose=0)[:, 1]
baseline_preds_adv_labels = (baseline_preds_adv_probs >= 0.5).astype(int)

auc_baseline_adv = roc_auc_score(y_test, baseline_preds_adv_probs)
acc_baseline_adv = accuracy_score(y_test, baseline_preds_adv_labels)

print(f"AUC on adversarial data: {auc_baseline_adv:.4f}")
print(f"Accuracy on adversarial data: {acc_baseline_adv:.4f}")


--- Evaluating Baseline Model on ADVERSARIAL Test Data ---
AUC on adversarial data: 0.4829
Accuracy on adversarial data: 0.4583


In [28]:
# Perform Adversarial Training 
print("\n--- Performing QUICK Adversarial Training (fewer epochs) ---")

robust_model_quick = create_model(input_dim)

robust_classifier_quick = TensorFlowV2Classifier(
    model=robust_model_quick,
    nb_classes=2,
    input_shape=(X_train.shape[1],),
    loss_object=tf.keras.losses.SparseCategoricalCrossentropy(),
    clip_values=(np.min(X_train), np.max(X_train)),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
)

trainer_quick = AdversarialTrainer(classifier=robust_classifier_quick, attacks=attack, ratio=0.5)

# Run for only 5 epochs instead of 50 for a quick test.
trainer_quick.fit(
    x=X_train,
    y=y_train,
    nb_epochs=5,
    batch_size=256
)

print("Quick robust model training complete.")


--- Performing QUICK Adversarial Training (fewer epochs) ---


Precompute adv samples: 100%|██████████| 1/1 [00:13<00:00, 13.91s/it]
Adversarial training epochs:   0%|          | 0/5 [00:00<?, ?it/s]2025-06-21 11:38:36.954410: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
Adversarial training epochs: 100%|██████████| 5/5 [03:27<00:00, 41.45s/it]

Quick robust model training complete.





In [29]:
# # Perform Adversarial Training for Defense

# #  Create a new, fresh model instance for robust training
# robust_model = create_model(input_dim)

#  Wrap it in the TensorFlowV2Classifier
# robust_classifier = TensorFlowV2Classifier(
#     model=robust_model,
#     nb_classes=2,
#     input_shape=(X_train.shape[1],),
#     loss_object=tf.keras.losses.SparseCategoricalCrossentropy(),
#     clip_values=(np.min(X_train), np.max(X_train)),
#     optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
# )

#  Create an AdversarialTrainer instance
# trainer = AdversarialTrainer(classifier=robust_classifier, attacks=attack, ratio=0.5)

#  Train the robust model using the trainer's fit method
# trainer.fit(
#     x=X_train,
#     y=y_train,
#     nb_epochs=50,
#     batch_size=256
# )

# print("Robust model training complete.")
print('Skipped')    

Skipped


In [30]:
# Evaluate the QUICKLY-Trained Robust Model

# Evaluate on the original, CLEAN test data
print("Evaluating on CLEAN data...")
robust_preds_clean_probs = robust_classifier_quick.predict(X_test, verbose=0)[:, 1]
robust_preds_clean_labels = (robust_preds_clean_probs >= 0.5).astype(int)
auc_robust_clean_quick = roc_auc_score(y_test, robust_preds_clean_probs)
acc_robust_clean_quick = accuracy_score(y_test, robust_preds_clean_labels)
print(f"Quick Robust Model - AUC on CLEAN data: {auc_robust_clean_quick:.4f}")
print(f"Quick Robust Model - Accuracy on CLEAN data: {acc_robust_clean_quick:.4f}")

# Evaluate on the ADVERSARIAL test data (created earlier)
print("\nEvaluating on ADVERSARIAL data...")
robust_preds_adv_probs = robust_classifier_quick.predict(x_test_adv, verbose=0)[:, 1]
robust_preds_adv_labels = (robust_preds_adv_probs >= 0.5).astype(int)
auc_robust_adv_quick = roc_auc_score(y_test, robust_preds_adv_probs)
acc_robust_adv_quick = accuracy_score(y_test, robust_preds_adv_labels)
print(f"Quick Robust Model - AUC on ADVERSARIAL data: {auc_robust_adv_quick:.4f}")
print(f"Quick Robust Model - Accuracy on ADVERSARIAL data: {acc_robust_adv_quick:.4f}")

Evaluating on CLEAN data...
Quick Robust Model - AUC on CLEAN data: 0.6554
Quick Robust Model - Accuracy on CLEAN data: 0.6103

Evaluating on ADVERSARIAL data...
Quick Robust Model - AUC on ADVERSARIAL data: 0.5557
Quick Robust Model - Accuracy on ADVERSARIAL data: 0.6135


In [31]:
# Summary of All Results
summary_data = {
    "Model Type": ["Baseline", "Baseline", "Robust (Quick)", "Robust (Quick)"],
    "Test Data": ["Clean", "Adversarial", "Clean", "Adversarial"],
    "AUC": [auc_baseline_clean, auc_baseline_adv, auc_robust_clean_quick, auc_robust_adv_quick],
    "Accuracy": [acc_baseline_clean, acc_baseline_adv, acc_robust_clean_quick, acc_robust_adv_quick]
}

summary_df = pd.DataFrame(summary_data)
print(summary_df.round(4))

       Model Type    Test Data     AUC  Accuracy
0        Baseline        Clean  0.6686    0.6135
1        Baseline  Adversarial  0.4829    0.4583
2  Robust (Quick)        Clean  0.6554    0.6103
3  Robust (Quick)  Adversarial  0.5557    0.6135
