## GET DATA

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split
import torch

import time

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import xgboost

In [3]:
import numpy as np

# Load the arrays from the .npz file
data = np.load('data_arrays.npz')

# Access the arrays
x_train = data['X_train']
x_test = data['X_test']
x_val = data['X_val']
y_train = data['y_train']
y_test = data['y_test']
y_val = data['y_val']
X_combined = np.concatenate((x_train, x_test, x_val), axis=0)
y_combined = np.concatenate((y_train, y_test, y_val), axis=0)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.int64)
X_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.int64)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class DNNModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(DNNModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, 30)
        self.fc3 = nn.Linear(30, 20)
        self.fc4 = nn.Linear(20, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        x = self.softmax(x)
        return x
# You need to know the input and output size used in your trained model
input_size = 33
output_size = 6
device = torch.device("cpu")

model = DNNModel(input_size, output_size).to(device)
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(), lr=0.01)
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs.squeeze(), y_batch)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/20], Loss: 1.5436
Epoch [2/20], Loss: 1.5936
Epoch [3/20], Loss: 1.6436
Epoch [4/20], Loss: 1.6686
Epoch [5/20], Loss: 1.6436
Epoch [6/20], Loss: 1.7436
Epoch [7/20], Loss: 1.7686
Epoch [8/20], Loss: 1.5186
Epoch [9/20], Loss: 1.6436
Epoch [10/20], Loss: 1.7936
Epoch [11/20], Loss: 1.6436
Epoch [12/20], Loss: 1.6186
Epoch [13/20], Loss: 1.4686
Epoch [14/20], Loss: 1.6936
Epoch [15/20], Loss: 1.5936
Epoch [16/20], Loss: 1.5936
Epoch [17/20], Loss: 1.6686
Epoch [18/20], Loss: 1.7686
Epoch [19/20], Loss: 1.7936
Epoch [20/20], Loss: 1.7686


In [6]:
from a2pm import A2PMethod
from a2pm.callbacks import BaseCallback, MetricCallback, TimeCallback
from a2pm.patterns import BasePattern, CombinationPattern, IntervalPattern
from a2pm.wrappers import BaseWrapper, KerasWrapper, SklearnWrapper, TorchWrapper
import time

In [7]:

def find_binary_columns(X_train):
    binary_columns = []
    for col in range(X_train.shape[1]):
        unique_values = np.unique(X_train[:, col])
        if set(unique_values).issubset({0, 1}):
            binary_columns.append(col)
    return binary_columns

binary_columns = find_binary_columns(x_train)

numerical_columns = []
for i in range(0,32):
    if i not in binary_columns:
        numerical_columns.append(i)

In [8]:
import numpy as np
import torch
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix
# Assuming your model and TorchWrapper setup is correct and as provided previously
dnn_classifier = TorchWrapper(model)


pattern = (
    {
        "type": "interval",
        "features": numerical_columns,
        "ratio": 0.1,
        "max_ratio": 0.3,
        "missing_value": 0.0,
        "probability": 0.6,
    },
    {
        "type": "combination",
        "features": binary_columns,
        "probability": 0.4,
    },
)
method = A2PMethod(pattern)
start_time = time.time()

# Ensure tensors are on CPU and convert them to NumPy before passing to fit_generate
X_tensor = torch.tensor(X_combined, dtype=torch.float32).to('cpu').numpy()
y_tensor = torch.tensor(y_combined, dtype=torch.long).to('cpu').numpy()

X_adversarial = method.fit_generate(dnn_classifier, X_tensor, y_tensor)
training_time = time.time() - start_time

if len(y_combined.shape) > 1 and y_combined.shape[1] > 1:
    y_test_indices = np.argmax(y_combined, axis=1)
else:
    y_test_indices = y_combined

# Set the model to evaluation mode
model.eval()

X_adversarial_tensor = torch.tensor(X_adversarial, dtype=torch.float32).to(device)
# Disable gradient computation for inference
with torch.no_grad():
    preds = model(X_adversarial_tensor)

# Convert model outputs to class indices if necessary
preds_indices = torch.argmax(preds, dim=1)

# Convert the indices back to CPU and numpy for use with sklearn metrics
preds_indices_np = preds_indices.cpu().numpy()

# Calculate precision, recall, and F1-score
precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
    y_test_indices, preds_indices_np, average='micro')
precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
    y_test_indices, preds_indices_np, average='macro')

# Calculate accuracy
acc = np.mean(preds_indices_np == y_test_indices)

# Compute the confusion matrix and derived metrics
conf_matrix = confusion_matrix(y_test_indices, preds_indices_np)
FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)
FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
TP = np.diag(conf_matrix)
TN = conf_matrix.sum() - (FP + FN + TP)

TPR = TP / (TP + FN)  # True Positive Rate
TNR = TN / (TN + FP)  # True Negative Rate
FPR = FP / (FP + TN)  # False Positive Rate
FNR = FN / (FN + TP)  # False Negative Rate

# Calculate averages
TPR_avg = np.mean(TPR)
TNR_avg = np.mean(TNR)
FPR_avg = np.mean(FPR)
FNR_avg = np.mean(FNR)

# Prepare a dictionary for the results
result_dict = {
    'Accuracy': acc,
    'Micro Precision': precision_micro,
    'Macro Precision': precision_macro,
    'Micro Recall': recall_micro,
    'Macro Recall': recall_macro,
    'Micro F1 Score': fscore_micro,
    'Macro F1 Score': fscore_macro,
    'Average TNR': TNR_avg,
    'Average FPR': FPR_avg,
    'Average FNR': FNR_avg,
    'Macro TNR': TNR_avg,
    'Macro FNR': FNR_avg,
    'Macro FPR': FPR_avg
}

  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
result_dict

{'Accuracy': 0.3935292624555805,
 'Micro Precision': 0.3935292624555805,
 'Macro Precision': 0.06558821040926342,
 'Micro Recall': 0.3935292624555805,
 'Macro Recall': 0.16666666666666666,
 'Micro F1 Score': 0.39352926245558045,
 'Macro F1 Score': 0.09413251974872554,
 'Average TNR': 0.8333333333333334,
 'Average FPR': 0.16666666666666666,
 'Average FNR': 0.8333333333333334,
 'Macro TNR': 0.8333333333333334,
 'Macro FNR': 0.8333333333333334,
 'Macro FPR': 0.16666666666666666}

In [10]:
X_adversarial.shape

(330936, 33)

In [11]:
from art.estimators.classification import PyTorchClassifier
from art.defences.transformer.evasion import DefensiveDistillation
# Initialize the ART classifier for the teacher model
teacher_classifier = PyTorchClassifier(
    model=model,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(input_size,),
    nb_classes=output_size
)


# Define the student model
student_model = DNNModel(input_size, output_size)

# Define loss function and optimizer for the student model
student_optimizer = optim.Adam(student_model.parameters(), lr=0.001)

# Initialize the ART classifier for the student model
student_classifier = PyTorchClassifier(
    model=student_model,
    loss=criterion,
    optimizer=student_optimizer,
    input_shape=(input_size,),
    nb_classes=output_size
)

# Perform defensive distillation
defensive_distillation = DefensiveDistillation(
    classifier=teacher_classifier
)
student_classifier = defensive_distillation(X_adversarial, student_classifier)


In [12]:
# Set the model to evaluation mode
model.eval()

X_adversarial_tensor = torch.tensor(X_adversarial, dtype=torch.float32).to(device)
# Disable gradient computation for inference
preds_indices = student_classifier.predict(X_adversarial)


# Calculate precision, recall, and F1-score
precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
    y_test_indices, preds_indices_np, average='micro')
precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
    y_test_indices, preds_indices_np, average='macro')

# Calculate accuracy
acc = np.mean(preds_indices_np == y_test_indices)

# Compute the confusion matrix and derived metrics
conf_matrix = confusion_matrix(y_test_indices, preds_indices_np)
FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)
FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
TP = np.diag(conf_matrix)
TN = conf_matrix.sum() - (FP + FN + TP)

TPR = TP / (TP + FN)  # True Positive Rate
TNR = TN / (TN + FP)  # True Negative Rate
FPR = FP / (FP + TN)  # False Positive Rate
FNR = FN / (FN + TP)  # False Negative Rate

# Calculate averages
TPR_avg = np.mean(TPR)
TNR_avg = np.mean(TNR)
FPR_avg = np.mean(FPR)
FNR_avg = np.mean(FNR)

# Prepare a dictionary for the results
result_dict = {
    'Accuracy': acc,
    'Micro Precision': precision_micro,
    'Macro Precision': precision_macro,
    'Micro Recall': recall_micro,
    'Macro Recall': recall_macro,
    'Micro F1 Score': fscore_micro,
    'Macro F1 Score': fscore_macro,
    'Average TNR': TNR_avg,
    'Average FPR': FPR_avg,
    'Average FNR': FNR_avg,
    'Macro TNR': TNR_avg,
    'Macro FNR': FNR_avg,
    'Macro FPR': FPR_avg
}
result_dict

  _warn_prf(average, modifier, msg_start, len(result))


{'Accuracy': 0.3935292624555805,
 'Micro Precision': 0.3935292624555805,
 'Macro Precision': 0.06558821040926342,
 'Micro Recall': 0.3935292624555805,
 'Macro Recall': 0.16666666666666666,
 'Micro F1 Score': 0.39352926245558045,
 'Macro F1 Score': 0.09413251974872554,
 'Average TNR': 0.8333333333333334,
 'Average FPR': 0.16666666666666666,
 'Average FNR': 0.8333333333333334,
 'Macro TNR': 0.8333333333333334,
 'Macro FNR': 0.8333333333333334,
 'Macro FPR': 0.16666666666666666}