In [123]:
import sys
import setproctitle
sys.path.append('../src')

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
import torch.nn.functional as F
import numpy as np

from Models import MLP
from dataset import load_data, get_dataset
from Trainer import MLPTrainer
from sklearn import metrics
from sklearn.metrics import recall_score
import matplotlib.pyplot as plt


## Baseline Model

In [69]:
# Load the saved model state
model_0 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
model_0.load_state_dict(torch.load('../trained_models/base_mlp_model_49_64_64_7.pth'))
model_0.eval()

MLP(
  (fc1): Linear(in_features=49, out_features=64, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)

In [70]:
_, _, test_loader = load_data(batch_size=64)

In [71]:
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = model_0(inputs)
    
    # get metrics
    y_preds += outputs.argmax(axis=1).tolist()
    y_trues += labels.tolist()

y_preds_tensor = torch.tensor(y_preds)
y_trues_tensor = torch.tensor(y_trues)
y_trues_tensor = torch.flatten(y_trues_tensor)

In [72]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [73]:
val_recall_per_class # Recall of classes in testset

array([0.99926065, 0.31155779, 0.99521531, 0.99798884, 0.        ,
       0.99885422, 0.92241379])

## Ensemble models

### Model 1

In [74]:
model_1 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
model_1.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split1.pth'))
model_1.eval()

MLP(
  (fc1): Linear(in_features=49, out_features=64, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)

In [75]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = model_1(inputs)
    
    # get metrics
    y_preds += outputs.argmax(axis=1).tolist()
    y_trues += labels.tolist()

In [76]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [77]:
val_recall_per_class # Class 1 & 4 had little support

array([0.99897572, 0.32160804, 0.98863636, 0.99767703, 0.        ,
       0.9983254 , 0.85560345])

### Model 2

In [78]:
model_2 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
model_2.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split2.pth'))
model_2.eval()

MLP(
  (fc1): Linear(in_features=49, out_features=64, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)

In [79]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = model_2(inputs)
    
    # get metrics
    y_preds += outputs.argmax(axis=1).tolist()
    y_trues += labels.tolist()

In [80]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [81]:
val_recall_per_class # Class 1 & 4 had little support

array([0.99917222, 0.27889447, 0.99342105, 0.99773939, 0.        ,
       0.99779658, 0.86853448])

### Model 3

In [82]:
model_3 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
model_3.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split3.pth'))
model_3.eval()

MLP(
  (fc1): Linear(in_features=49, out_features=64, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)

In [83]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = model_3(inputs)
    
    # get metrics
    y_preds += outputs.argmax(axis=1).tolist()
    y_trues += labels.tolist()

In [84]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [85]:
val_recall_per_class # Class 1 & 4 had little support

array([0.99900274, 0.56532663, 0.99521531, 0.99767703, 0.        ,
       0.99753217, 0.86637931])

## Ensemble Model

In [86]:
def Emodel(inputs):
  model_1 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
  model_1.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split3.pth'))
  model_1.eval()

  model_2 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
  model_2.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split3.pth'))
  model_2.eval()

  model_3 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
  model_3.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split3.pth'))
  model_3.eval()

  with torch.no_grad():
    y1 = torch.argmax(model_1(inputs), dim=1)
    y2 = torch.argmax(model_2(inputs), dim=1)
    y3 = torch.argmax(model_3(inputs), dim=1)

  stacked_tensors = torch.stack((y1, y2, y3))
  preds, _ = torch.mode(stacked_tensors, dim=0)
  
  return preds
    

In [87]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = Emodel(inputs)
    
    # get metrics
    y_preds += outputs.tolist()
    y_trues += labels.tolist()

In [88]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [89]:
val_recall_per_class # Class 1 & 4 had little support

array([0.99900274, 0.56532663, 0.99521531, 0.99767703, 0.        ,
       0.99753217, 0.86637931])

## FGSM

In [90]:
# FGSM attack code
def fgsm_attack(data, epsilon, data_grad):
  # Collect the element-wise sign of the data gradient
  sign_data_grad = data_grad.sign()
  # Create the perturbed image by adjusting each pixel of the input image
  perturbed_image = data + epsilon*sign_data_grad
  # Adding clipping to maintain [0,1] range
  perturbed_image = torch.clamp(perturbed_image, 0, 1)
  # Return the perturbed image
  return perturbed_image

In [91]:
def test( model, test_loader, epsilon ):
  # Accuracy counter
  correct = 0
  adv_examples = []

  # Loop over all examples in test set
  for data, target in test_loader:
    if target == 0:
      pass # 0 is normal trafic
    # Set requires_grad attribute of tensor. Important for Attack
    data.requires_grad = True
    #print(data)

    # Forward pass the data through the model
    output = model(data)
    init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability

    # If the initial prediction is wrong, don't bother attacking, just move on
    if init_pred.item() != target.item():
      continue

    # Calculate the loss
    loss = F.nll_loss(output, target.flatten())

    # Zero all existing gradients
    model.zero_grad()

    # Calculate gradients of model in backward pass
    loss.backward()

    # Collect ``datagrad``
    data_grad = data.grad.data

    # Restore the data to its original scale
    data_denorm = data

    # Call FGSM Attack
    perturbed_data = fgsm_attack(data_denorm, epsilon, data_grad)

    # Re-classify the perturbed image
    output = model(perturbed_data)

    # Check for success
    final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
    if final_pred.item() == target.item():
      correct += 1
      # Special case for saving 0 epsilon examples
      if epsilon == 0 and len(adv_examples) < 5:
        adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
        adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
    else:
      # Save some adv examples for visualization later
      if len(adv_examples) < 1000:
        adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
        adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )

  # Calculate final accuracy for this epsilon
  final_acc = correct/float(len(test_loader))
  print(f"Epsilon: {epsilon}\tTest Accuracy = {correct} / {len(test_loader)} = {final_acc}")

  # Return the accuracy and an adversarial example
  return final_acc, adv_examples

In [92]:
_, _, fgsm_test_loader = load_data(batch_size=1)

In [93]:
final_acc, adv_examples = test( model_0, fgsm_test_loader, epsilon=0.01)

Epsilon: 0.01	Test Accuracy = 483301 / 485145 = 0.996199074503499


In [94]:
adv_examples

[(3,
  0,
  array([0.46695694, 0.9132783 , 0.01      , 0.91400915, 0.01      ,
         0.9066657 , 0.3674646 , 0.74732065, 0.763556  , 0.77869856,
         0.71395785, 0.71723264, 0.74816036, 0.8498398 , 0.8287048 ,
         0.8587374 , 0.89287484, 0.8629353 , 0.94988734, 0.8078178 ,
         0.7762663 , 0.2652135 , 0.35029113, 0.01      , 0.8780042 ,
         0.87174857, 0.        , 0.        , 1.        , 0.        ,
         0.01      , 0.        , 0.01      , 0.83695316, 0.8728829 ,
         0.8588689 , 0.8413313 , 0.7677778 , 0.9134034 , 0.7402302 ,
         0.470981  , 0.7457558 , 0.        , 0.        , 0.        ,
         0.01      , 0.        , 0.        , 0.01      ], dtype=float32)),
 (3,
  0,
  array([0.46695694, 0.7527427 , 0.01      , 0.75840974, 0.01      ,
         0.9066657 , 0.3654097 , 0.7804964 , 0.7857317 , 0.4884985 ,
         0.7463589 , 0.777643  , 0.79815084, 0.6101001 , 0.8419595 ,
         0.8781542 , 0.8904242 , 0.8620254 , 0.9411984 , 0.7827728 ,
        

In [95]:
len(adv_examples)

1000

In [96]:
y_pred, miss_pred, data = adv_examples[0]

In [105]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = Emodel(inputs)
    
    # get metrics
    y_preds += outputs.tolist()
    y_trues += labels.tolist()

In [112]:
y_preds = []
y_trues = []

for i, (y_pred, miss_pred, data) in enumerate(adv_examples):
  outputs = Emodel(torch.tensor([data]))

  # get metrics
  y_preds += outputs.tolist()
  y_trues.append(y_pred)

In [114]:
y_preds_np = np.array(y_preds)
y_trues_np = np.array(y_trues)

In [120]:
success_attack_rate = (y_preds_np == y_trues_np).sum() / 1000
success_attack_rate

0.244

In [122]:
print(f"Of the 1000 successful attacks on original model only {success_attack_rate*100:.1f}% where successful against new model.")

Of the 1000 successful attacks on original model only 24.4% where successful against new model.
