### MNIST

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torchvision
import torch.optim as optim
from datagen import *
from nets import *
from backdoor import Backdoor
from defense import Defense

# `import torch.nn as nn` and `import torch.optim as optim` bind only `nn` and
# `optim`, not `torch` itself. Import it explicitly so this cell does not depend
# on one of the star imports above happening to re-export the name.
import torch

# use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  from .autonotebook import tqdm as notebook_tqdm


### Choosing Network and Dataset

In [2]:
# Network + dataset selection:
#   - VggNet with the '16-layer' config, 1 input channel, 10 classes, dropout 0.5
#   - models created from the SGD optimizer class and CrossEntropyLoss class, lr=0.01
#   - data loaded from torchvision's MNIST, using ImageEntity as the second argument
backdoor = Backdoor(
    VggNet,
    config='16-layer',
    channels=1,
    classes=10,
    dropout=0.5,
)
backdoor.create_models(
    optim.SGD,
    nn.CrossEntropyLoss,
    lr=0.01,
)
backdoor.load_data(
    torchvision.datasets.MNIST,
    ImageEntity,
)

### Poison setup

In [3]:
# Poison ingredients: patches, target labels, transformations and the merge rule.
# Patches come from an ImagePatch built with size (9, 9), a second argument of 1
# (presumably the channel count -- TODO confirm) and the 'random' mode; one call
# requests len(backdoor) of them.
patches = (
    ImagePatch((9, 9), 1, 'random')
    .get_patches(len(backdoor))
)
# map every digit label to its successor, wrapping 9 back around to 0
targets = {label: (label + 1) % 10 for label in range(10)}
# three transform lists, in order: expand, rotate, relabel (using `targets`)
transforms = (
    [ExpandTransform(1)],
    [RotateTransform()],
    [LabelTransform(targets)],
)
# merge rule used to combine patches with images (select=True)
merge = ImageMerge(select=True)

### Poisoning

In [4]:
# poison 20% of the data (pct=0.2) using the patches, transforms and merge rule
# defined in the poison-setup cell
backdoor.poison(patches, transforms, merge, pct=0.2)

### Training Base and Trojan Net

In [5]:
# Train the base DNN and the trojan DNN for one epoch each, batch size 16, on `device`.
# NOTE(review): the meaning of the leading positional argument `2` is not visible
# in this notebook -- confirm against Backdoor.train.
loss = backdoor.train(
    2,
    epochs=1,
    verbose=True,
    device=device,
    batch_size=16,
)

Training started
Epoch 1/1 | 24.99% | Loss: 0.2910 | Samples trained: 14992/60000
Epoch 1/1 | 49.97% | Loss: 0.1869 | Samples trained: 29984/60000
Epoch 1/1 | 74.96% | Loss: 0.1462 | Samples trained: 44976/60000
Epoch 1/1 | 99.95% | Loss: 0.1237 | Samples trained: 59968/60000
Epoch 1 complete | Loss: 0.1237
Training complete | Net Average Loss: 0.1237 | Total epochs: 1
Training started
Epoch 1/1 | 24.99% | Loss: 0.5477 | Samples trained: 14992/60000
Epoch 1/1 | 49.97% | Loss: 0.3561 | Samples trained: 29984/60000
Epoch 1/1 | 74.96% | Loss: 0.2793 | Samples trained: 44976/60000
Epoch 1/1 | 99.95% | Loss: 0.2321 | Samples trained: 59968/60000
Epoch 1 complete | Loss: 0.2319
Training complete | Net Average Loss: 0.2319 | Total epochs: 1


### Evaluating Backdoor

In [6]:
# evaluate the backdoor: accuracy on clean data (VA) and attack success rate (ASR)
metrics = backdoor.eval(
    verbose=True,
    device=device,
)

Accuracy on Clean | Base 98.65% | Trojan 98.44% | Difference -0.21%
Base Accuracy on Poison 0.88% | Attack Success Rate (ASR): 95.00%
Average Tensor Distance: 255.00 | Net Tensor Difference 15300222.53


### Creating a Defense

In [7]:
# initialise the defense from the backdoor experiment object built above
defense = Defense(backdoor)

### Detection setup

In [8]:
# Inputs for detection:
#   - the base and trojan network modules
#   - the clean/poisoned train and test entity sets
#   - square size ranges (3x3 up to 11x11, odd sizes) for the synthetic patches
base, trojan = backdoor.get_net_modules()
cleantrain, poisontrain, cleantest, poisontest = backdoor.get_datasets()
patch_size_ranges = [(side, side) for side in (3, 5, 7, 9, 11)]

### Detection Base

In [9]:
# Base DNN: run detection with 30% of the clean test samples synthetically
# poisoned (pct=0.3); a 10% drop in VA (threshold=0.1) is the detection criterion.
metrics = defense.detect(
    base,
    cleantest,
    threshold=0.1,
    size_ranges=patch_size_ranges,
    pct=0.3,
    verbose=True,
    device=device,
)

Patch size (3, 3) | Synthetic Poison Accuracy 98.58% | Original Accuracy 98.65% | Difference -0.07%
Patch size (5, 5) | Synthetic Poison Accuracy 98.25% | Original Accuracy 98.65% | Difference -0.40%
Patch size (7, 7) | Synthetic Poison Accuracy 97.32% | Original Accuracy 98.65% | Difference -1.33%
Patch size (9, 9) | Synthetic Poison Accuracy 94.76% | Original Accuracy 98.65% | Difference -3.89%
Patch size (11, 11) | Synthetic Poison Accuracy 91.21% | Original Accuracy 98.65% | Difference -7.44%
Average Accuracy 96.02% | Average Difference -2.63% | Lowest Score: 91.21% | Likihood of Backdoor: Low


### Detection Trojan

In [10]:
# Trojan DNN: same detection settings as for the base DNN -- 30% of the clean test
# samples synthetically poisoned (pct=0.3), 10% drop in VA as the criterion (threshold=0.1).
metrics = defense.detect(
    trojan,
    cleantest,
    threshold=0.1,
    size_ranges=patch_size_ranges,
    pct=0.3,
    verbose=True,
    device=device,
)

Patch size (3, 3) | Synthetic Poison Accuracy 98.27% | Original Accuracy 98.44% | Difference -0.17%
Patch size (5, 5) | Synthetic Poison Accuracy 98.05% | Original Accuracy 98.44% | Difference -0.39%
Patch size (7, 7) | Synthetic Poison Accuracy 93.83% | Original Accuracy 98.44% | Difference -4.61%
Patch size (9, 9) | Synthetic Poison Accuracy 71.33% | Original Accuracy 98.44% | Difference -27.11%
Patch size (11, 11) | Synthetic Poison Accuracy 69.05% | Original Accuracy 98.44% | Difference -29.39%
Average Accuracy 86.11% | Average Difference -12.33% | Lowest Score: 69.05% | Likihood of Backdoor: High


### Blocking Patches setup

In [11]:
# number of samples to block
n = 5_000
# Reference patch whose variance is compared against same-sized regions of each image.
# NOTE(review): the poison patches above were (9, 9) while this reference patch is
# (10, 10) -- confirm the size difference is intended.
patch = ImagePatch((10, 10), 1, 'random')

### Blocking

In [12]:
# block the first-n (n=5000) poisoned test images: per the original note, regions
# the size of `patch` are replaced with average pixel values (the data is
# single-channel, so "rgb" does not apply) -- TODO confirm against Defense.block
blockedloader = defense.block(poisontest, patch, n=n)

### Testing after Block Base

In [13]:
# re-test the base DNN on the blocked loader
metrics = defense.test(
    base,
    blockedloader,
    verbose=True,
    device=device,
)

Testing started
24.84% Testing complete | Loss: 4.5969 | Accuracy: 0.0441
49.68% Testing complete | Loss: 4.5363 | Accuracy: 0.0425
74.52% Testing complete | Loss: 4.5544 | Accuracy: 0.0433
99.36% Testing complete | Loss: 4.5524 | Accuracy: 0.0439
Testing complete | Loss: 4.5513 | Accuracy: 4.42%


### Testing after Block Trojan

In [14]:
# re-test the trojan DNN on the blocked loader
metrics = defense.test(
    trojan,
    blockedloader,
    verbose=True,
    device=device,
)

Testing started
24.84% Testing complete | Loss: 0.9353 | Accuracy: 0.7067
49.68% Testing complete | Loss: 0.9705 | Accuracy: 0.7027
74.52% Testing complete | Loss: 0.9552 | Accuracy: 0.7027
99.36% Testing complete | Loss: 0.9411 | Accuracy: 0.7031
Testing complete | Loss: 0.9479 | Accuracy: 70.28%


### Defense by retraining

In [15]:
# Defense by retraining:
#   1. take a dataloader over the clean training set
#   2. reset the defense net module (back to the original trojan model)
#   3. retrain it for one epoch on the clean data
dataloader = cleantrain.get_dataloader()
defense.reset()
loss = defense.retrain(
    dataloader,
    epochs=1,
    verbose=True,
    device=device,
    batch_size=16,
)

Training started
Epoch 1/1 | 24.96% | Loss: 0.1401 | Samples trained: 14976/60000
Epoch 1/1 | 49.92% | Loss: 0.1414 | Samples trained: 29952/60000
Epoch 1/1 | 74.88% | Loss: 0.1391 | Samples trained: 44928/60000
Epoch 1/1 | 99.84% | Loss: 0.1326 | Samples trained: 59904/60000
Epoch 1 complete | Loss: 0.1326
Training complete | Net Average Loss: 0.1326 | Total epochs: 1


### Evaluation after retraining

In [16]:
# build loaders over the clean and poisoned test sets, then re-evaluate the
# retrained defense net against them
cleanloader = cleantest.get_dataloader()
poisonloader = poisontest.get_dataloader()
metrics = defense.eval(
    cleanloader,
    poisonloader,
    verbose=True,
    device=device,
)

Accuracy on clean | Base 98.65% | Trojan 98.44% | Defense 94.99%
Accuracy on Posion | Base 0.88% | Defense 82.15% | Trojan ASR 95.00%
Difference from Baseline | Trojan -0.21% | Defense -3.66%
Defense Effectiveness | 12.85% decrease in ASR


In [17]:
# list the defense net's named modules; the names are used to choose layers to prune
defense.view_named_modules()

avgpool: AdaptiveAvgPool2d(output_size=(7, 7))
drop: Dropout(p=0.5, inplace=False)
fc1: Linear(in_features=25088, out_features=4096, bias=True)
fc2: Linear(in_features=4096, out_features=4096, bias=True)
fc3: Linear(in_features=4096, out_features=10, bias=True)
layers: Sequential(
  (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): MaxPool2d(kernel_size=(2, 2), stride=2, padding=1, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running

### Pruning setup

In [18]:
# Pruning setup:
#   - names of the modules to prune
#   - fraction to prune from each (0.5 = 50%)
#   - reset the defense net back to the original trojan DNN
layers = [
    'layers.28',
    'layers.26',
    'layers.24',
    'layers.21',
]
amount = 0.5
defense.reset()

### Pruning based on the L-infinity norm of layer weights

In [19]:
# prune 50% of the weights in the selected layers (per the original note: the last
# 4 convolutional layers, ranked by the L-infinity norm)
defense.prune(layers, amount)

### Evaluation after prune

In [20]:
# evaluate the pruned defense net on the clean and poisoned test loaders
metrics = defense.eval(
    cleanloader,
    poisonloader,
    verbose=True,
    device=device,
)

Accuracy on clean | Base 98.65% | Trojan 98.44% | Defense 8.92%
Accuracy on Posion | Base 0.88% | Defense 9.82% | Trojan ASR 95.00%
Difference from Baseline | Trojan -0.21% | Defense -89.73%
Defense Effectiveness | 85.18% decrease in ASR


### Retraining to recover

In [21]:
# retrain the pruned net for one epoch on the clean dataloader to recover accuracy
loss = defense.retrain(
    dataloader,
    epochs=1,
    verbose=True,
    device=device,
    batch_size=16,
)

Training started
Epoch 1/1 | 24.96% | Loss: 0.5925 | Samples trained: 14976/60000
Epoch 1/1 | 49.92% | Loss: 0.5923 | Samples trained: 29952/60000
Epoch 1/1 | 74.88% | Loss: 0.5905 | Samples trained: 44928/60000
Epoch 1/1 | 99.84% | Loss: 0.5867 | Samples trained: 59904/60000
Epoch 1 complete | Loss: 0.5867
Training complete | Net Average Loss: 0.5867 | Total epochs: 1


### Evaluation after retraining

In [22]:
# final evaluation of the pruned-then-retrained defense net
metrics = defense.eval(
    cleanloader,
    poisonloader,
    verbose=True,
    device=device,
)

Accuracy on clean | Base 98.65% | Trojan 98.44% | Defense 93.84%
Accuracy on Posion | Base 0.88% | Defense 24.60% | Trojan ASR 95.00%
Difference from Baseline | Trojan -0.21% | Defense -4.81%
Defense Effectiveness | 70.40% decrease in ASR
