# Adversarial-Aware Pruning Experiment
Inspired by our own work on the adversarial robustness of pruned neural networks, we try to find subnets that are less prone to adversarial attacks then their unpruned counterparts. Therefore it is sensible to choose a selection criterium that is related to adversarial attacks. We could therefore naively prune the neurons that have a high activation for adversarial examples, or prune the weights that have high gradients when passing adversarial examples.

In [1]:
from src.custom_modules import MaskedLinearLayer,MaskedConvLayer
from src.data_loader import load_torchvision_dataset
from src.training import _fit
from src.helpers import _evaluate_model
import torch
import pandas as pd
from time import time

from torch import nn, optim
import torch.nn.functional as F


if torch.cuda.is_available() == True:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
dtype = torch.float32

cuda:0


# Create prunable Model and conduct standard training

In [218]:
class MaskedLinearLayer(nn.Module):
    """ Custom Linear layer with pruning mask"""
    def __init__(self, shape, bias=True, activation='relu'):
        super(MaskedLinearLayer, self).__init__()
        self.b, self.a = bias, activation
        weights = torch.empty(shape)
        self.weights = nn.Parameter(weights)  # nn.Parameter is a Tensor that's a module parameter.
        mask = torch.ones(shape)
        self.mask = nn.Parameter(mask, requires_grad=False)
        if self.b == True:
            bias = torch.zeros(self.weights.shape[-1])
            self.bias = nn.Parameter(bias)

        # initialize weights and biases
        nn.init.xavier_uniform_(self.weights)
        
        self.activations = torch.zeros((shape[1]))

    def forward(self, inputs):
        x = torch.mm(inputs, self.weights*self.mask)
        if self.b == True:
            x = torch.add(x, self.bias)
        if self.a == 'relu':
            x = F.relu(x)
        self.activations = x
        return x

class MaskedConvLayer(nn.Module):
    """ Custom Conv layer with pruning mask"""
    def __init__(self, shape, bias=True, stride=1, padding=0, activation=None):
        super(MaskedConvLayer, self).__init__()
        self.b, self.s, self.p, self.a = bias, stride, padding, activation
        weights = torch.empty(shape)
        self.weights = nn.Parameter(weights)  # nn.Parameter is a Tensor that's a module parameter.
        mask = torch.ones(shape)
        self.mask = nn.Parameter(mask, requires_grad=False)
        if self.b == True:
            bias = torch.zeros(self.weights.shape[0])
            self.bias = nn.Parameter(bias)

        # initialize weights and biases
        nn.init.xavier_uniform_(self.weights)
        
        self.activations = torch.zeros((shape[1]))

    def forward(self, inputs):
        x = F.conv2d(inputs, self.weights*self.mask, bias=None, stride=self.s, padding=self.p)
        if self.b == True:
            #reshape the bias
            b = self.bias.reshape((1, self.bias.shape[0], 1,1))
            x = torch.add(x, b)
        if self.a =='relu':
            x = F.relu(x)
        self.activations = x
        return x

class AAP_model(torch.nn.Module):
    def __init__(self):
        super(AAP_model, self).__init__()
        self.c1 = MaskedConvLayer((6, 1, 3, 3), padding=0, activation='relu')
        self.c2 = MaskedConvLayer((16, 6, 3, 3), padding=0, activation='relu')
        self.p1 = nn.AvgPool2d(2)
        self.p2 = nn.AvgPool2d(2)
        self.l1 = MaskedLinearLayer((400,500))

        self.l3 = MaskedLinearLayer((500,10))
        self.train_stats = pd.DataFrame()
        
    def forward(self,x):
        x = self.c1(x)
        x = self.p1(x)
        x = self.c2(x)
        x = self.p2(x)
        x = x.view(x.shape[0], x.shape[1]*x.shape[2]*x.shape[3])
#        x = torch.flatten(x, 1)
        x = self.l1(x)
#        x = self.l2(x)
        x = self.l3(x)
        return x

    def fit(self, train_data, val_data, epochs, device, eps = 8/255, number_of_replays=7, patience=None, evaluate_robustness=False):
        return _fit(self, train_data, val_data, epochs, device, patience=patience, evaluate_robustness=evaluate_robustness)
    
class MNIST_CNN(nn.Module):
    def __init__(self):
        super(MNIST_CNN, self).__init__()
        self.c1 = MaskedConvLayer((6, 1, 5, 5), padding=0, activation='relu')
        self.c2 = MaskedConvLayer((16, 6, 5, 5), padding=0, activation='relu')
        self.p1 = nn.AvgPool2d(2)
        self.p2 = nn.AvgPool2d(2)
        self.fc1 = MaskedLinearLayer((256, 128))
        self.fc2 = MaskedLinearLayer((128, 84))
        self.fc3 = MaskedLinearLayer((84, 10), activation=None)
        self.conv_weights, self.conv_masks, self.fully_connected_weights, self.fully_connected_masks = None, None, None, None

        self.train_stats = pd.DataFrame(columns=('epoch', 'train_loss', 'train_accuracy', 'validation_loss', 'validation_accuracy', 'duration', 'criterion', 'optimizer', 'method', 'learning_rate', 'batchsize'))
        
    def forward(self, inputs):
        x = self.c1(inputs)
        x = self.p1(x)
        x = self.c2(x)
        x = self.p2(x)
        x = x.view(x.shape[0], x.shape[1]*x.shape[2]*x.shape[3])
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [219]:
model = MNIST_CNN().to(device)

In [204]:
model = AAP_model().to(device)

In [220]:
train_loader, val_loader = load_torchvision_dataset('MNIST')

In [221]:

model.fit(train_dl, test_dl, 1, device,)

AttributeError: 'MNIST_CNN' object has no attribute 'fit'

# Strip Down Training Function to Essentials

In [224]:

#model = AAP_model().to(device)
model = MNIST_CNN().to(device)
epochs = 50
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
for epoch in range(epochs):  # loop over the dataset multiple times
    t0 = time()
    for i, data in enumerate(train_loader):
        if i>=i:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            batchsize = labels.size(0)
            correct = (predicted == labels).sum().item()
            accuracy = 100 * correct / batchsize
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    t1 = time()
    accuracy, loss = _evaluate_model(model, val_loader, device, criterion)
    print('duration:', t1-t0,' - validation accuracy: ', accuracy,' - validation loss: ', loss)

print('Finished Training')


duration: 2.4367828369140625  - validation accuracy:  90.85  - validation loss:  0.3142783366143703
duration: 2.390641927719116  - validation accuracy:  93.95  - validation loss:  0.19823764488101006
duration: 2.401247024536133  - validation accuracy:  95.69  - validation loss:  0.13836825862526894
duration: 2.4281771183013916  - validation accuracy:  96.57  - validation loss:  0.10654706154018641
duration: 2.379926919937134  - validation accuracy:  97.09  - validation loss:  0.09151599630713463
duration: 2.380399465560913  - validation accuracy:  97.65  - validation loss:  0.07506840350106359
duration: 2.3980917930603027  - validation accuracy:  97.86  - validation loss:  0.07229888085275889
duration: 2.4158570766448975  - validation accuracy:  97.74  - validation loss:  0.06877827066928148
duration: 2.3753881454467773  - validation accuracy:  98.1  - validation loss:  0.06101765092462301
duration: 2.3956823348999023  - validation accuracy:  98.15  - validation loss:  0.05984558626078

In [290]:
inputs, labels = next(iter(val_loader))
inputs, labels = inputs.to(device), labels.to(device)
adv_inputs = pgd_attack(model, inputs, labels, eps=16/255)
optimizer.zero_grad()
# forward + backward + optimize
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()

tensor(0.2559, device='cuda:0')


In [297]:
s_c1 = model.c1.weights.grad.flatten().abs().argsort(); s_c1

tensor([ 88,   8, 116,  82,  78,  77,  13,  17,  48,  91,  83,   4,  72, 129,
         96, 149,  98, 139,  93, 134, 121,  87,  19,   9,  86, 109,   3, 144,
         42, 141,  21, 127, 111,  14,  45, 110,   2, 146,  81, 114,  18,  22,
         73,  99,  76,  41,  71, 132,  92,  46, 105,  12, 118,   7,  67,  97,
         43,  52,  24, 123,  94, 104,  53, 136, 124,  68, 126, 137,  47, 145,
        120, 115, 131,  89,  84, 117,  79,  16,  23, 128, 119, 122,  95,  37,
         20,  27,   1, 106,  57, 133, 100, 113, 142,  66,  49,  51,  36,  85,
         62,  32, 112,  90,  75,  11,  40,  80, 138, 140,   6,  26,  63,  31,
         58,  28,  38, 143, 125,  44,  56,  70, 135, 130,  54,  15, 147,  74,
         33,  61, 148,   0, 101,  69, 107,  10, 108,  35,   5,  25,  30,  65,
         39,  29, 103,  59, 102,  34,  50,  64,  60,  55], device='cuda:0')

In [298]:
s_fc3 = model.fc3.weights.grad.flatten().abs().argsort(); s_fc3

tensor([550, 551, 640, 641, 649, 648, 660, 661, 663, 662, 664, 665, 643, 642,
        646, 647, 667, 666, 669, 668, 645, 644, 748, 749, 554, 555, 556, 557,
        553, 552, 559, 558, 741, 740, 746, 747, 745, 744, 742, 743, 363, 362,
        332, 333, 394, 395, 399, 398, 390, 391, 396, 397, 393, 392, 467, 466,
        461, 460,  23,  22,  29,  28,  20,  21, 136, 137, 132, 133, 139, 138,
        135, 134, 130, 131,  24,  25,  27,  26, 337, 336, 335, 334, 330, 331,
        281, 280, 282, 283, 287, 286, 284, 285, 288, 289, 338, 339, 368, 369,
        360, 361, 365, 364, 366, 367, 469, 468, 464, 465, 462, 463, 573, 571,
        574, 577, 653, 578,  34, 575, 576, 572, 651,  36, 470, 652, 473, 475,
        654, 657, 658,  31, 472, 579, 570, 471, 476,  30, 200, 659, 686, 477,
        411, 655, 387, 346,  39,  37, 833, 416, 170, 685, 706, 681, 415, 700,
         38, 294, 520, 752, 702, 156,  32, 800, 813, 117, 406, 820, 684, 597,
        796, 413,  13, 729, 619, 754, 526, 804, 714, 150, 316, 4

In [299]:
adv_inputs = pgd_attack(model, inputs, labels, eps=16/255)
optimizer.zero_grad()
# forward + backward + optimize
outputs = model(adv_inputs)
loss = criterion(outputs, labels)
loss.backward()

tensor(0.2617, device='cuda:0')


In [300]:
a_c1 = model.c1.weights.grad.flatten().abs().argsort(); a_c1

tensor([ 13,  88,  78,  17,  83,   8,  91, 109, 121, 111, 118,  98, 146,  82,
         77,  93, 149,  96,  72,  48,  86, 139,   4,  19,   9, 123, 116,  21,
        144, 134,  87, 129,  81,   2,  42,  14,   3,  76,  45, 105, 127,  73,
         71, 104,  99, 141, 117, 110,  67,  18,  12,  46,  22,  52,   7,  92,
        132, 122,  41,  97,  24,  43, 126,  53,  68, 137,  94, 113, 136, 114,
        131,  47,  16,  79,  84, 112, 100, 106,  89, 115,  23, 120, 142,  95,
        145,  20,  57, 128,  66,  62,  27,  51,   1, 124,  49,  75,  85,  37,
        133,  80,  11, 138,  36,  90,  26,  32,   6,  40, 119,  63,  58, 143,
        147,  31,  56, 140, 125, 108, 107,  28,  61, 148, 101,  15,  38,  54,
         70,  44, 130,  74, 135,   0,  33,  10,  69,  25,   5,  35, 103,  65,
         30, 102,  39,  59,  29,  50,  64,  34,  60,  55], device='cuda:0')

In [301]:
a_fc3 = model.fc3.weights.grad.flatten().abs().argsort();a_fc3

tensor([550, 551, 640, 641, 649, 648, 660, 661, 663, 662, 664, 665, 643, 642,
        646, 647, 667, 666, 669, 668, 645, 644, 748, 749, 554, 555, 556, 557,
        553, 552, 559, 558, 741, 740, 746, 747, 745, 744, 742, 743, 363, 362,
        332, 333, 394, 395, 399, 398, 390, 391, 396, 397, 393, 392, 467, 466,
        461, 460,  23,  22,  29,  28,  20,  21, 136, 137, 132, 133, 139, 138,
        135, 134, 130, 131,  24,  25,  27,  26, 337, 336, 335, 334, 330, 331,
        281, 280, 282, 283, 287, 286, 284, 285, 288, 289, 338, 339, 368, 369,
        360, 361, 365, 364, 366, 367, 469, 468, 464, 465, 462, 463, 573, 571,
        574, 577, 653, 578,  34, 575, 576, 572,  36, 651, 652, 473, 475, 470,
        654, 657, 658,  31, 579, 570, 472, 471,  30, 476, 200, 659, 686, 477,
        411, 655, 387, 346, 833,  39, 416, 706, 685, 700, 681,  37, 415, 170,
        813, 502, 714,  38, 520, 752, 162, 156, 702, 413, 800,  32, 294, 736,
        820, 684,  13, 796, 729, 354, 117, 406, 150, 316, 526, 4

In [327]:
((a_c1-s_c1)/len(a_c1)).abs().sum()/len(a_c1)

tensor(0.3153, device='cuda:0')

In [317]:
(((a_fc3-s_fc3)/len(a_fc3)).abs()).sum()/len(a_fc3)

tensor(0.2682, device='cuda:0')

In [268]:
from foolbox import PyTorchModel, attacks
def pgd_attack(model, images, labels, eps=8/255):
    model.eval()
    fmodel = PyTorchModel(model, bounds=(0, 1))
    attack = attacks.LinfPGD()
    raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=eps)
    model.train()
    print(success.sum()/len(success))
    return clipped_advs

In [271]:
adv_inputs = pgd_attack(model, inputs, labels, eps=16/255)

tensor(0.2617, device='cuda:0')


In [328]:
a_c1

tensor([ 13,  88,  78,  17,  83,   8,  91, 109, 121, 111, 118,  98, 146,  82,
         77,  93, 149,  96,  72,  48,  86, 139,   4,  19,   9, 123, 116,  21,
        144, 134,  87, 129,  81,   2,  42,  14,   3,  76,  45, 105, 127,  73,
         71, 104,  99, 141, 117, 110,  67,  18,  12,  46,  22,  52,   7,  92,
        132, 122,  41,  97,  24,  43, 126,  53,  68, 137,  94, 113, 136, 114,
        131,  47,  16,  79,  84, 112, 100, 106,  89, 115,  23, 120, 142,  95,
        145,  20,  57, 128,  66,  62,  27,  51,   1, 124,  49,  75,  85,  37,
        133,  80,  11, 138,  36,  90,  26,  32,   6,  40, 119,  63,  58, 143,
        147,  31,  56, 140, 125, 108, 107,  28,  61, 148, 101,  15,  38,  54,
         70,  44, 130,  74, 135,   0,  33,  10,  69,  25,   5,  35, 103,  65,
         30, 102,  39,  59,  29,  50,  64,  34,  60,  55], device='cuda:0')

In [334]:
model.c1.mask.view(model.c1.mask.shape.prod())

AttributeError: 'torch.Size' object has no attribute 'prod'

In [2]:
model.c1.mask.shape

NameError: name 'model' is not defined