# Import

In [None]:
import os
import numpy as np
import pandas as pd
import time
import argparse
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as transforms

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import roc_auc_score

import matplotlib.pyplot as plt

# Root folder on Google Drive; checkpoint paths further down are built by
# appending file names to this string.
path = "/content/gdrive/My Drive/GameTheory/"

# Use CUDA only when it is requested here AND actually available; every
# `.to(device)` call in this notebook follows this choice.
gpu = True
gpu = gpu and torch.cuda.is_available()
device = torch.device("cuda" if gpu else "cpu")

'''hparameters'''
num_classes = 10  # number of target classes


# Optimizer settings, passed to torch.optim.SGD in the "Main" section.
learningRate = 0.01
weightDecay = 5e-4

best_acc = 0     # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# CIFAR10 Data
------------------------------------
Both splits are normalized per channel with `transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))`.

In [None]:
print('==> Preparing data..')

# Training pipeline: random crop (padding 4) and horizontal flip as
# augmentation, then tensor conversion and per-channel normalisation.
_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]
transformtrain = transforms.Compose(_train_steps)

# Evaluation pipeline: no augmentation, same normalisation as training.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]
transformtest = transforms.Compose(_test_steps)

# NOTE(review): the string below suggests a [-1, 1] input range, but with the
# statistics above a [0, 1] pixel maps to roughly [-2.4, 2.8] -- confirm.
"""[-1,1]"""
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transformtrain,
)
trainloader = DataLoader(trainset, batch_size=256, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transformtest,
)
testloader = DataLoader(testset, batch_size=256, shuffle=False, num_workers=2)

# Same test images served one at a time, in order, for per-image attacks.
attackloader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=2)

# Human-readable class names, indexed by label id.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Attack Images Generation
------------------------
batch_size=1

## FGSM
-------------------
epsilon=2/256

In [None]:
# FGSM attack code
"""epsilon=2/256"""

def fgsm_attack(image, epsilon, data_grad, clip_min=0, clip_max=1):
    """Build an FGSM adversarial example with one signed-gradient step.

    Computes ``image + epsilon * sign(data_grad)`` and clamps the result to
    ``[clip_min, clip_max]``.

    NOTE(review): the default ``[0, 1]`` range is only valid for
    un-normalised pixels. The data loaders in this notebook apply
    ``transforms.Normalize``, so their tensors lie outside ``[0, 1]``;
    callers working on normalised inputs should pass matching bounds (or
    ``None`` to disable a side).

    Args:
        image: Tensor to perturb.
        epsilon: Step size; a scalar or anything broadcastable to ``image``.
        data_grad: Gradient of the loss w.r.t. ``image`` (same shape).
        clip_min: Lower clamp bound, or ``None`` for no lower bound.
        clip_max: Upper clamp bound, or ``None`` for no upper bound.

    Returns:
        The perturbed (and clamped) tensor.
    """
    # Only the direction of the gradient is used, not its magnitude.
    perturbed_image = image + epsilon * data_grad.sign()
    # torch.clamp rejects min=None together with max=None, so skip it then.
    if clip_min is None and clip_max is None:
        return perturbed_image
    return torch.clamp(perturbed_image, min=clip_min, max=clip_max)

def fgsm_attack2(image, epsilon, data_grad, mask):
    """FGSM step that only perturbs the samples selected by ``mask``.

    Args:
        image: Batch of inputs; the mask is reshaped to ``(-1, 1, 1, 1)``, so
            a 4-D batch is expected.
        epsilon: Step size.
        data_grad: Gradient of the loss w.r.t. ``image``.
        mask: One value per sample; samples whose mask entry is 0 receive no
            perturbation.

    Returns:
        The perturbed batch, clamped to ``[0, 1]``.
    """
    # One mask value per sample, broadcast over the remaining three axes.
    per_sample = mask.view(-1, 1, 1, 1)
    masked_direction = data_grad.sign() * per_sample
    stepped = image + epsilon * masked_direction
    return stepped.clamp(0, 1)


def generate_fgsm_attack(model, test_loader, epsilon):
    """Craft one FGSM adversarial example for every sample in ``test_loader``.

    Relies on the module-level ``device``, ``criterion`` and ``fgsm_attack``.

    Args:
        model: Network whose loss gradient drives the attack.
        test_loader: Iterable of ``(data, target)`` batches.
        epsilon: FGSM step size forwarded to ``fgsm_attack``.

    Returns:
        ``(adv_ex, label)``: a list with one numpy array per adversarial
        image and a list with the matching true label of each image.
    """
    adv_batches = []
    label_batches = []

    for data, target in test_loader:
        data, target = data.to(device), target.to(device)

        # The attack needs the gradient of the loss w.r.t. the input itself.
        data.requires_grad = True

        output = model(data)
        loss = criterion(output, target)

        model.zero_grad()
        loss.backward()

        perturbed_data = fgsm_attack(data, epsilon, data.grad.data)

        # Keep the batch axis: squeezing would drop it for a batch of one and
        # the flattening below would then iterate over channels, not images.
        adv_batches.append(perturbed_data.detach().cpu().numpy())
        label_batches.append(target.flatten().detach().cpu().numpy())

    # Flatten the per-batch arrays into per-sample lists.
    adv_ex = [image for batch in adv_batches for image in batch]
    label = [lbl for batch in label_batches for lbl in batch]

    return adv_ex, label


class AdvDataset(Dataset):
    """In-memory dataset of pre-computed adversarial examples and labels.

    Args:
        data: Sequence of equally-shaped numeric arrays (one per example).
        label: Sequence of integer class ids, one per example.

    Examples are stored as float32. Labels are stored as int64 because
    ``nn.CrossEntropyLoss`` and confusion-matrix indexing both require
    integer class ids; float labels fail there.
    """

    def __init__(self, data, label):
        # Stack through numpy first: building a tensor from a Python list of
        # arrays element by element is very slow.
        self.data = torch.tensor(np.asarray(data), dtype=torch.float32)
        self.label = torch.tensor(np.asarray(label), dtype=torch.long)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]


In [None]:
# Craft FGSM examples (epsilon = 2/256) against the pretrained model on the
# test set, then serve them one at a time in shuffled order. This rebinds the
# `attackloader` name created in the data-preparation cell.
adv_ex, label = generate_fgsm_attack(pretrained_model, testloader, 2/256)
attackdataset = AdvDataset(adv_ex, label)
attackloader = DataLoader(
    attackdataset,
    batch_size=1,
    shuffle=True,
    num_workers=2,
)

## PGD 
------------------------
`eps`: maximum distortion of the adversarial example relative to the original input.

`alpha` (a.k.a. `eps_iter`): step size for each attack iteration.

`iters` (a.k.a. `nb_iter`): number of attack iterations.

In [None]:
"""lamda=2"""

def pgd_attack(model, images, labels, eps=0.3, alpha=2/256, iters=5):
    """Run an iterative signed-gradient (PGD-style) attack on a batch.

    Relies on the module-level ``device`` and ``criterion``.

    Args:
        model: Network under attack.
        images: Input batch; it is copied, the caller's tensor is untouched.
        labels: True labels of ``images``.
        eps: Maximum per-element distance from the original images.
        alpha: Step size of each iteration.
        iters: Number of iterations.

    Returns:
        Detached adversarial batch, within ``eps`` of the originals and
        clamped to ``[0, 1]``.

    NOTE(review): the ``[0, 1]`` clamp assumes un-normalised pixels, while
    the loaders in this notebook apply ``transforms.Normalize`` -- confirm
    the intended value range.
    """
    # Work on a detached copy. `.to(device)` returns the very same tensor
    # when it already lives on `device`, so setting `requires_grad` below
    # would otherwise flip the flag on the caller's tensor (and raise for a
    # non-leaf input).
    images = images.clone().detach().to(device)
    labels = labels.to(device)

    # `images` is rebound, never modified in place, so this stays the original.
    ori_images = images

    for _ in range(iters):
        images.requires_grad = True
        outputs = model(images)

        model.zero_grad()
        loss = criterion(outputs, labels)
        loss.backward()

        adv_images = images + alpha * images.grad.sign()
        # Project the total perturbation back into the eps-ball.
        eta = torch.clamp(adv_images - ori_images, min=-eps, max=eps)
        images = torch.clamp(ori_images + eta, min=0, max=1).detach()

    return images

# Model

## others

In [None]:
class Bottleneck(nn.Module):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1).

    The block outputs ``expansion * planes`` channels. ``stride`` is applied
    by the 3x3 convolution and by the projection shortcut.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity shortcut when shapes already match; otherwise a strided
        # 1x1 convolution + batch norm projects the input to the new shape.
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes, out_planes, kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        y += self.shortcut(x)
        return F.relu(y)

In [None]:
def init_weights(m):
    """Xavier-normal initialise the weight of a Conv2d or Linear module.

    Meant for ``model.apply(init_weights)``. Modules of any other exact type
    (subclasses included) are left untouched, and so are biases.
    """
    if type(m) in (nn.Conv2d, nn.Linear):
        torch.nn.init.xavier_normal_(m.weight.data)


def ResNet50():
    """Build a ResNet from Bottleneck blocks with stage depths 3-4-6-3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths)


# def ResNet50SAP():
#     return ResNetSAP(BottleneckSAP, [3, 4, 6, 3])


def ResNet34():
    """Build a ResNet from BasicBlock blocks with stage depths 3-4-6-3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths)


def ResNet101():
    """Build a ResNet from Bottleneck blocks with stage depths 3-4-23-3."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths)


def ResNet152():
    """Build a ResNet from Bottleneck blocks with stage depths 3-8-36-3."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths)

## Non SAP

In [None]:
class BasicBlock(nn.Module):
    """Two-convolution (3x3 -> 3x3) residual block.

    The block outputs ``expansion * planes`` channels. ``stride`` is applied
    by the first convolution and by the projection shortcut.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut when shapes already match; otherwise project the
        # input with a strided 1x1 convolution followed by batch norm.
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes, out_planes, kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        y += self.shortcut(x)
        return F.relu(y)

In [None]:
class ResNet(nn.Module):
    """ResNet backbone: a 3x3 stem, four residual stages and a linear head.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and accept ``(in_planes, planes, stride)``.
        num_blocks: Number of blocks in each of the four stages.
        num_classes: Size of the output layer.

    The forward pass average-pools with a fixed 4x4 window before the linear
    layer, so the stage-4 feature map must be 4x4 (which is what a 32x32
    input gives with the strides used here).
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; `_make_layer` updates it after every block.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(64)

        # (width, stride of the first block) of stages 1..4.
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for position, (width, first_stride) in enumerate(stage_plan):
            stage = self._make_layer(block, width, num_blocks[position], first_stride)
            setattr(self, "layer{}".format(position + 1), stage)

        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack the blocks of one stage; only the first block uses ``stride``."""
        block_strides = [stride]
        block_strides.extend(1 for _ in range(num_blocks - 1))

        stacked = []
        for block_stride in block_strides:
            stacked.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stacked)

    def forward(self, x):
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 4)
        feats = feats.view(feats.size(0), -1)
        return self.linear(feats)

## SAP

In [None]:
import random

class RandomSAP(nn.Module):
    """Random activation pruning: a uniform-random baseline for SAP.

    The SAP paper is https://arxiv.org/abs/1803.01442.

    Attributes
    ----------
    self.is_valid bool : if this flag is True, activations are pruned.
    """
    def __init__(self, is_valid=False):
        """
        Parameters
        ----------
        is_valid bool : if this flag is True, inject random pruning.
        """
        super(RandomSAP, self).__init__()
        self.is_valid = is_valid

    def forward(self, inputs):
        """
        If not self.is_valid, return inputs unchanged. Otherwise keep each
        activation independently with probability 0.5 and zero the rest.
        Survivors are NOT rescaled.

        NOTE(review): the previous body raised NameError (`drop_node` was
        read before assignment) and never used `inputs`; masking `inputs`
        with the random draw is the presumed intent -- confirm.

        Parameters
        ----------
        inputs torch.Tensor : floating-point activation tensor (any shape).
        Returns
        -------
        outputs torch.Tensor : inputs, or the randomly pruned inputs.
        """
        if not self.is_valid:
            return inputs

        # rand_like draws on the same device/dtype as `inputs`, so this works
        # on CPU as well as on GPU.
        keep = torch.rand_like(inputs) > 0.5
        return torch.where(keep, inputs, torch.zeros_like(inputs))

In [None]:
from torch.distributions.multinomial import Multinomial

class SAP(nn.Module):
    """Stochastic Activation Pruning (SAP) layer.

    The original paper is https://arxiv.org/abs/1803.01442.
    Attributes
    ----------
    self.ratio float : ratio of pruning which can be larger than 1.0.
    self.is_valid bool : if this flag is True, inject SAP.
    """
    def __init__(self, ratio=1, is_valid=False):
        """
        Parameters
        ----------
        ratio float : ratio of pruning which can be larger than 1.0.
        is_valid bool : if this flag is True, inject SAP.
        """
        super(SAP, self).__init__()
        self.ratio = ratio
        self.is_valid = is_valid

    def forward(self, inputs):
        """
        If not self.is_valid, just return inputs. Otherwise apply SAP to
        inputs and return the result tensor. The module's train/eval mode is
        not consulted.

        Parameters
        ----------
        inputs torch.Tensor : input tensor whose shape is [b, c, h, w].
        Returns
        -------
        outputs torch.Tensor : just return inputs or stochastically pruned inputs.
        """
        if not self.is_valid:
            return inputs

        b, c, h, w = inputs.shape
        inputs_1d = inputs.reshape([b, c * h * w])  # [b, c * h * w]

        # Sampling probability of each activation is proportional to its
        # magnitude within the sample.
        magnitudes = torch.abs(inputs_1d)
        totals = torch.sum(magnitudes, dim=-1, keepdim=True)

        # An all-zero sample would give 0/0 = NaN probabilities, which
        # Multinomial rejects. Such a sample gets a uniform distribution
        # instead; its output is all zeros either way.
        has_mass = totals > 0
        safe_totals = torch.where(has_mass, totals, torch.ones_like(totals))
        uniform = torch.full_like(magnitudes, 1.0 / magnitudes.shape[-1])
        inputs_1d_prob = torch.where(has_mass, magnitudes / safe_totals, uniform)

        # Number of draws (with replacement) per sample.
        num_sample = int(c * h * w * self.ratio)

        # counts[i, j] = how often activation j of sample i was drawn.
        counts = Multinomial(num_sample, inputs_1d_prob).sample()

        # Drawn at least once -> keep; never drawn -> zero.
        outputs = torch.where(counts > 0, inputs_1d, torch.zeros_like(inputs_1d))

        # Scale survivors by the inverse of their probability of being drawn
        # at least once; the epsilon guards activations with probability 0.
        keep_prob = 1 - (1 - inputs_1d_prob) ** num_sample
        outputs = outputs / (keep_prob + 1e-12)
        return outputs.reshape([b, c, h, w])  # [b, c, h, w]

In [None]:
class BasicBlockSAP(nn.Module):
    """Two-convolution residual block followed by a SAP layer.

    Same layout as a basic residual block; the block output additionally
    passes through ``SAP(is_valid=is_valid)``.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1, is_valid=False):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut when shapes already match; otherwise project the
        # input with a strided 1x1 convolution followed by batch norm.
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes, out_planes, kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(out_planes),
            )

        # Pruning applied to the block output (after the final ReLU).
        self.sap1 = SAP(is_valid=is_valid)

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        y += self.shortcut(x)
        y = F.relu(y)
        return self.sap1(y)

In [None]:
class ResNetSAP(nn.Module):
    """ResNet backbone whose last stage is built with ``is_valid=True``.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and accept ``(in_planes, planes, stride, is_valid)``.
        num_blocks: Number of blocks in each of the four stages.
        num_classes: Size of the output layer.

    Stages 1-3 construct their blocks with ``is_valid=False``; only stage 4
    passes ``is_valid=True``.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; `_make_layer` updates it after every block.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(64)

        depth1, depth2, depth3, depth4 = (num_blocks[i] for i in range(4))
        self.layer1 = self._make_layer(block, 64, depth1, stride=1)
        self.layer2 = self._make_layer(block, 128, depth2, stride=2)
        self.layer3 = self._make_layer(block, 256, depth3, stride=2)
        self.layer4 = self._make_layer(block, 512, depth4, stride=2, is_valid=True)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride, is_valid=False):
        """Stack the blocks of one stage; only the first block uses ``stride``."""
        block_strides = [stride]
        block_strides.extend(1 for _ in range(num_blocks - 1))

        stacked = []
        for block_stride in block_strides:
            stacked.append(block(self.in_planes, planes, block_stride, is_valid))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stacked)

    def forward(self, x):
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        # Fixed 4x4 pooling window: expects a 4x4 stage-4 feature map.
        feats = F.avg_pool2d(feats, 4)
        feats = feats.view(feats.size(0), -1)
        return self.linear(feats)


In [None]:
def init_weights(m):
    """Apply Xavier-normal initialisation to Conv2d / Linear weights.

    Intended as the callback of ``model.apply``; any module whose exact type
    is neither ``nn.Conv2d`` nor ``nn.Linear`` is skipped.
    """
    kind = type(m)
    if kind is not nn.Conv2d and kind is not nn.Linear:
        return
    torch.nn.init.xavier_normal_(m.weight.data)
        
def ResNet18():
    """Build a ResNet from BasicBlock blocks with two blocks per stage."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths)

def ResNet18SAP():
    """Build a ResNetSAP from BasicBlockSAP blocks with two blocks per stage."""
    stage_depths = [2, 2, 2, 2]
    return ResNetSAP(BasicBlockSAP, stage_depths)

# Free-Training

In [None]:
def train(net, epoch, trainloader):
    """Train ``net`` for one pass over ``trainloader`` on clean inputs.

    Relies on the module-level ``device``, ``optimizer`` and ``criterion``.

    Args:
        net: Model to train; it is switched to training mode.
        epoch: Unused; kept so the signature matches the other train loops.
        trainloader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        ``(avg_loss, acc)``: mean per-sample loss and accuracy over the pass.
    """
    net.train()

    loss_sum = 0.0
    correct = 0
    total = 0

    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_size = targets.size(0)
        # The criterion used in this notebook (CrossEntropyLoss with default
        # reduction) returns the batch mean; weight it by the batch size so
        # that dividing by `total` below gives a true per-sample average.
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(targets).sum().item()

    # Release cached GPU blocks once per pass rather than once per batch.
    torch.cuda.empty_cache()

    acc = correct/total
    avg_loss = loss_sum/total

    return avg_loss, acc


def test(net, testloader):
    """Evaluate ``net`` on ``testloader`` without computing gradients.

    Relies on the module-level ``device`` and ``criterion``.

    Args:
        net: Model to evaluate; it is switched to eval mode.
        testloader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        ``(avg_loss, acc)``: mean per-sample loss and accuracy.
    """
    net.eval()

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            batch_size = targets.size(0)
            # The criterion returns the batch mean; weight it by the batch
            # size so the final division yields a per-sample average.
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(targets).sum().item()

    # Release cached GPU blocks once per pass rather than once per batch.
    torch.cuda.empty_cache()

    acc = correct/total
    avg_loss = loss_sum/total

    return avg_loss, acc

# Adv-training

## FGSM

In [None]:
def fgsm_train(net, epoch, trainloader, eps=2/255):
    """Run one epoch of FGSM adversarial training.

    For every batch: a clean forward/backward pass provides the input
    gradient, ``fgsm_attack`` turns it into an adversarial batch, and the
    model is updated after a second backward pass on that batch.

    Relies on the module-level ``device``, ``optimizer``, ``criterion`` and
    ``fgsm_attack``.

    Args:
        net: Model to train; it is switched to training mode.
        epoch: Epoch number, only printed.
        trainloader: Iterable of ``(inputs, targets)`` batches.
        eps: FGSM step size.

    Returns:
        ``(avg_loss, acc)``: mean per-sample loss and accuracy measured on
        the adversarial batches.
    """
    print('\nEpoch: %d' % epoch)
    net.train()

    loss_sum = 0.0
    correct = 0
    total = 0

    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # The attack needs the gradient of the loss w.r.t. the input.
        inputs.requires_grad = True

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()

        # Detach: the adversarial batch is a fixed input for the second pass,
        # so there is no point in backpropagating into `inputs` again.
        perturbed_data = fgsm_attack(inputs, eps, inputs.grad.data).detach()
        new_outputs = net(perturbed_data)
        new_loss = criterion(new_outputs, targets)
        # NOTE(review): parameter gradients are not zeroed between the two
        # backward calls, so the step uses clean + adversarial gradients
        # summed. Kept as in the original; confirm this is intended.
        new_loss.backward()

        optimizer.step()

        batch_size = targets.size(0)
        # The criterion returns the batch mean; weight it by the batch size
        # so the final division yields a per-sample average.
        loss_sum += new_loss.item() * batch_size
        _, new_predicted = new_outputs.max(1)
        total += batch_size
        correct += new_predicted.eq(targets).sum().item()

    # Release cached GPU blocks once per epoch rather than once per batch.
    torch.cuda.empty_cache()

    acc = correct/total
    avg_loss = loss_sum/total

    return avg_loss, acc

## PGD

In [None]:
# Per-channel statistics used to express pixel-space limits in the
# normalised input space.
# NOTE(review): this std differs from the one passed to transforms.Normalize
# in the data-preparation cell (0.2023, 0.1994, 0.2010), so the limits below
# do not exactly match the loaders' value range -- confirm which is intended.
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2471, 0.2435, 0.2616)

# Follow the notebook-wide `device` (with its CPU fallback) rather than
# forcing CUDA.
mu = torch.tensor(cifar10_mean).view(3,1,1).to(device)
std = torch.tensor(cifar10_std).view(3,1,1).to(device)

# Images of pixel values 1 and 0 under (x - mu) / std, per channel.
upper_limit = ((1 - mu) / std)
lower_limit = ((0 - mu) / std)

# Perturbation budget of 2/256 in pixel space, rescaled per channel.
epsilon = (2/256.) / std

# Step applied to sign(grad) in every PGD iteration of pgd_train.
step_size = 2

# Number of PGD iterations per batch.
iters = 5

In [None]:
def clamp(X, lower_limit, upper_limit):
    """Element-wise clamp of ``X`` between two tensor bounds.

    The upper bound is applied first and the lower bound second, so where
    the bounds cross the lower bound wins.
    """
    capped = torch.min(X, upper_limit)
    return torch.max(capped, lower_limit)

# Training
def pgd_train(model, epoch, epsilon):
    """Run one epoch of PGD adversarial training over ``trainloader``.

    For every batch a perturbation ``delta`` is drawn uniformly within
    ``+/-epsilon`` per channel, refined for ``iters`` signed-gradient steps
    of size ``step_size``, and the model is then updated on ``X + delta``.

    Relies on the module-level ``trainloader``, ``device``, ``criterion``,
    ``optimizer``, ``clamp``, ``lower_limit``, ``upper_limit``, ``step_size``
    and ``iters``.

    Args:
        model: Model to train; it is switched to training mode.
        epoch: Unused; kept for signature parity with the other train loops.
        epsilon: Per-channel budget tensor, indexed as ``epsilon[c][0][0]``.

    Returns:
        ``(avg_loss, acc)`` measured on the adversarial batches.
    """
    # The other training loops switch to train mode explicitly; without this
    # a preceding evaluation would leave the model in eval mode.
    model.train()

    start_epoch_time = time.time()

    train_loss = 0
    train_acc = 0
    train_n = 0

    for X, y in trainloader:
        X, y = X.to(device), y.to(device)
        delta = torch.zeros_like(X)

        # Random start: uniform in [-eps_c, eps_c] for each channel c.
        for channel in range(len(epsilon)):
            bound = epsilon[channel][0][0].item()
            delta[:, channel, :, :].uniform_(-bound, bound)
        # Keep X + delta inside the valid (normalised) pixel range.
        delta.data = clamp(delta, lower_limit-X, upper_limit-X)

        delta.requires_grad = True
        for _ in range(iters):
            output = model(X + delta)
            loss = criterion(output, y)
            loss.backward()

            grad = delta.grad.detach()
            # Ascent step, then project to the epsilon ball and the valid range.
            delta.data = clamp(delta + step_size*torch.sign(grad), -epsilon, epsilon)
            delta.data = clamp(delta, lower_limit-X, upper_limit-X)
            delta.grad.zero_()

        delta = delta.detach()
        output = model(X + delta)
        loss = criterion(output, y)

        # Also discards the parameter gradients accumulated while crafting delta.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * y.size(0)
        train_acc += (output.max(1)[1] == y).sum().item()
        train_n += y.size(0)

    epoch_time = time.time()

    print(epoch_time-start_epoch_time)
    return train_loss/train_n, train_acc/train_n

# Confusion Matrix

In [None]:
"""batch=1"""

def get_matrix(net, testloader):
    """Evaluate ``net`` and build a 10x10 confusion matrix.

    Works for any batch size: repeated (target, prediction) pairs inside one
    batch are all counted.

    Relies on the module-level ``device`` and ``criterion``.

    Args:
        net: Model to evaluate; it is switched to eval mode.
        testloader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        ``(avg_loss, acc, matrix)`` where ``matrix[t, p]`` counts the samples
        with true class ``t`` that were predicted as class ``p``.
    """
    net.eval()

    loss_sum = 0.0
    correct = 0
    total = 0

    matrix = np.zeros((10,10))
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs = inputs.to(device)
            # AdvDataset may hand out float labels; the criterion and the
            # matrix indexing both need integer class ids.
            targets = targets.to(device).long()
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            batch_size = targets.size(0)
            # The criterion returns the batch mean; weight it by the batch
            # size so the final division yields a per-sample average.
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(targets).sum().item()

            # `matrix[t, p] += 1` with index arrays counts a repeated (t, p)
            # pair only once; np.add.at accumulates every occurrence.
            np.add.at(
                matrix,
                (targets.cpu().numpy(), predicted.cpu().numpy()),
                1,
            )

    # Release cached GPU blocks once per pass rather than once per batch.
    torch.cuda.empty_cache()

    acc = correct/total
    avg_loss = loss_sum/total

    return avg_loss, acc, matrix

In [None]:
import pandas as pd
import csv

%cd /content/gdrive/"My Drive"/GameTheory/

def write_matrix(matrix, filename):
    """Write a confusion matrix to ``filename`` as a two-column CSV.

    After the ``target,predict`` header, each record holds the row index and
    the string form of the whole matrix row (as a single cell).
    """
    with open(filename, "w", newline='') as handle:
        writer = csv.writer(handle, delimiter=',')
        writer.writerow(["target", "predict"])
        writer.writerows([index, row] for index, row in enumerate(matrix))

In [None]:
# Confusion matrix of the pretrained model on the adversarial set. The loader
# built earlier in this notebook is named `attackloader`; `attack_loader` was
# never defined and raised NameError.
get_matrix(pretrained_model, attackloader)

In [None]:
# Confusion matrix of `confusemodel` on the adversarial set. The loader built
# earlier in this notebook is named `attackloader`; `attack_loader` was never
# defined and raised NameError.
get_matrix(confusemodel, attackloader)

In [None]:
import gc

# Drop the reference to the current model so its memory can be reclaimed.
# Raises NameError if `confusemodel` has not been created yet.
del confusemodel

# Collect unreachable Python objects now.
gc.collect()

# Return cached, unused GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

In [None]:
# Loss shared (as a module-level global) by the train / test / attack helpers.
criterion = nn.CrossEntropyLoss()
modelpath = path + "ResNet18_19.pth"
# /content/gdrive/MyDrive/GameTheory/Adc_ResNet50_19.pth
confusemodel = ResNet18().to(device)
# map_location lets a checkpoint saved on a GPU load on whatever `device` is
# in use, including the CPU fallback.
confusemodel.load_state_dict(
    torch.load(modelpath, map_location=device)['model_state_dict'])
pretrained_dict = confusemodel.state_dict()
# confusemodel

In [None]:
# Loss, accuracy and confusion matrix of the loaded model on the adversarial
# set. The loader built earlier in this notebook is named `attackloader`;
# `attack_loader` was never defined and raised NameError.
loss,acc,m = get_matrix(confusemodel, attackloader)

In [None]:
# Save the confusion matrix computed above as CSV in the current directory.
write_matrix(m, "ResNet18_20epoch.csv")

# Gradients

In [None]:
def get_grad(model, test_loader):
    """Collect the loss gradient w.r.t. the input for every sample.

    Relies on the module-level ``device`` and ``criterion``.

    Args:
        model: Network used to compute the loss.
        test_loader: Iterable of ``(data, target)`` batches.

    Returns:
        numpy array of the per-batch input gradients concatenated along the
        first axis, i.e. num_imgs x channel x h x w for image batches.
    """
    def _input_gradient(data, target):
        """Return d(loss)/d(data) of one batch as a numpy array."""
        data, target = data.to(device), target.to(device)

        # The gradient is taken w.r.t. the input, so it must track grad.
        data.requires_grad = True

        loss = criterion(model(data), target)

        # Clear stale parameter gradients before backpropagating.
        model.zero_grad()
        loss.backward()

        return data.grad.data.detach().cpu().numpy()

    grad_list = [_input_gradient(data, target) for data, target in test_loader]

    torch.cuda.empty_cache()
    return np.concatenate(grad_list)

# Attack

## Grey-box

In [None]:
def grey_box_attack(model, attack_loader):
    '''
    Accuracy of ``model`` on pre-computed adversarial examples.

    attack_loader: dataloader of attack instance generated by resnet18 free-trained model

    Relies on the module-level ``device``. Returns the fraction of samples
    classified correctly. The accuracy is computed per sample, so any batch
    size works (the previous version divided by the number of batches, which
    is only right for batch_size=1).
    '''

    model.eval()

    correct = 0.
    total = 0
    # Pure inference: no gradients are needed to score the examples.
    with torch.no_grad():
        for data, target in attack_loader:
            data, target = data.to(device), target.to(device)

            output = model(data)
            pred = output.max(1, keepdim=True)[1]  # index of the largest logit

            correct += torch.sum(torch.eq(pred.flatten(), target.flatten())).item()
            total += target.numel()

    final_acc = correct/float(total)

    return final_acc

## Non SAP

In [None]:
"""
perturbed_data = fgsm_attack(inputs, epsilon, data_grad)
"""

def testattack(net, testloader, epsilon):
    """White-box FGSM evaluation of ``net``.

    Each batch is attacked with ``fgsm_attack`` using the gradient of
    ``net``'s own loss, and ``net`` is then scored on the perturbed batch.

    Relies on the module-level ``device``, ``criterion`` and ``fgsm_attack``.

    Args:
        net: Model to attack and evaluate; it is switched to eval mode.
        testloader: Iterable of ``(inputs, targets)`` batches.
        epsilon: FGSM step size.

    Returns:
        Accuracy on the adversarial examples, computed per sample (the
        previous version divided by the number of batches, which is only
        right for batch_size=1).
    """
    net.eval()

    correct = 0
    total = 0
    for inputs, targets in testloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # The attack needs the gradient of the loss w.r.t. the input.
        inputs.requires_grad = True
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        net.zero_grad()
        loss.backward()

        perturbed_data = fgsm_attack(inputs, epsilon, inputs.grad.data)
        new_outputs = net(perturbed_data)
        _, new_predicted = new_outputs.max(1)

        total += targets.size(0)
        correct += new_predicted.eq(targets).sum().item()

    # Release cached GPU blocks once per pass rather than once per batch.
    torch.cuda.empty_cache()

    acc = correct/float(total)
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, total, acc))

    return acc

## SAP

In [None]:
import pdb
import time
def testattackSAP(net, net_sap, testloader, epsilon):
    """FGSM evaluation of a plain model and its SAP counterpart.

    Adversarial examples are crafted from the gradient of ``net`` only, then
    both ``net`` and ``net_sap`` are scored on them.

    Relies on the module-level ``device``, ``criterion`` and ``fgsm_attack``.

    Args:
        net: Model used to craft the attack; switched to eval mode.
        net_sap: SAP model scored on the same examples; also switched to
            eval mode so both models are evaluated in inference mode.
        testloader: Iterable of ``(inputs, targets)`` batches.
        epsilon: FGSM step size.

    Returns:
        ``(acc, acc_sap, nonsap, nonsap_att, sap, sap_att, ytrue)``: the two
        adversarial accuracies followed by per-batch lists of clean and
        attacked predictions of ``net``, clean and attacked predictions of
        ``net_sap``, and the true labels. Accuracies are computed per sample
        (the previous version divided by the number of batches).
    """
    net.eval()
    net_sap.eval()

    correct = 0.
    correct_sap = 0.
    total = 0
    nonsap = []
    nonsap_att = []
    sap = []
    sap_att = []
    ytrue = []

    start = time.time()
    for inputs, targets in testloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # The attack needs the gradient of the loss w.r.t. the input.
        inputs.requires_grad = True

        outputs = net(inputs)
        outputs_sap = net_sap(inputs)

        _, predicted = outputs.max(1)
        _, predicted_sap = outputs_sap.max(1)

        # Only the plain model's loss drives the attack.
        loss = criterion(outputs, targets)
        net.zero_grad()
        loss.backward()

        perturbed_data = fgsm_attack(inputs, epsilon, inputs.grad.data)

        new_outputs = net(perturbed_data)
        _, new_predicted = new_outputs.max(1)
        new_outputs_sap = net_sap(perturbed_data)
        _, new_predicted_sap = new_outputs_sap.max(1)

        nonsap.append(predicted.detach().cpu().numpy())
        nonsap_att.append(new_predicted.detach().cpu().numpy())
        sap.append(predicted_sap.detach().cpu().numpy())
        sap_att.append(new_predicted_sap.detach().cpu().numpy())
        ytrue.append(targets.detach().cpu().numpy())

        total += targets.size(0)
        correct += new_predicted.eq(targets).sum().item()
        correct_sap += new_predicted_sap.eq(targets).sum().item()

    # Release cached GPU blocks once per pass rather than once per batch.
    torch.cuda.empty_cache()

    acc = correct / float(total)
    acc_sap = correct_sap / float(total)
    print("Time: ", time.time()-start)
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, total, acc))
    print("Epsilon: {}\tSAP Test Accuracy = {} / {} = {}".format(epsilon, correct_sap, total, acc_sap))

    return acc,acc_sap,nonsap,nonsap_att,sap,sap_att,ytrue

## Multi-SAP


In [None]:
def testattackMultiSAP(net, net_sap, testloader, epsilon, num_passes=10):
    """FGSM evaluation where the attack averages several stochastic SAP passes.

    The attack gradient comes from the loss of ``net_sap``'s logits averaged
    over ``num_passes`` forward passes. Both ``net`` and ``net_sap`` are then
    scored on the resulting adversarial examples.

    NOTE(review): the previous body could not run -- ``correct``,
    ``new_predicted`` and ``nonsap`` were referenced but their definitions
    were commented out -- and it ran ten forward passes while using only the
    last one. Restoring the plain-model evaluation and averaging the passes
    is the presumed intent; confirm.

    Relies on the module-level ``device``, ``criterion`` and ``fgsm_attack``.

    Args:
        net: Plain model, scored on the adversarial examples; eval mode.
        net_sap: SAP model used to craft the attack and scored; eval mode.
        testloader: Iterable of ``(inputs, targets)`` batches.
        epsilon: FGSM step size.
        num_passes: Number of stochastic forward passes to average (>= 1).

    Returns:
        ``(acc, nonsap, sap, ytrue)``: adversarial accuracy of ``net``, then
        per-batch lists of attacked predictions of ``net`` and ``net_sap``
        and the true labels. The SAP accuracy is printed, not returned.

    Raises:
        ValueError: if ``num_passes`` is smaller than 1.
    """
    if num_passes < 1:
        raise ValueError("num_passes must be at least 1")

    net.eval()
    net_sap.eval()

    correct = 0.
    correct_sap = 0.
    total = 0
    nonsap = []
    sap = []
    ytrue = []

    start = time.time()
    for inputs, targets in testloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # The attack needs the gradient of the loss w.r.t. the input.
        inputs.requires_grad = True

        # Average the logits of several passes so the gradient reflects the
        # expected behaviour of the randomised model, not a single draw.
        outputs_sap = sum(net_sap(inputs) for _ in range(num_passes)) / num_passes

        loss_sap = criterion(outputs_sap, targets)
        net_sap.zero_grad()
        loss_sap.backward()

        # Detach: the adversarial batch is only evaluated from here on.
        perturbed_data = fgsm_attack(inputs, epsilon, inputs.grad.data).detach()

        new_outputs = net(perturbed_data)
        _, new_predicted = new_outputs.max(1)
        new_outputs_sap = net_sap(perturbed_data)
        _, new_predicted_sap = new_outputs_sap.max(1)

        nonsap.append(new_predicted.detach().cpu().numpy())
        sap.append(new_predicted_sap.detach().cpu().numpy())
        ytrue.append(targets.detach().cpu().numpy())

        total += targets.size(0)
        correct += new_predicted.eq(targets).sum().item()
        correct_sap += new_predicted_sap.eq(targets).sum().item()

    # Release cached GPU blocks once per pass rather than once per batch.
    torch.cuda.empty_cache()

    acc = correct / float(total)
    acc_sap = correct_sap / float(total)
    print("Time: ", time.time()-start)
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, total, acc))
    print("Epsilon: {}\tSAP Test Accuracy = {} / {} = {}".format(epsilon, correct_sap, total, acc_sap))

    return acc,nonsap,sap,ytrue

# Main

In [None]:
import gc

# Drop the previous model so its (GPU) memory can be reclaimed before a new
# one is built. On a fresh kernel `resmodel` does not exist yet, so a missing
# name is tolerated instead of aborting the cell with a NameError.
try:
    del resmodel
except NameError:
    pass

gc.collect()

# Release cached CUDA memory that is no longer referenced.
torch.cuda.empty_cache()

In [None]:
"""ResNet18 Model"""
# Build a fresh ResNet18 on the selected device and (re)initialise its weights
# with `init_weights` (both defined elsewhere in this notebook).
resmodel = ResNet18().to(device)
resmodel.apply(init_weights)

# NOTE(review): `device` is created as a `torch.device` object at the top of
# the notebook, while this compares it to the string 'cuda'. That comparison
# may evaluate to False, in which case this branch never runs - confirm.
# Switching to `device.type == 'cuda'` would wrap the model in DataParallel,
# which presumably prefixes state_dict keys with "module." and would then
# affect the checkpoint-loading cells below - verify before changing.
if device == 'cuda':
    resmodel = torch.nn.DataParallel(resmodel)
    cudnn.benchmark = True

# Loss and optimizer shared by the training cells below; learning rate and
# weight decay come from the hyper-parameters defined at the top of the file.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resmodel.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)

## FGSM train
----------------------
lambda = 2 (spelled "lamda" in the checkpoint file names), epsilon = 2/256

In [None]:
# Epoch 0 only evaluates the model (no training step), so the training
# metrics need placeholder values; otherwise the report below raises a
# NameError on a fresh kernel.
train_loss = train_acc = adv_train_loss = adv_train_acc = float('nan')

for epoch in range(start_epoch, start_epoch+30):
    if epoch > 0:
        # One clean pass followed by one FGSM adversarial pass per epoch.
        train_loss, train_acc = train(resmodel, epoch, trainloader)
        adv_train_loss, adv_train_acc = fgsm_train(resmodel, epoch, trainloader)
    val_loss, val_acc = test(resmodel, testloader)
    
    print('[Epoch: {}]\nTrain Loss: {:.4f}\tTrain Accuracy: {:.4f}\tAdv_Train Loss: {:.4f}\tAdv_Train Accuracy: {:.4f}\nVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
          format(epoch, train_loss, train_acc, adv_train_loss, adv_train_acc, val_loss, val_acc))
    
    # Checkpoint every 10th epoch (files are numbered 10, 20, 30).
    if (epoch+1)%10 == 0:
        torch.save({'model_state_dict': resmodel.state_dict(),},
                    path + "Adv_ResNet18_lamda2_{}.pth".format(str(epoch+1)))

In [None]:
def test_advtrain_ave(model, loader, num):
    start = time.time()
    res = 0
    for _ in range(num):
        acc = test(model, loader)[1]
        res += acc
    print("Time: ", (time.time()-start)/60, " min")
    return res/num

In [None]:
# Mean test accuracy of the SAP model (built from the adversarially trained
# checkpoint) over 50 evaluation passes.
# NOTE(review): `adv_SAPmodel` is created in the "Reload > adv train > SAP"
# cell further down; that cell has to be run before this one.
test_advtrain_ave(adv_SAPmodel, testloader, 50)

## PGD train
-----------------
lambda = 2 (spelled "lamda" in the checkpoint file names), epsilon = 2/256

In [None]:
# Epoch 0 only evaluates the model (no training step), so the training
# metrics need placeholder values; otherwise the report below raises a
# NameError on a fresh kernel.
train_loss = train_acc = adv_train_loss = adv_train_acc = float('nan')

# NOTE(review): this cell sits under the "PGD train" heading but calls
# `fgsm_train` and writes the same "Adv_ResNet18_lamda2_*.pth" files as the
# FGSM cell, so running it overwrites those checkpoints - confirm whether a
# PGD training routine and a distinct file name were intended.
for epoch in range(start_epoch, start_epoch+30):
    if epoch > 0:
        train_loss, train_acc = train(resmodel, epoch, trainloader)
        adv_train_loss, adv_train_acc = fgsm_train(resmodel, epoch, trainloader)
    val_loss, val_acc = test(resmodel, testloader)
    
    print('[Epoch: {}]\nTrain Loss: {:.4f}\tTrain Accuracy: {:.4f}\tAdv_Train Loss: {:.4f}\tAdv_Train Accuracy: {:.4f}\nVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
          format(epoch, train_loss, train_acc, adv_train_loss, adv_train_acc, val_loss, val_acc))
    
    # Checkpoint every 10th epoch (files are numbered 10, 20, 30).
    if (epoch+1)%10 == 0:
        torch.save({'model_state_dict': resmodel.state_dict(),},
                    path + "Adv_ResNet18_lamda2_{}.pth".format(str(epoch+1)))

## free train

In [None]:
"""res18"""
# Training without the adversarial step: each epoch runs `train` once and
# then evaluates with `test`.
for epoch in range(start_epoch, start_epoch + 20):
    train_loss, train_acc = train(resmodel, epoch, trainloader)
    val_loss, val_acc = test(resmodel, testloader)

    report = '[Epoch: {}]\nTrain Loss: {:.4f}\tTrain Accuracy: {:.4f}\tVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.format(
        epoch, train_loss, train_acc, val_loss, val_acc)
    print(report)

# A single checkpoint is written once the loop has finished; its name carries
# the index of the last epoch that ran (e.g. "ResNet18_19.pth" when
# start_epoch is 0), which is the file the reload cell below expects.
final_checkpoint = path + "ResNet18_{}.pth".format(str(epoch))
torch.save({'model_state_dict': resmodel.state_dict()}, final_checkpoint)

# Reload

## adv train

### Non SAP

In [None]:
# epsilons = 2/255

"""Load Model"""
criterion = nn.CrossEntropyLoss()
modelpath = path + "Adv_ResNet18_lamda2_10.pth"

pretrained_adv_model = ResNet18().to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on
# whatever device this session uses (including a CPU-only runtime).
checkpoint = torch.load(modelpath, map_location=device)
pretrained_adv_model.load_state_dict(checkpoint['model_state_dict'])
# Kept for the SAP cell below, which copies these weights into a ResNet18SAP.
pretrained_adv_dict = pretrained_adv_model.state_dict()
# pretrained_adv_model

In [None]:
# Run `grey_box_attack` (defined elsewhere in this notebook) against the
# reloaded adversarially trained ResNet18, one test image per batch.
grey_box_attack(pretrained_adv_model, attackloader)

### SAP

In [None]:
# Drop any SAP model left over from a previous run of this cell; on a fresh
# kernel the name does not exist yet, which is fine.
try:
    del adv_SAPmodel
except NameError:
    pass
# del model_adv_dict

# Build the SAP variant and copy over every weight whose key also exists in
# the adversarially trained ResNet18.
adv_SAPmodel = ResNet18SAP().to(device)
model_adv_dict = adv_SAPmodel.state_dict()
pretrained_adv_dict = {k: v for k, v in pretrained_adv_dict.items() if k in model_adv_dict}
model_adv_dict.update(pretrained_adv_dict)
# Load the merged dict (SAP defaults overridden by the pretrained weights).
# Loading only the filtered dict would fail the strict key check whenever the
# SAP model owns entries the plain ResNet18 does not have.
adv_SAPmodel.load_state_dict(model_adv_dict)
# adv_SAPmodel

In [None]:
# Same grey-box attack as above, now against the SAP model that carries the
# adversarially trained weights.
grey_box_attack(adv_SAPmodel, attackloader)

In [None]:
# Attack the plain and the SAP model side by side and keep accuracies,
# predictions before/after the attack and the true labels for the plots below.
# NOTE(review): this unpacks seven values, whereas the SAP cell near the end
# of the notebook unpacks three from the same function - confirm which arity
# the current `testattackSAP` returns.
# NOTE(review): `epsilons` only appears as a commented-out assignment in this
# part of the notebook - confirm it is defined before running this cell.
acc,acc_sap,nonsap,nonsap_att,sap,sap_att,ytrue = testattackSAP(pretrained_adv_model, adv_SAPmodel, attackloader, epsilons)

In [None]:
def _scatter_vs_truth(pred, tag, heading, xs, count):
    # Overlay the first `count` predictions (blue dots) on the true labels
    # (red dots) and show the figure.
    plt.plot(xs, pred[:count], 'b.', label=tag)
    plt.plot(xs, ytrue[:count], 'r.', label="true")
    plt.legend()
    plt.title(heading)
    plt.show()


# Horizontal positions for the first 200 test samples.
x = np.linspace(0,200,200)
_scatter_vs_truth(sap, "SAP", "Predictions before Attack", x, 200)
_scatter_vs_truth(sap_att, "SAP_att", "Predictions after Attack", x, 200)

## free train

### Non SAP

In [None]:
# epsilons = 0.05
# Drop objects from a previous run of this cell so their memory can be
# reclaimed; on a fresh kernel the names do not exist yet, which is fine.
try:
    del pretrained_model
except NameError:
    pass
try:
    del pretrained_dict
except NameError:
    pass

"""Load Model"""
criterion = nn.CrossEntropyLoss()
modelpath = path + "ResNet18_19.pth"

pretrained_model = ResNet18().to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on
# whatever device this session uses (including a CPU-only runtime).
checkpoint = torch.load(modelpath, map_location=device)
pretrained_model.load_state_dict(checkpoint['model_state_dict'])
# Kept for the SAP cells below, which copy these weights into a ResNet18SAP.
pretrained_dict = pretrained_model.state_dict()
# pretrained_model

In [None]:
# Attack the normally trained ResNet18 via `testattack` (defined elsewhere).
# NOTE(review): `epsilons` only appears as a commented-out assignment in this
# part of the notebook - confirm it is defined before running this cell.
testattack(pretrained_model, attackloader, epsilons)

In [None]:
# Grey-box attack (helper defined elsewhere) against the normally trained
# ResNet18, one test image per batch.
grey_box_attack(pretrained_model, attackloader)

### Multi-SAP

In [None]:
# Build a SAP model and copy over every weight whose key also exists in the
# normally trained ResNet18.
random_SAPmodel = ResNet18SAP().to(device)
model_dict = random_SAPmodel.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
# Load the merged dict (SAP defaults overridden by the pretrained weights).
# Loading only the filtered dict would fail the strict key check whenever the
# SAP model owns entries the plain ResNet18 does not have.
random_SAPmodel.load_state_dict(model_dict)
random_SAPmodel

In [None]:
def get_batch_grad(epoch, model, loader):
    """Run ``get_grad`` repeatedly and save each result to its own file.

    Args:
        epoch: number of repetitions; run ``k`` (starting at 0) is written
            to ``sap_<k>_grads.npy`` in the current working directory.
        model: network handed unchanged to ``get_grad``.
        loader: data loader handed unchanged to ``get_grad``.

    Returns:
        None; the results only exist as the saved ``.npy`` files.
    """
    for run_idx in range(epoch):
        out_name = "sap_{}_grads.npy".format(run_idx)
        np.save(out_name, get_grad(model, loader))

In [None]:
# Save 10 independent gradient dumps of the SAP model over the test set.
# The batch-size-1 loader is named `attackloader` where it is created at the
# top of the notebook; the previous spelling `attack_loader` is not defined
# there.
get_batch_grad(10, random_SAPmodel, attackloader)

In [None]:
# Move the ten gradient dumps written by get_batch_grad(10, ...) from the
# working directory into the Drive folder "GameTheory/cal_grad/".
!mv sap_0_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_1_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_2_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_3_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_4_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_5_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_6_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_7_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_8_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"
!mv sap_9_grads.npy "/content/gdrive/My Drive/GameTheory/cal_grad/"

In [None]:
# Attack the plain and the SAP model side by side with epsilon = 0.05.
# The batch-size-1 loader is named `attackloader` where it is created at the
# top of the notebook; the previous spelling `attack_loader` is not defined
# there.
# NOTE(review): this unpacks seven values, whereas the SAP cell near the end
# of the notebook unpacks three from the same function - confirm which arity
# the current `testattackSAP` returns.
acc,acc_sap,nonsap,nonsap_att,sap,sap_att,ytrue = testattackSAP(pretrained_model, random_SAPmodel, attackloader, 0.05)


In [None]:
import matplotlib.pyplot as plt


def _scatter_sap_vs_truth(pred, tag, heading, xs, count):
    # Overlay the first `count` SAP predictions (blue dots) on the true
    # labels (red dots) and show the figure.
    plt.plot(xs, pred[:count], 'b.', label=tag)
    plt.plot(xs, ytrue[:count], 'r.', label="true")
    plt.legend()
    plt.title(heading)
    plt.show()


# Horizontal positions for the first 100 test samples.
x = np.linspace(0,100,100)
_scatter_sap_vs_truth(sap, "SAP", "Predictions before Attack", x, 100)
_scatter_sap_vs_truth(sap_att, "SAP_att", "Predictions after Attack", x, 100)

In [None]:
import matplotlib.pyplot as plt


def _scatter_nonsap_vs_truth(pred, tag, heading, xs, count):
    # Overlay the first `count` non-SAP predictions (blue dots) on the true
    # labels (red dots) and show the figure.
    plt.plot(xs, pred[:count], 'b.', label=tag)
    plt.plot(xs, ytrue[:count], 'r.', label="true")
    plt.legend()
    plt.title(heading)
    plt.show()


# Horizontal positions for the first 100 test samples.
x = np.linspace(0,100,100)
_scatter_nonsap_vs_truth(nonsap, "nonSAP", "Predictions before Attack", x, 100)
_scatter_nonsap_vs_truth(nonsap_att, "nonSAP_att", "Predictions after Attack", x, 100)

# plt.plot(x,sap_att[:100],'b.', label="SAP_att")
# plt.plot(ytrue[:100],sap_att[:100],'r.',label="true")
# plt.legend() 
# plt.show()

In [None]:
# For every test image (the loader yields one image per batch) run the SAP
# model 10 times and record, per pass, the predicted class and the gradient
# of the loss with respect to the input image.
#   all_grad[k] -> the 10 per-pass input gradients of image k, concatenated
#                  along the first axis
#   all_pred[k] -> the 10 per-pass predicted labels of image k
# NOTE(review): every gradient is kept in host memory, which grows with the
# size of the test set - confirm this fits on the target machine.
all_grad = []
all_pred = []
for inputs, targets in attackloader:
    inputs, targets = inputs.to(device), targets.to(device)

    # Gradients are taken with respect to the input image itself.
    inputs.requires_grad = True

    grad_list = []
    pred_list = []

    for i in range(10):
        outputs_sap = random_SAPmodel(inputs)

        _, predicted_sap = outputs_sap.max(1)
        pred_list.append(predicted_sap.detach().cpu().numpy())

        loss_sap = criterion(outputs_sap, targets)

        random_SAPmodel.zero_grad()
        # zero_grad() on the model only clears parameter gradients. Without
        # resetting the input gradient as well, every backward() would add
        # onto the previous passes, so pass i would store the running sum of
        # passes 0..i instead of its own gradient. Rebinding to None (rather
        # than zeroing in place) also keeps arrays saved earlier from
        # sharing a buffer that is later overwritten.
        inputs.grad = None

        loss_sap.backward()

        data_grad = inputs.grad.detach()
        grad_list.append(data_grad.cpu().numpy())

    grad_list = np.concatenate(grad_list)
    pred_list = np.concatenate(pred_list)

    print("Target: {}\nPredictions: {}".format(targets.detach().cpu().numpy(), pred_list))

    all_grad.append(grad_list)
    all_pred.append(pred_list)

### SAP

In [None]:
import gc

# Drop objects from a previous run so their (GPU) memory can be reclaimed
# before a new SAP model is built. On a fresh kernel the names do not exist
# yet, so a missing name is tolerated instead of aborting the cell.
# del pretrained_model
try:
    del SAPmodel
except NameError:
    pass
try:
    del model_dict
except NameError:
    pass
# del adv_SAPmodel
# del random_SAPmodel

gc.collect()

# Release cached CUDA memory that is no longer referenced.
torch.cuda.empty_cache()

In [None]:
# Build a SAP model and copy over every weight whose key also exists in the
# normally trained ResNet18.
SAPmodel = ResNet18SAP().to(device)
model_dict = SAPmodel.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
# Load the merged dict (SAP defaults overridden by the pretrained weights).
# Loading only the filtered dict would fail the strict key check whenever the
# SAP model owns entries the plain ResNet18 does not have.
SAPmodel.load_state_dict(model_dict)
# SAPmodel

In [None]:
# Attack the plain and the SAP model and keep their collected outputs.
# NOTE(review): this unpacks three values, whereas the earlier calls to
# `testattackSAP` unpack seven - confirm which arity the current function
# returns. `epsilons` also needs to be defined before this cell runs.
acc,nonsap,sap = testattackSAP(pretrained_model, SAPmodel, attackloader, epsilons)
# Stack the per-sample lists into arrays and drop singleton axes.
ns = np.squeeze(np.array(nonsap))
s = np.squeeze(np.array(sap))

# Transposed views; the per-class plot cell below indexes these by class
# (presumably one row per class after the transpose - verify shapes).
ns_T = ns.T
s_T = s.T

In [None]:
import matplotlib.pyplot as plt

# Compare the non-SAP and SAP outputs for the first sample; the x axis has
# ten positions, 1 through 10.
x = list(range(1, 11))
plt.plot(x, ns[0], label="nonSAP")
plt.plot(x, s[0], label="SAP")
plt.legend()

In [None]:
import matplotlib.pyplot as plt

# Compare the non-SAP and SAP outputs for the second sample; the x axis has
# ten positions, 1 through 10.
x = list(range(1, 11))
plt.plot(x, ns[1], label="nonSAP")
plt.plot(x, s[1], label="SAP")
plt.legend()

In [None]:
import matplotlib.pyplot as plt
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

x = np.linspace(0,10000,10000)

for i in range(len(classes)):
    plt.plot(x,ns_T[i],label="nonSAP")
    plt.plot(x,s_T[i],label="SAP")
    plt.legend()
    plt.title(classes[i])
    plt.show()