# Task 2: Input Randomization

### If you are using Google Colab, upload this notebook and the codebase to your Google Drive, then mount your Google Drive in Colab and set the working directory to the project folder. If you are running on your local machine, you can skip the Drive-mounting and working-directory cells below.

In [None]:
# Mount Google Drive only when running inside Colab. On a local machine the
# google.colab package does not exist, so the cell reports that and moves on
# instead of aborting a "Run All".
try:
    from google.colab import drive
except ImportError:
    print('google.colab is not available - skipping the Google Drive mount.')
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

root_dir = "/content/drive/My Drive/"
project_dir = "Assignment1_code" # Change to your path

# Only switch directories when the Drive mount point exists (i.e. on Colab).
# A wrong project_dir on Colab still raises, so a bad path is not silently ignored.
if os.path.isdir(root_dir):
    os.chdir(os.path.join(root_dir, project_dir))
else:
    print("Google Drive is not mounted at %s - staying in %s" % (root_dir, os.getcwd()))

In [None]:
# Make sure the path is correct: the listing should show the assignment
# notebooks together with model.py, defense.py, utilities.py and test_image.
!ls

CS5562_Assignment_1_Task_1.ipynb    imagenet_class_index.json
CS5562_Assignment_1_Task_2.ipynb    JSMA
CS5562_Assignment_1_Task_3.ipynb    model.py
CS5562_Assignment_1_Task_4.ipynb    __pycache__
CS5562_Assignment_1_Task_5.ipynb    results
CS5562_Assignment_1_Warm_ups.ipynb  test_image
defense.py			    utilities.py
environment.yml


## Implement the Defense

In [None]:
from torchvision import transforms
import torch
from scipy.stats import norm, binom_test
import numpy as np
from math import ceil
from statsmodels.stats.proportion import proportion_confint
import torch.nn as nn
import torchvision
from torch import optim

In [None]:
def randomization_input(test_image, resize_bound):
    '''
    Apply the input-randomization defense: a random resize followed by random constant padding.

    :param test_image: the test image which could be clean or adversarial. The expected size is [1, 3, 224, 224].
    :param resize_bound: side length of the square output; must be strictly greater than 224.
    :return: randomized version of the test image. Its last two dimensions are
             [resize_bound, resize_bound], i.e. [1, 3, resize_bound, resize_bound] for the expected input.
    :raises ValueError: if resize_bound is not greater than 224.
    Notes:
    1. np.random.randint(low, high) samples integers uniformly from [low, high): the upper bound is exclusive.
    2. transforms.Resize takes a (height, width) pair.
    3. torch.nn.functional.pad pads the last dimension first: (left, right, top, bottom).
    '''

    PAD_VALUE = 0.5  # this is the pad value for the random padding step
    MIN_SIZE = 224   # smallest side after resizing; equals the expected input side

    if resize_bound <= MIN_SIZE:
        # np.random.randint(low, high) needs low < high; fail with a readable message instead.
        raise ValueError('resize_bound must be greater than %d, got %r' % (MIN_SIZE, resize_bound))

    ## Resizing between [224, resize_bound) for least performance drop in clean images.
    # Cast to plain Python ints so the torchvision / torch calls never see numpy scalar types.
    rand_W, rand_H = (int(v) for v in np.random.randint(low=MIN_SIZE, high=resize_bound, size=2))
    resized_image = transforms.Resize(size=(rand_H, rand_W))(test_image)

    ## Padding - (padding_left, padding_right, padding_top, padding_bottom)
    leftover_W = resize_bound - rand_W
    leftover_H = resize_bound - rand_H
    # Each offset is one of eleven fractions (0/10 .. 10/10) of the free space.
    prob_w, prob_h = (int(v) for v in np.random.randint(0, 11, size=2))

    left = int(leftover_W * prob_w / 10)
    right = leftover_W - left
    top = int(leftover_H * prob_h / 10)
    bottom = leftover_H - top

    pad = (left, right, top, bottom)
    randomized_image = nn.functional.pad(resized_image, pad, "constant", PAD_VALUE)

    # Sanity check on the spatial size only, so any batch size / channel count passes.
    assert tuple(randomized_image.shape[-2:]) == (resize_bound, resize_bound), 'Shape does not match'

    return randomized_image

## Test your code

#### Copy and paste your FGSM and PGD attacks here:

In [None]:
class FSG_attack:
    """
        The FGSM (fast gradient sign method) attack in warm-up task.

        A single gradient step: the perturbation is epsilon * sign(gradient of the
        attack loss with respect to the input).
    """
    def __init__(self, target_model, epsilon: float):
        """
        :param target_model: any object exposing ``predict(image)`` that returns class scores.
        :param epsilon: magnitude of every entry of the returned perturbation.
        """
        self.target_model = target_model
        self.epsilon = epsilon

    def attack(self, test_image: torch.Tensor, y_true: torch.Tensor, is_targeted: bool = False,
               y_target: torch.Tensor = None) -> torch.Tensor:
        """
        Compute the one-step perturbation for ``test_image``.

        :param test_image: input tensor fed to ``target_model.predict``.
        :param y_true: tensor holding the true class index.
        :param is_targeted: when True, push the prediction towards ``y_target``.
        :param y_target: tensor holding the target class index (required if targeted).
        :return: perturbation with the same shape as ``test_image``; entries are
                 +/- epsilon (0 where the gradient is exactly zero).
        :raises ValueError: if ``is_targeted`` is True but ``y_target`` is None.
        """
        if is_targeted and y_target is None:
            raise ValueError('Please input the target label')

        perturbation = torch.zeros_like(test_image, requires_grad=True)  # init the perturbation
        pred = self.target_model.predict(test_image + perturbation)

        criterion = torch.nn.CrossEntropyLoss()
        if is_targeted:
            # The step below ascends `loss`, so negate: ascending -CE lowers the
            # cross-entropy towards the target class.
            loss = -criterion(pred, y_target)
        else:
            # Ascending the cross-entropy of the true class pushes the prediction away from it.
            loss = criterion(pred, y_true)

        loss.backward()
        gradient = perturbation.grad  # get the gradient of the loss with respect to the perturbation
        return self.epsilon * torch.sign(gradient)


class PGD_attack:
    """
        The PGD attack in warm-up task.

        Runs ``steps`` SGD updates on an additive perturbation and, after every
        update, projects the perturbation back into the box [-epsilon, epsilon].
    """
    def __init__(self, target_model, epsilon: float, steps: int,
                 learning_rate: float = 1e-1):
        """
        :param target_model: any object exposing ``predict(image)`` that returns class scores.
        :param epsilon: bound on the absolute value of every perturbation entry.
        :param steps: number of optimisation steps.
        :param learning_rate: SGD step size.
        """
        self.target_model = target_model
        self.epsilon = epsilon
        self.steps = steps
        self.learning_rate = learning_rate

    def attack(self, test_image: torch.Tensor, y_true: torch.Tensor, is_targeted: bool = False,
               y_target: torch.Tensor = None) -> torch.Tensor:
        """
        Optimise a bounded perturbation for ``test_image``.

        :param test_image: input tensor fed to ``target_model.predict``.
        :param y_true: tensor holding the true class index.
        :param is_targeted: when True, pull the prediction towards ``y_target``.
        :param y_target: tensor holding the target class index (required if targeted).
        :return: detached perturbation, same shape as ``test_image``, entries in [-epsilon, epsilon].
        :raises ValueError: if ``is_targeted`` is True but ``y_target`` is None.
        """
        if is_targeted and y_target is None:
            raise ValueError('Please input the target label')

        perturbation = torch.zeros_like(test_image, requires_grad=True)
        opt = optim.SGD([perturbation], lr=self.learning_rate)
        criterion = torch.nn.CrossEntropyLoss()

        for t in range(self.steps):
            pred = self.target_model.predict(test_image + perturbation)
            if is_targeted:
                # SGD minimises: lower the cross-entropy of the target class.
                loss = criterion(pred, y_target)
            else:
                # SGD minimises: minimising -CE raises the cross-entropy of the true class.
                loss = -criterion(pred, y_true)

            if t%10 == 0:
                print("PGD attack epoch %d: loss %s"%(t,str(loss.item())))

            opt.zero_grad()
            loss.backward()
            opt.step()

            # Projection step. Clamp the perturbation itself (not its gradient) and do
            # it in place, so the optimizer keeps updating this same tensor and the
            # steps accumulate.
            with torch.no_grad():
                perturbation.clamp_(-self.epsilon, self.epsilon)

        return perturbation.detach()

### Helper functions

In [None]:
import time
from PIL import Image

from utilities import *
from defense import standard_trainer

In [None]:
def compute_attack_grade_imagenet(attack_name, model, test_dir, eps=0.1, is_targeted=False, y_target=None, steps=10,
                                  learning_rate=0.1, num_pixel=None, defense_name=None, max_images=1):
    """
    Run an attack on the ``.JPEG`` images in ``test_dir`` and report its grade.

    :param attack_name: 'FSG' or 'PGD'.
    :param model: object with a ``predict(image)`` method; attacked and used for scoring.
    :param test_dir: directory of test images; file names start with the label
                     followed by '_' (the label is parsed with ``load_label_tensor``).
    :param eps: perturbation budget passed to the attack.
    :param is_targeted: whether to run a targeted attack.
    :param y_target: integer target class (required when ``is_targeted`` is True).
    :param steps: number of PGD steps (ignored by FSG).
    :param learning_rate: PGD step size (ignored by FSG).
    :param num_pixel: unused here; kept so existing calls keep working.
    :param defense_name: label used only in the printed report.
    :param max_images: evaluate at most this many images (in sorted file-name order);
                       ``None`` evaluates every image. The default of 1 matches the
                       single-image runs this notebook was recorded with.
    :return: (mean, variance) of the per-image attack grades; both are nan when no
             image was scored.
    :raises ValueError: on an unknown ``attack_name`` or a targeted run without ``y_target``.
    """
    # Validate the target before converting it: torch.LongTensor([None]) would fail
    # with an unrelated TypeError before any later check could run.
    if is_targeted:
        if y_target is None:
            raise ValueError('Please input the target label')
        y_target = torch.LongTensor([y_target])

    if attack_name == 'FSG':
        attacker = FSG_attack(model, eps)
    elif attack_name == 'PGD':
        attacker = PGD_attack(model, eps, steps, learning_rate)
    else:
        raise ValueError('Please input the corret attack name: FSG, PGD, OP,adaptive')

    # Filter first, then limit: slicing the raw directory listing could pick a
    # non-image entry and leave nothing to evaluate. Sorting makes the choice repeatable.
    image_files = sorted(name for name in os.listdir(test_dir) if name.endswith(".JPEG"))
    if max_images is not None:
        image_files = image_files[:max_images]

    attack_grade = []
    attack_success = []
    start_time = time.time()
    for filename in image_files:
        # convert the name of the label to the number
        y_true = filename.split('_')[0]
        y_true_tensor = load_label_tensor(y_true)

        # read the image and pre-process the data; the file is closed once the tensor exists
        with Image.open(os.path.join(test_dir, filename)) as test_image:
            target_image_tensor = preprocess_features(test_image)[None, :, :, :]

        # generate the perturbation
        delta = attacker.attack(target_image_tensor, y_true_tensor, is_targeted=is_targeted, y_target=y_target)

        # generate the adv examples based on the perturbation
        adv_example = get_adv_example(target_image_tensor, delta, attack_name)

        # compute the prediction based on the clean images and adversarial examples
        pred_clean = model.predict(target_image_tensor)
        pred_adv = model.predict(adv_example)

        # compute the score of the attack
        grade, success = get_attack_score(y_true_tensor, pred_clean, pred_adv, is_targeted=is_targeted,
                                          y_target=y_target)
        # (-1, -1) marks an image that should not be counted
        if grade == -1 and success == -1:
            continue
        attack_grade.append(grade)
        attack_success.append(success)

    grades = np.array(attack_grade)
    successes = np.array(attack_success)
    if grades.size:
        grade_mean, grade_var, success_rate = grades.mean(), grades.var(), successes.mean()
    else:
        # Nothing was scored: report nan explicitly rather than via numpy's empty-mean warning.
        grade_mean = grade_var = success_rate = np.nan

    print('----------results-------------')
    print("[%s attack against %s model] \nattacking %d images using %.3f seconds" % (
    attack_name, defense_name, len(attack_grade), time.time() - start_time))
    print("grade %.2f, success rate: %.2f" % (grade_mean, success_rate))

    return grade_mean, grade_var

def get_attack_score(y_true: torch.Tensor, pred_clean: torch.Tensor, pred_adv: torch.Tensor, weight: float = 0.5,
                     is_targeted: bool = False, y_target: torch.Tensor = None):
    """
    Score one attacked image.

    The grade is ``weight * success + (1 - weight) * confidence_shift`` where the
    confidence values come from ``get_confidence`` (defined outside this cell).

    :param y_true: tensor holding the true class index.
    :param pred_clean: model output for the clean image.
    :param pred_adv: model output for the adversarial image.
    :param weight: weight of the success indicator in the grade.
    :param is_targeted: score as a targeted attack (success = predicted ``y_target``).
    :param y_target: tensor holding the target class index (required if targeted).
    :return: ``(grade, success)``; ``(-1, -1)`` when the clean image is not classified
             as ``y_true``, which is the sentinel the evaluation loop skips.
    :raises ValueError: if ``is_targeted`` is True but ``y_target`` is None.
    """
    true_label = y_true.item()

    # An image the model already gets wrong cannot be graded. Return the sentinel
    # the caller checks for instead of aborting the whole evaluation.
    if pred_clean.argmax().item() != true_label:
        return -1, -1

    adv_label = pred_adv.argmax().item()

    if is_targeted:
        if y_target is None:
            raise ValueError('Please input the target label')
        target_label = y_target.item()
        confid_clean = get_confidence(pred_clean, target_label)
        confid_adv = get_confidence(pred_adv, target_label)
        success = int(adv_label == target_label)
        # reward confidence gained on the target class
        return weight * success + (1 - weight) * (confid_adv - confid_clean), success
    else:
        confid_clean = get_confidence(pred_clean, true_label)
        confid_adv = get_confidence(pred_adv, true_label)
        success = int(adv_label != true_label)
        # reward confidence lost on the true class
        return weight * success + (1 - weight) * (confid_clean - confid_adv), success

In [None]:
class Randomization_Defense:
    """Model wrapper whose predictions run on a normalized, randomly resized and padded input."""

    def __init__(self, model, resize_bound):
        # resize_bound is forwarded to randomization_input on every prediction
        self.resize_bound = resize_bound
        self.model = model

    def predict(self, test_image):
        """Normalize ``test_image``, randomize it, and return the wrapped model's output."""
        # per-channel mean / std used for normalization
        channel_mean = [0.485, 0.456, 0.406]
        channel_std = [0.229, 0.224, 0.225]
        normalized_image = transforms.Normalize(channel_mean, channel_std)(test_image)

        # a fresh random resize + pad is drawn on every call
        defended_input = randomization_input(normalized_image, self.resize_bound)
        return self.model(defended_input)

### Testing

In [None]:
# Wrap the loaded model in the randomization defense with a resize bound of 256.
pretrained_model = load_model()
target_model = Randomization_Defense(pretrained_model, 256)
defense_name = 'randomization defense'

# Untargeted PGD attack against the defended model.
grade_mean, grade_variance = compute_attack_grade_imagenet(
    "PGD",
    target_model,
    "test_image/",
    eps=0.01,
    is_targeted=False,
    y_target=None,
    steps=50,
    learning_rate=0.02,
    defense_name=defense_name,
)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 81.0MB/s]


PGD attack epoch 0: loss -0.068153016269207
PGD attack epoch 10: loss -0.007505314890295267
PGD attack epoch 20: loss -0.03793869912624359
PGD attack epoch 30: loss -0.03344587981700897
PGD attack epoch 40: loss -0.0394243448972702
----------results-------------
[PGD attack against randomization defense model] 
attacking 1 images using 31.728 seconds
grade -0.01, success rate: 0.00


In [None]:
# Targeted PGD attack (target class index 25) against the same defended model.
grade_mean, grade_variance = compute_attack_grade_imagenet(
    "PGD",
    target_model,
    "test_image/",
    eps=0.01,
    is_targeted=True,
    y_target=25,
    steps=50,
    learning_rate=0.02,
    defense_name=defense_name,
)

PGD attack epoch 0: loss 16.642498016357422
PGD attack epoch 10: loss 21.59040641784668
PGD attack epoch 20: loss 22.720443725585938
PGD attack epoch 30: loss 21.709941864013672
PGD attack epoch 40: loss 24.854631423950195
----------results-------------
[PGD attack against randomization defense model] 
attacking 1 images using 29.136 seconds
grade -0.00, success rate: 0.00
