<div align="right">
    <a href="https://colab.research.google.com/github/Its-Shivanshu-Sharma/AdversarialAttacks/blob/main/Adversarial_Attacks.ipynb">
        <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/>
    </a>
</div>
    
<div align="right">
    <a href="https://console.paperspace.com/github/Its-Shivanshu-Sharma/AdversarialAttacks/blob/main/Adversarial_Attacks.ipynb">
        <img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run on Gradient"/>
    </a>
</div>

# Adversarial Attacks on Neural Networks

---

## Table of Contents:
- ### [Installing & Importing packages, libraries, etc](#installing-&-importing-packages,-libraries,-etc)
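- ### [Fetching & preparing the Dataset](#fetching-&-preparing-the-dataset)
- ### [Training a Deep Neural Network for Binary Classification](#training-a-deep-neural-network-for-binary-classification)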

<a name="installing-&-importing-packages,-libraries,-etc"></a>

---
## Installing & Importing packages, libraries, etc

### Installing packages

In [4]:
%%bash
pip3 install -qq torch==1.10.2+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

tcmalloc: large alloc 1147494400 bytes == 0x55efd7854000 @  0x7f296c9dc615 0x55ef9df9117c 0x55ef9e07147a 0x55ef9df93f9d 0x55ef9e085d4d 0x55ef9e007ec8 0x55ef9e002a2e 0x55ef9df9588a 0x55ef9e007d30 0x55ef9e002a2e 0x55ef9df9588a 0x55ef9e004719 0x55ef9e086b76 0x55ef9e003d95 0x55ef9e086b76 0x55ef9e003d95 0x55ef9e086b76 0x55ef9e003d95 0x55ef9df95ce9 0x55ef9dfd9579 0x55ef9df94902 0x55ef9e007c4d 0x55ef9e002a2e 0x55ef9df9588a 0x55ef9e004719 0x55ef9e002a2e 0x55ef9df9588a 0x55ef9e0038f6 0x55ef9df957aa 0x55ef9e003b4f 0x55ef9e002a2e
tcmalloc: large alloc 1434370048 bytes == 0x55f01beaa000 @  0x7f296c9dc615 0x55ef9df9117c 0x55ef9e07147a 0x55ef9df93f9d 0x55ef9e085d4d 0x55ef9e007ec8 0x55ef9e002a2e 0x55ef9df9588a 0x55ef9e007d30 0x55ef9e002a2e 0x55ef9df9588a 0x55ef9e004719 0x55ef9e086b76 0x55ef9e003d95 0x55ef9e086b76 0x55ef9e003d95 0x55ef9e086b76 0x55ef9e003d95 0x55ef9df95ce9 0x55ef9dfd9579 0x55ef9df94902 0x55ef9e007c4d 0x55ef9e002a2e 0x55ef9df9588a 0x55ef9e004719 0x55ef9e002a2e 0x55ef9df9588a 0x55ef9e00

### Importing packages, libraries, etc

In [5]:
import copy
import os
import time
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms.functional as TF
from torch.utils.data import DataLoader, Dataset

In [6]:
# Set the seed for PyTorch's RNG (Random Number Generator)
# for reproducibility
torch.manual_seed(1234)

<torch._C.Generator at 0x7f5995f3fcf0>

<a name="fetching-&-preparing-the-dataset"></a>

---
## Fetching & preparing the Dataset

We'll be creating a model for binary classification using the [Oxford-IIIT Pet Dataset](https://www.robots.ox.ac.uk/~vgg/data/pets/).

In [7]:
%%bash
# Create the directory that stores the dataset.
# `-p` makes this a no-op when it already exists, so the cell can be re-run.
mkdir -p pets_dataset
cd pets_dataset
# `-nc` (no-clobber) skips the download when the archive is already present
# instead of fetching it again under a `.1` suffix.
wget -q -nc https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
wget -q -nc https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
tar -xf images.tar.gz
tar -xf annotations.tar.gz

In [8]:
dataset_dir = Path("./pets_dataset")

In [9]:
def ls(path):
    """Print one line per entry directly inside `path`.

    Each line starts with `d` for a directory or `f` for anything else,
    left-aligned in a 4-character column, followed by the entry's path.
    """
    for entry in path.iterdir():
        if entry.is_dir():
            kind = "d"
        else:
            kind = "f"
        print(f"{kind: <4}{entry}")

In [10]:
ls(dataset_dir)

f   /content/pets_dataset/annotations.tar.gz
d   /content/pets_dataset/images
d   /content/pets_dataset/annotations
f   /content/pets_dataset/images.tar.gz


In [11]:
full_data = pd.read_csv(
    dataset_dir / "annotations" / "list.txt",
    skiprows=6,
    header=None,
    names=["file_name", "class_id", "species", "breed_id"],
    sep=" ",
)
full_data.head()

Unnamed: 0,file_name,class_id,species,breed_id
0,Abyssinian_100,1,1,1
1,Abyssinian_101,1,1,1
2,Abyssinian_102,1,1,1
3,Abyssinian_103,1,1,1
4,Abyssinian_104,1,1,1


In [12]:
full_data = full_data.loc[:, ["file_name", "species"]]
full_data.head()

Unnamed: 0,file_name,species
0,Abyssinian_100,1
1,Abyssinian_101,1
2,Abyssinian_102,1
3,Abyssinian_103,1
4,Abyssinian_104,1


#### Change the labels (i.e. `species`) such that:
`Cat = 0` & `Dog = 1`

In [13]:
new_label_map = {
    1: 0,
    2: 1,
}

In [14]:
full_data.loc[:, "species"] = full_data.loc[:, "species"].apply(new_label_map.get)

In [15]:
full_data.head()

Unnamed: 0,file_name,species
0,Abyssinian_100,0
1,Abyssinian_101,0
2,Abyssinian_102,0
3,Abyssinian_103,0
4,Abyssinian_104,0


In [18]:
def train_val_split(data, train_size=None, val_size=None):
    """Function to randomly split data into training & validation sets

    Parameters:
    -----------
    - data (pandas.DataFrame): Dataframe containing the annotations for the images.
    - train_size (float) - Fraction of data to be allocated to training set.
    - val_size (float) - Fraction of data to be allocated to validation set.
                         (`val_size` is ignored if `train_size` is not None)

    Returns:
    --------
    A tuple containing Dataframes for the training and validation sets,
    i.e. returns `(train_set, val_set)`. Both have a fresh 0..n-1 index.

    Raises:
    -------
    ValueError if neither `train_size` nor `val_size` is given.
    """
    size = len(data)
    # Calculate length of the training set. Compare against None (not
    # truthiness) so that an explicit fraction of 0 is honoured.
    if train_size is not None:
        train_len = int(train_size * size)
    elif val_size is not None:
        train_len = size - int(val_size * size)
    else:
        raise ValueError("Either `train_size` or `val_size` must be specified.")

    # Randomly generate training and validation datasets.
    # The permutation is converted to a plain list of ints so that
    # positional indexing does not depend on pandas accepting a torch tensor.
    idxs = torch.randperm(size).tolist()
    train_set = data.iloc[idxs[:train_len], :].reset_index(drop=True)
    val_set = data.iloc[idxs[train_len:], :].reset_index(drop=True)

    return train_set, val_set

In [20]:
data = {}
data["train"], data["val"] = train_val_split(full_data, train_size=0.8)

In [21]:
data["train"].head()

Unnamed: 0,file_name,species
0,staffordshire_bull_terrier_174,1
1,beagle_73,1
2,Abyssinian_192,0
3,american_pit_bull_terrier_93,1
4,american_pit_bull_terrier_119,1


#### Create `Dataset` objects for the training & validation sets

In [22]:
class PetsDataset(Dataset):
    """Dataset subclass that serves (image, label) pairs for the IIIT-Pets Dataset.

    The file stem is read from column 0 of the annotations and the label
    from column 1; the image is loaded from disk on every access.
    """

    def __init__(
        self,
        annotations,
        img_dir,
        img_format="jpg",
        transform=None,
        target_transform=None,
    ):
        """
        Parameters:
        -----------
        - annotations (pandas.DataFrame) - Dataframe containing the annotations.
        - img_dir (str or Path object) - Path to the directory containing images.
        - img_format (str: default = 'jpg') - File extension of the images
                                              (without the leading dot).
        - transform (callable; default=None) - Transformation to apply to images.
        - target_transform (callable; default=None) - Transformation to apply to
                                                      target labels.
        """
        self.annotations, self.img_dir = annotations, img_dir
        self.img_format = img_format
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Number of samples, i.e. number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load and return the `(image, label)` pair at position `index`."""
        stem = self.annotations.iloc[index, 0]
        label = self.annotations.iloc[index, 1]

        # Build "<img_dir>/<stem>.<img_format>" and read it as a tensor
        img_path = os.path.join(self.img_dir, ".".join([stem, self.img_format]))
        image = torchvision.io.read_image(img_path)

        # Optional transforms are applied only when a truthy callable was given
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label

### **Note:** Some images have an additional `alpha` channel for transparency.<br>This will cause problems, hence, we will simply remove this additional channel using the `resize_remove` transform on the images.

In [23]:
def resize_remove(img_size):
    """Build a transform that drops surplus channels and resizes an image.

    The returned callable keeps only the first `img_size[0]` channels of its
    input and resizes the result with
    `torchvision.transforms.functional.resize` to `img_size[1:]`.

    Parameters:
    -----------
    - img_size (tuple or array-like) - target size as (channels, height, width).

    Returns:
    --------
    A callable taking an image and returning the channel-trimmed, resized image.
    """
    keep_channels = img_size[0]

    # Slice off extra channels first, then resize to the requested (H, W)
    return lambda img: TF.resize(img[:keep_channels], img_size[1:])

In [24]:
# Define the image size in (C, H, W) format
img_size = (3, 150, 150)

In [None]:
# `resize_remove` returns a plain function, which `nn.Sequential` rejects
# (it only accepts `nn.Module` instances). `Compose` chains arbitrary
# callables, so further transforms can still be appended per phase.
data_transforms = {
    "train": torchvision.transforms.Compose(
        [
            resize_remove(img_size),
        ]
    ),
    "val": torchvision.transforms.Compose(
        [
            resize_remove(img_size),
        ]
    ),
}

In [None]:
datasets = {
    phase: PetsDataset(
        data[phase], img_dir=dataset_dir / "images", transform=data_transforms[phase]
    )
    for phase in ["train", "val"]
}

In [None]:
dataset_size = {phase: len(datasets[phase]) for phase in ["train", "val"]}

### Visualizing the Data

In [None]:
def visualize_data(data, label_map, n_rows=3, n_cols=3):
    """Show a grid of `n_rows * n_cols` randomly chosen images from `data`.

    Parameters:
    -----------
    - data (Dataset or its subclass): Data to sample from; indexing it must
                                      yield an `(image, label)` pair.
    - label_map (dict): Mapping from int to labels for the classes; used for
                        the title of each image.
    - n_rows (int): Number of rows of images to display.
    - n_cols (int): Number of columns of images to display.
    """
    n_images = n_rows * n_cols
    # Sample indices with replacement, so an image may appear more than once
    chosen = torch.randint(high=len(data), size=(n_images,)).tolist()

    figure = plt.figure(figsize=(8, 8))
    for position, sample_idx in enumerate(chosen, start=1):
        img, label = data[sample_idx]
        figure.add_subplot(n_rows, n_cols, position)
        plt.title(label_map.get(label))
        plt.axis("off")
        # matplotlib expects channels last, so reorder the axes to (H, W, C)
        plt.imshow(img.permute((1, 2, 0)))
    plt.show()

In [None]:
species_map = {
    0: "Cat",
    1: "Dog",
}

In [None]:
# The per-phase Dataset objects live in `datasets` (plural)
visualize_data(datasets["train"], species_map)

#### Create `DataLoader` objects for the training & validation data

In [None]:
# define the batch size
bs = 32

In [None]:
# One DataLoader per phase, keyed the same way as `datasets`
dataloaders = {
    phase: DataLoader(datasets[phase], batch_size=bs, shuffle=True)
    for phase in ["train", "val"]
}

<a name="training-a-deep-neural-network-for-binary-classification"></a>

---
## Training a Deep Neural Network for Binary Classification

In this section we will be training a simple `Feed-Forward Neural Network` on our pets dataset.<br> The output of the network will be one of the 2 classes, i.e. `Cat` or `Dog`.

In [None]:
from functools import reduce

from torch.optim.sgd import SGD

In [None]:
class FeedForwardNetwork(nn.Module):
    """Fully-connected binary classifier that outputs a sigmoid probability.

    The input is flattened and passed through five hidden stages of
    BatchNorm1d -> Linear -> ReLU (512, 256, 128, 64, 16 units), followed by
    BatchNorm1d -> Linear(16, 1) -> Sigmoid.
    """

    def __init__(self, img_size):
        """
        Parameters:
        -----------
        - img_size: Dimensions of the input image; their product is the
                    number of input features after flattening.
        """
        super().__init__()
        input_size = reduce(lambda acc, dim: acc * dim, img_size)

        # Feature widths entering each hidden stage, ending at the last hidden width
        widths = [input_size, 512, 256, 128, 64, 16]

        layers = [nn.Flatten()]
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.BatchNorm1d(fan_in))
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Output head: a single unit squashed into (0, 1)
        layers.append(nn.BatchNorm1d(widths[-1]))
        layers.append(nn.Linear(widths[-1], 1))
        layers.append(nn.Sigmoid())

        self.sequential = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the result."""
        return self.sequential(x)

In [None]:
def train_val_model(
    model,
    dataloaders,
    dataset_size,
    loss_fn,
    optimizer,
    num_epochs,
    threshold=0.5,
    device="cpu",
):
    """Train a binary classifier and validate it after every epoch.

    Every epoch runs a training pass followed by a validation pass, printing
    the average loss and accuracy of each. The parameters that achieve the
    highest validation accuracy are remembered and returned.

    Parameters:
    -----------
    - model: The model which has to be trained (& used for making predictions).
             It is moved to `device` and updated in place.
    - dataloaders (dict-like): dict containing dataloader objects for the
                               training & validation datasets, under the keys
                               "train" and "val". Each must yield
                               `(images, targets)` batches.
    - dataset_size (dict-like): dict containing the lengths for the training &
                                validation datasets (same keys).
    - loss_fn (function): Loss function to use for calculating the gradients.
                          It is called as `loss_fn(predictions, targets)` with
                          float targets reshaped to the shape of `predictions`.
    - optimizer: Optimizer to use for updating the parameters of the model
    - num_epochs (int): Number of epochs for which the model must be trained.
    - threshold (float; default=0.5): Value above which the image will be
                                      considered to belong to class `1`.
    - device (str; default="cpu"): Device to use for training the model.

    Returns:
    --------
    Dictionary containing the parameters of the model which had the best accuracy
    on the validation set (the initial parameters if validation accuracy never
    rises above zero).
    """
    start_time = time.time()

    # Initialize the best model weights as the initial weights of the model
    best_model_parameters = copy.deepcopy(model.state_dict())
    best_accuracy = 0.0

    # Move the `model` to the specified `device`
    model = model.to(device)

    # Initially set the gradients to zero
    optimizer.zero_grad()

    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs-1}")
        print("-" * 15)

        # Both the training & validation datasets are passed to the model in
        # each epoch
        for phase in ["train", "val"]:
            # Set mode depending upon the phase
            if phase == "train":
                model.train()
            else:
                model.eval()

            total_loss = 0.0
            total_corrects = 0

            # Gradients are only tracked while training
            with torch.set_grad_enabled(phase == "train"):
                for imgs, targets in dataloaders[phase]:
                    # Images may arrive as uint8; the layers need floats
                    imgs = imgs.to(device).float()
                    predictions = model(imgs)
                    # Targets may arrive as a 1-D integer batch; the loss needs
                    # floats shaped like the predictions
                    targets = targets.to(device).float().reshape(predictions.shape)
                    loss = loss_fn(predictions, targets)
                    # `loss` is a batch mean, so weight it by the batch size
                    total_loss += loss.item() * imgs.size(0)
                    predicted_labels = predictions > threshold
                    total_corrects += torch.sum(
                        predicted_labels == (targets > 0.5)
                    ).item()

                    # Update parameter values during the training phase
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                        # Set the gradients to zero
                        optimizer.zero_grad()

            avg_loss = total_loss / dataset_size[phase]
            accuracy = total_corrects / dataset_size[phase]
            print(f"{phase.capitalize()} Average Loss: {avg_loss: .4f}")
            print(f"{phase.capitalize()} Accuracy: {accuracy: .4f}")

            # Keep a snapshot whenever validation accuracy improves
            if phase == "val" and accuracy > best_accuracy:
                best_model_parameters = copy.deepcopy(model.state_dict())
                best_accuracy = accuracy
        # Print a new line after each epoch
        print()

    # Summary is reported once, after the final epoch
    total_time = time.time() - start_time
    print(f"Model training completed in {total_time // 60:.0f}m {total_time % 60:.0f}s")
    print(f"Best validation accuracy: {best_accuracy:.4f}")

    return best_model_parameters

### Create, train & validate model

##### Use `GPU` for training if available else use `CPU`

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device being used: {device}")

In [None]:
model = FeedForwardNetwork(img_size)
model.to(device);

In [None]:
n_epochs = 10
lr = 1e-3
optimizer = SGD(model.parameters(), lr)
bce_loss = nn.BCELoss()

In [None]:
# Train with the loop defined in this notebook and restore the parameters
# that scored best on the validation set.
best_model_parameters = train_val_model(
    model, dataloaders, dataset_size, bce_loss, optimizer, n_epochs, device=device
)
model.load_state_dict(best_model_parameters)
model.eval()

# Summarise the restored model: accuracy on each split at a 0.5 threshold.
accuracy_by_phase = {}
with torch.no_grad():
    for phase in ["train", "val"]:
        n_correct = 0
        for imgs, targets in dataloaders[phase]:
            probs = model(imgs.to(device).float())
            n_correct += ((probs > 0.5).flatten().cpu() == targets.bool()).sum().item()
        accuracy_by_phase[phase] = n_correct / dataset_size[phase]

results = pd.DataFrame({"Accuracy": accuracy_by_phase}).rename_axis("Phase")

In [None]:
results

### Creating an Adversarial Example for the model

In [None]:
# Use the first validation sample as the image to perturb
x, y = datasets["val"][0]

In [None]:
species_map.get(y)

In [None]:
x_new = x.detach().clone().float().unsqueeze(0).to(device).requires_grad_()

In [None]:
y_new = 0 if y == 1 else 1

In [None]:
y_new = torch.Tensor([[y_new]]).to(device)

In [None]:
# `x_new` is a (1, C, H, W) float tensor that tracks gradients and may live
# on the GPU: detach it, drop the batch axis, reorder to (H, W, C) and move
# it to the CPU before handing it to matplotlib.
plt.figure()
plt.imshow(x_new.detach().squeeze(0).permute((1, 2, 0)).cpu().type(torch.uint8))
plt.axis("off")
plt.title(species_map.get(y))
plt.show()

In [None]:
n_epochs = 40
lr = 1e-1
momentum = 0.9
loss_func = nn.BCELoss()
opt = SGD([x_new], lr, momentum=momentum)

In [None]:
model(x_new)

In [None]:
for i in range(n_epochs):
    pred = model(x_new)
    opt.zero_grad()
    loss = loss_func(pred, y_new)
    loss.backward()
    opt.step()

In [None]:
model(x_new)