# Adversarial Examples on Food-101 Dataset

### Import Libraries

In [4]:
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchattacks import PGD, FGSM, VANILA, GN, Jitter #Ben: trying out some more attacks
from torchvision import datasets, transforms
import torchvision.models as models
from torch.utils.data import DataLoader
import random
from torchinfo import summary

### Set Configuration Variables

In [5]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DATA_DIR = './data'
FINE_TUNE_EPOCHS = 0
BEST_MODEL_PATH = 'results/bestmodel.pth'

### Transformations and Pretrained Model

In [7]:
train_transform =  transforms.Compose([
    transforms.RandomResizedCrop(size = 224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

#Test does not use augmentation, only normalization is being performed.
test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(size = 224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


# Load datasets, first time you need to download it (may take a while). After that it should just pull the local copy
train_dataset = datasets.Food101(root=DATA_DIR, split='train', download=True, transform=train_transform)
test_dataset = datasets.Food101(root=DATA_DIR, split='test', download=True, transform=test_transform)

# Dataloaders, may need to change # of workers or batchsize to improve performance
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=8)

# Pretrained model, efficient architecture
# https://paperswithcode.com/sota/fine-grained-image-classification-on-food-101
model = models.efficientnet_b2(weights='DEFAULT')

# Add one more layer to base model and then add an output layer
model.classifier = nn.Sequential(
    nn.Linear(model.classifier[1].in_features, 1024),
    nn.ReLU(),
    nn.Linear(1024, len(train_dataset.classes))
)

model.to(DEVICE)
summary(model, 
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])
print("\n")

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


### Train and Test functions

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(DEVICE), y.to(DEVICE)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn, save_best_model=False, best_acc=0):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(DEVICE), y.to(DEVICE)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()        
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.3f}%, Avg loss: {test_loss:>8f} \n")

    # Save best vesion of model at save_path
    if(100*correct > best_acc) and save_best_model:
        print("Saving New Best Model")
        best_acc = 100*correct
        save_path = BEST_MODEL_PATH
        torch.save(model.state_dict(), save_path)
    
    return best_acc

### Fine Tuning

In [None]:
# Set optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

best_acc = 0
for epoch in range(FINE_TUNE_EPOCHS):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train(train_loader, model, loss_fn, optimizer)
    best_acc = test(test_loader, model, loss_fn, save_best_model=False, best_acc=best_acc)
print("Fine-Tuning Completed!")

Fine-Tuning Completed!


### Loading Best Performing Model

In [None]:
## Loading the model with the best accuracy after a number of fine-tuning epochs
model_path = BEST_MODEL_PATH
state_dict = torch.load(model_path)
model.load_state_dict(state_dict)
print(f"Model loaded from {model_path}")
model = model.to(DEVICE)
test(test_loader, model, loss_fn, save_best_model=False)
print("\n")

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


### Our PGD Implementation

In [None]:
def general_PGD(model, loss_function, data, proj_norm=2, eps=0.03, lr=0.0001, steps=10):

  features, labels = data
  features = features.clone().detach().to(DEVICE)
  labels = labels.clone().detach().to(DEVICE)

  adv_features = features.clone().detach()

  #maximize loss wrt feature perturbations, for fixed network parameters
  for i in range(steps):
    adv_features.requires_grad = True

    #model prediction
    pred = model(adv_features)

    #error calculation
    error = loss_function(pred, labels)

    #gradient descend
    grad = torch.autograd.grad(error, adv_features)[0] #grad:(1, 64, 3, 224, 224), where the first coordinate if the batch number?
    grad_norm = torch.norm(grad, p=proj_norm, dim=[1,2,3]) #normalize the gradient according to paper https://arxiv.org/pdf/1706.06083
    grad = grad / grad_norm.view(-1,1,1,1)

    adv_features = adv_features.detach() + lr * grad

    #projection: |features - adv_features|_{norm} < radius
    orig_diff = features - adv_features
    orig_diff_norm = torch.norm(orig_diff, p=proj_norm, dim=[1,2,3])
    normalization = eps / orig_diff_norm

    diff = orig_diff * normalization.view(-1,1,1,1)

    adv_features = (features - diff).detach()

  return adv_features


### Implementations of FGSM and Iterated FGSM. 

In [None]:

def ifgsm(model, loss_fn, data, eps=0.03, alpha=0.01, num_iter=10):  
    features, labels = data
    features = features.clone().detach().to(DEVICE)
    features.requires_grad = True  
    labels = labels.clone().detach().to(DEVICE)
    model.eval()

    perturbed_data = features.clone() 

    for i in range(num_iter):  # Iterate for num_iter times
        output = model(perturbed_data)
        loss = loss_fn(output, labels)
        model.zero_grad()
        loss.backward(retain_graph=True)
        sign_data_grad = features.grad.data.sign()

        perturbed_data += alpha * sign_data_grad  
        perturbed_data = torch.clamp(perturbed_data, features - eps, features + eps)  
        perturbed_data = torch.clamp(perturbed_data, 0, 1)
    return perturbed_data

    

### Function to Test Adversarial Attacks

Checks how our model performs on different adversarial attacks

In [None]:
def test_with_adv(dataloader, model, loss_fn, attack, eps=0.0, is_torchattacks=True, num_iter=1):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    selected_adv_examples = []
    for X, y in dataloader:
        X, y = X.to(DEVICE), y.to(DEVICE)
        if is_torchattacks:
            atk = attack(model, eps=eps)
            adv_examples = atk(X, y)
        else:
            adv_examples = attack(model, loss_fn, (X, y), eps=eps, num_iter=num_iter) # (Ben) last parameter is for iterated FGSM.
        
        selected_adv_examples += adv_examples[:1]
        pred = model(adv_examples)
        test_loss += loss_fn(pred, y).item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>.3f}%, Avg loss: {test_loss:>8f} \n")
    return correct, test_loss, selected_adv_examples


#### Run this for different Adversarial Attacks (~20mins)

In [None]:
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=8)

# Pretrained Model
# print("--- Pretrained Model -----")
# test(test_loader, model, loss_fn, save_best_model=False)

# ## Torchattacks
eps = 0.01
results = dict()
for atk, atk_name in zip([PGD, FGSM, Jitter], ['PGD', 'FGSM', 'Jitter']):
    print(f"--- Attack: {atk_name} ----")
    correct, test_loss, selected_adv_examples = test_with_adv(test_loader, model, loss_fn, atk, eps=eps, is_torchattacks=True)
    results[atk_name] = (correct, test_loss, selected_adv_examples)

## Our PGD Implementation
print(f"--- Attack: Our PGD Implementation ----")
test_with_adv(test_loader, model, loss_fn, general_PGD, eps=eps, is_torchattacks=False)
correct, test_loss, selected_adv_examples = test_with_adv(test_loader, model, loss_fn, atk, eps=eps, is_torchattacks=True)
results[atk_name] = (correct, test_loss, selected_adv_examples)


#### Testing Iterated FGSM for different methods. 

In [None]:
## Our FGSM Implementation
print(f"--- Attack: Our FGSM Implementation ----")
iterated_fgsm_data = dict()
for i in range(1, 11):
    correct, loss, examples = test_with_adv(test_loader, model, loss_fn, ifgsm, eps=eps, num_iter=i, is_torchattacks=False)
    iterated_fgsm_data[i] = [(correct, loss, examples)]

--- Attack: Our FGSM Implementation ----


OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 

In [None]:
iterated_fgsm_data

{1: [(0.0005544554455445544,
   0.0182371380962903,
   [tensor([[[1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             ...,
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000]],
    
            [[1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             ...,
             [0.9830, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
             [1.0000, 1.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000]],
    
            [[1.0000, 1.0000, 1.0000,  ..., 0.7676, 0.6954, 0.6605],
   