# Corrupted Images Benchmark Notebook
------------------
This notebook contains the code used to analyze the model's robustness through corrupted image analysis. 

It functions primarily on Google Colab, but can be adapted to work on local hardware.

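It evaluates trained models on the corrupted CIFAR-10-C test sets and on the clean CIFAR-10 test set.

It can evaluate a single model, or loop over every saved model in a results folder and write one CSV of corruption results per model.

The evaluation helpers it calls (`evaluate_model_on_all_corruptions` and `evaluate_on_clean_testset`) are not defined here; they come from the wildcard imports of the project's `Functions` package.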

In [1]:
# STEP 1: Mount Google Drive
# Colab-only cell: exposes the Drive contents under the mount point below.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [7]:
# Change into the project folder on the mounted Drive. Later cells import the
# `Functions` package and build their paths from os.getcwd(), so both depend
# on the working directory set here.
# NOTE(review): hardcoded Colab/Drive path — adjust it when running elsewhere.
%cd /content/drive/MyDrive/OptiML/repo/OptML-project

/content/drive/MyDrive/OptiML/repo/OptML-project


In [None]:
# STEP 2: Import required packages

import numpy as np
import pandas as pd
import sklearn
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, recall_score, f1_score
from Functions.implementations import *
from Functions.visualization import *
import os

In [None]:
# STEP 3: Define paths and parameters

# Every path is anchored at the current working directory (the project folder
# on Google Drive when the %cd cell above has been run).
drive_base_path = os.getcwd()

# Corrupted-dataset folder and the checkpoint of the single model to load,
# both expressed relative to the base path.
cifar10_c_path, model_path = (
    os.path.join(drive_base_path, relative_path)
    for relative_path in (
        'data/CIFAR-10-C',
        'Results/Adam_VGG/VGG_lr_0.001_beta1_0.8_beta2_0.99.pth',
    )
)

In [6]:
# STEP 4: Load CIFAR-10 test labels
# Load the official CIFAR-10 test set (same order as CIFAR-10-C labels).
# The dataset class is reached through the explicit `import torchvision`
# instead of a bare `CIFAR10` name: this notebook never imports `CIFAR10`
# itself, so the bare name only resolved if a wildcard import leaked it.
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True)

# Labels as a tensor, in dataset order.
true_labels = torch.tensor(test_set.targets)  # Should have 10,000 labels


In [7]:
# STEP 5: Load your model
# Swap VGGLike for your own model class if you are evaluating a different architecture
from Functions.implementations import *

# Run on the GPU when one is visible, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Build the network on the chosen device, then restore the saved weights.
model = VGGLike()
model.to(device)
# NOTE(review): depending on the installed PyTorch version, torch.load may
# fully unpickle the file — only open checkpoints from a trusted source.
saved_weights = torch.load(model_path, map_location=device)
model.load_state_dict(saved_weights)

# Switch to inference mode; as the cell's last expression this also displays the model.
model.eval()

VGGLike(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4096, out_features=256, bias=True)
    (2): ReLU()
    (3): Linear(in_features=256, out_features=10, bias=True)
  )
)

In [None]:
# STEP 6: Evaluate all the models in a given folder on all corruptions
# If you want to evaluate only one model, you can skip this step and directly evaluate the model loaded above with the evaluate_model_on_all_corruptions function.
#
# NOTE: the loop rebinds `model` and `model_path`. After it finishes they refer
# to the last checkpoint processed (alphabetically last, since the listing is
# sorted), and any later cell that uses `model` will see that one.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

folder_path = os.path.join(drive_base_path, 'Results/Adam_VGG') # Path to the folder containing the models

# The CSVs are written to a subfolder of the folder being scanned. Create it up
# front: DataFrame.to_csv does not create missing parent directories.
corrupted_dir = os.path.join(folder_path, 'Corrupted')
os.makedirs(corrupted_dir, exist_ok=True)

# Sort the listing so the processing order (and the model left in `model`
# afterwards) is deterministic; os.listdir returns entries in arbitrary order.
for model_name in sorted(os.listdir(folder_path)):
    candidate_path = os.path.join(folder_path, model_name)
    # Only '.pth' checkpoint files are models. Without this check the
    # 'Corrupted' output folder (and any other stray entry) would be handed
    # to torch.load and abort the run.
    if not (os.path.isfile(candidate_path) and model_name.endswith('.pth')):
        continue

    model_path = candidate_path
    model = VGGLike().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    results = evaluate_model_on_all_corruptions(model) # Function to evaluate the model on all corruptions
    df = pd.DataFrame(results)

    # Name the CSV after the checkpoint, dropping its extension.
    csv_path = os.path.join(corrupted_dir, os.path.splitext(model_name)[0] + '.csv')
    df.to_csv(csv_path, index=False) # Save the results to a CSV file
    

In [None]:
# STEP 7: Evaluate the model on the clean CIFAR-10 test set
# `model` is whichever model was bound most recently: the one loaded in the
# single-model cell (STEP 5) or, if the folder loop was run, the last model
# that loop loaded.
clean_f1 = evaluate_on_clean_testset(model)
# Report the returned score with four decimal places.
print(f"F1-score on clean CIFAR-10 test set: {clean_f1:.4f}")

F1-score on clean CIFAR-10 test set: 0.8229
