In [1]:
# STEP 1: Mount Google Drive
# Exposes the Drive contents under /content/drive; every path used by the later
# cells (CIFAR-10-C files, model checkpoint, results CSV) lives below that mount.
# NOTE: `google.colab` is imported here, so this cell requires a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Work from the project checkout on Drive. Presumably this is what lets the later
# `Functions.implementations` import and the relative './data' dataset folder
# resolve -- confirm if the notebook is moved.
%cd /content/drive/MyDrive/OptiML/repo/OptML-project

/content/drive/MyDrive/OptiML/repo/OptML-project


In [20]:
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import f1_score

In [42]:
# Paths for a Google Drive (Colab) setup:
#   drive_base_path - root folder on Drive; the results CSV is also written here
#   cifar10_c_path  - folder containing the CIFAR-10-C `<corruption>.npy` files
#   model_path      - saved state dict that is loaded into the model below
drive_base_path = '/content/drive/MyDrive/OptiML/repo'
cifar10_c_path = os.path.join(drive_base_path, 'CIFAR-10-C')
model_path = os.path.join(drive_base_path, 'OptML-project/Results/SGD/VGG_Transform_lr_0.01_momentum_0.0.pth')

In [43]:
# STEP 4: Load CIFAR-10 test labels
# Load the official CIFAR-10 test set (same order as CIFAR-10-C labels).
# Only the targets are used from this dataset object: the corrupted images
# themselves are read from the CIFAR-10-C .npy files in the evaluation function.
test_set = CIFAR10(root='./data', train=False, download=True)
true_labels = torch.tensor(test_set.targets)  # Should have 10,000 labels


In [44]:
# STEP 5: Load the trained model
# Import only the class this notebook needs instead of `import *`, so the
# notebook namespace is not silently filled (or overwritten) by project names.
from Functions.implementations import VGGLike

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGGLike().to(device)
# map_location remaps the checkpoint tensors onto `device`, so a checkpoint
# saved on a GPU machine still loads on a CPU-only runtime.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
model.load_state_dict(torch.load(model_path, map_location=device))
# Switch to inference mode; as the last expression of the cell this also
# displays the model architecture.
model.eval()

VGGLike(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4096, out_features=256, bias=True)
    (2): ReLU()
    (3): Linear(in_features=256, out_features=10, bias=True)
  )
)

In [50]:
# STEP 6: Evaluation function
def evaluate_model_on_corruption(corruption_type, severity, batch_size=128):
    """Return the macro F1-score of the global `model` on one CIFAR-10-C split.

    The corrupted images are read from ``<cifar10_c_path>/<corruption_type>.npy``,
    scaled to [0, 1], normalized per channel and scored against the global
    `true_labels` (the CIFAR-10 test targets).

    Args:
        corruption_type: Base name of the corruption file (without ``.npy``).
        severity: Severity level, an integer from 1 to 5. Level ``s`` is taken
            from rows ``(s - 1) * 10000`` to ``s * 10000`` of the file.
        batch_size: Number of images per forward pass (default 128).

    Returns:
        The macro-averaged F1-score over all images of the selected severity.

    Raises:
        ValueError: If `severity` is not in 1..5, or if the number of images
            read for that severity does not match the number of labels.
    """
    images_per_level = 10000

    # Reject bad severities up front: 0 or 6 would otherwise produce an empty
    # slice and fail later with an unrelated-looking size-mismatch error.
    if severity not in range(1, 6):
        raise ValueError(f"severity must be an integer between 1 and 5, got {severity!r}")
    severity = int(severity)

    corruption_file = os.path.join(cifar10_c_path, f"{corruption_type}.npy")
    # Memory-map the file so that only the requested severity block is read
    # from disk, instead of loading all five levels and discarding four.
    start = (severity - 1) * images_per_level
    all_levels = np.load(corruption_file, mmap_mode='r')
    data = np.array(all_levels[start:start + images_per_level])  # in-memory copy

    if len(data) != len(true_labels):
        raise ValueError(
            f"{corruption_file}: got {len(data)} images for severity {severity}, "
            f"but there are {len(true_labels)} labels"
        )

    # The indexing/permute implies images are stored as (N, H, W, C); move
    # channels first and scale to [0, 1].
    data = torch.from_numpy(data).permute(0, 3, 1, 2).float() / 255.0
    # Same per-channel statistics as the Normalize transform used for the
    # clean test set, so both evaluations see identically preprocessed inputs.
    mean = torch.tensor([0.4914, 0.4822, 0.4465]).view(1, 3, 1, 1)
    std = torch.tensor([0.2023, 0.1994, 0.2010]).view(1, 3, 1, 1)
    data = (data - mean) / std

    # shuffle=False keeps predictions aligned with the label order.
    loader = DataLoader(TensorDataset(data, true_labels), batch_size=batch_size, shuffle=False)

    all_preds = []
    all_labels = []

    # Do not rely on an earlier cell having left the model in eval mode.
    model.eval()
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images.to(device))
            _, predicted = outputs.max(1)  # index of the highest logit per image

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.numpy())

    # 'macro' weights every class equally; 'weighted' is the alternative.
    return f1_score(all_labels, all_preds, average='macro')

In [51]:
# STEP 7: Score the model on every corruption type at severity levels 1-5
corruptions = [
    # noise
    "gaussian_noise",
    "shot_noise",
    "impulse_noise",
    # blur
    "defocus_blur",
    "glass_blur",
    "motion_blur",
    "zoom_blur",
    # weather / lighting
    "snow",
    "frost",
    "fog",
    "brightness",
    # digital
    "contrast",
    "elastic_transform",
    "pixelate",
    "jpeg_compression",
]

# One record per (corruption, severity) pair, in sweep order.
results = []

# The progress bar advances once per corruption type (five evaluations each).
for corruption in tqdm(corruptions):
    results.extend(
        {
            'corruption': corruption,
            'severity': severity,
            'f1_macro': evaluate_model_on_corruption(corruption, severity),
        }
        for severity in range(1, 6)
    )

100%|██████████| 15/15 [01:21<00:00,  5.40s/it]


In [52]:
# STEP 8: Persist the sweep results as a CSV in the Drive base folder
csv_path = os.path.join(drive_base_path, 'VGG_corruption_evaluation_results.csv')

# One row per (corruption, severity) record; the index is left out of the file.
df = pd.DataFrame(results)
df.to_csv(csv_path, index=False)

# Preview the first rows as the cell output.
df.head()


Unnamed: 0,corruption,severity,f1_macro
0,gaussian_noise,1,0.779682
1,gaussian_noise,2,0.719714
2,gaussian_noise,3,0.633976
3,gaussian_noise,4,0.593009
4,gaussian_noise,5,0.549714


In [49]:
def evaluate_on_clean_testset():
    """Compute the macro F1-score of the global `model` on the clean CIFAR-10 test set.

    The test split is loaded from './data' (downloaded if necessary), converted
    to tensors and normalized per channel, then fed through the model in
    batches of 128 without shuffling.

    Returns:
        The macro-averaged F1-score over the whole test split.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                             std=[0.2023, 0.1994, 0.2010]),
    ])
    clean_loader = DataLoader(
        CIFAR10(root='./data', train=False, download=True, transform=preprocessing),
        batch_size=128,
        shuffle=False,
    )

    predictions, targets = [], []

    model.eval()
    with torch.no_grad():
        for batch_images, batch_labels in clean_loader:
            logits = model(batch_images.to(device))
            # max over the class dimension returns (values, indices); keep the indices.
            predicted_classes = logits.max(1)[1]

            predictions += predicted_classes.cpu().numpy().tolist()
            targets += batch_labels.numpy().tolist()

    return f1_score(targets, predictions, average='macro')

# Call the function and print the result.
# This is the uncorrupted baseline to compare the CIFAR-10-C scores against;
# the value is printed with four decimals.
clean_f1 = evaluate_on_clean_testset()
print(f"F1-score on clean CIFAR-10 test set: {clean_f1:.4f}")

F1-score on clean CIFAR-10 test set: 0.8229
