<a href="https://colab.research.google.com/github/Marcel99Codes/MLSec_project2_MM/blob/main/lab_secml_improoved.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
%pip install git+https://github.com/RobustBench/robustbench.git
%pip install secml-torch[foolbox,tensorboard]

import torchvision.datasets
from robustbench.utils import load_model
from secmlt.adv.backends import Backends
from secmlt.adv.evasion.perturbation_models import LpPerturbationModels
from secmlt.adv.evasion.pgd import PGD
from secmlt.metrics.classification import Accuracy
from secmlt.models.pytorch.base_pytorch_nn import BasePytorchClassifier
from torch.utils.data import DataLoader, Subset
import torch
from torch import nn,optim
import torch.nn.functional as F
from secmlt.trackers import (
    GradientNormTracker,
    GradientsTracker,
    LossTracker,
    PerturbationNormTracker,
    PredictionTracker,
    SampleTracker,
    ScoresTracker,
    TensorboardTracker,
)

Collecting git+https://github.com/RobustBench/robustbench.git
  Cloning https://github.com/RobustBench/robustbench.git to /tmp/pip-req-build-2su650ur
  Running command git clone --filter=blob:none --quiet https://github.com/RobustBench/robustbench.git /tmp/pip-req-build-2su650ur
  Resolved https://github.com/RobustBench/robustbench.git to commit 46a91f44524133b2cd8f721ec7e73ecb63f17fc8
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting autoattack@ git+https://github.com/fra31/auto-attack.git@a39220048b3c9f2cca9a4d3a54604793c68eca7e#egg=autoattack (from robustbench==1.1)
  Cloning https://github.com/fra31/auto-attack.git (to revision a39220048b3c9f2cca9a4d3a54604793c68eca7e) to /tmp/pip-install-fl9v5lkm/autoattack_7bdaff97f937491d9141ba1d051ea070
  Running command git clone --filter=blob:none --quiet https://github.com/fra31/auto-attack.git /tmp/pip-install-fl9v5lkm/autoattack_7bdaff97f937491d9141ba1d051ea070
  Running command git rev-parse -q --verify 'sha^a39220048b3c9f2c

In [13]:
class ComposedClf(nn.Module):
    """Randomised ensemble that mixes the outputs of several classifiers.

    Every forward pass draws a fresh set of non-negative weights that sum to
    one and returns the corresponding convex combination of the members'
    outputs. The members' outputs are combined exactly as returned; no softmax
    is applied inside this module.
    """

    def __init__(self, models):
        """Register ``models`` (an iterable of ``nn.Module``) as ensemble members."""
        super().__init__()
        self.models = nn.ModuleList(models)

    def forward(self, x):
        """Return a randomly weighted average of all members' outputs for ``x``.

        Args:
            x: input batch, forwarded unchanged to every member.

        Returns:
            Tensor with the same shape as a single member's output.
        """
        # Shape: (n_models, *member_output_shape)
        member_outputs = torch.stack([model(x) for model in self.models], dim=0)

        # Draw the weights on the same device as the outputs; CPU weights cannot
        # be multiplied with non-scalar tensors living on another device.
        random_weights = torch.rand(len(self.models), device=member_outputs.device)
        random_weights = random_weights / random_weights.sum()

        # Reshape to (n_models, 1, ..., 1) so the weights broadcast along the
        # model axis whatever the rank of the members' outputs is.
        broadcast_shape = (-1,) + (1,) * (member_outputs.dim() - 1)
        return (member_outputs * random_weights.view(broadcast_shape)).sum(dim=0)

In [14]:
# Names of the ensemble members; each one is handed to `load_model` below.
model_names = [
    "Standard", "Rebuffi2021Fixing_70_16_cutmix_extra", "Rade2021Helper_extra",
]

In [15]:
# Load every ensemble member by name (all other `load_model` arguments are left
# at their defaults) and combine them into a single randomised ensemble.
models = [load_model(model_name=model_name) for model_name in model_names]
net = ComposedClf(models)

# Pick the GPU when one is available. A `torch.device` never compares equal to
# a plain string, so the move must not be guarded by `device == "cuda"`;
# `Module.to` is a no-op when the module already lives on the target device,
# which makes an unconditional call safe on CPU-only machines too.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

# CIFAR-10 test split, downloaded into the working directory and converted to tensors.
test_dataset = torchvision.datasets.CIFAR10(
    root=".",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

  checkpoint = torch.load(model_path, map_location=torch.device('cpu'))


Files already downloaded and verified


In [16]:
# Keep only the first five test samples and serve them as one ordered batch.
first_five_indices = list(range(5))
test_dataset = Subset(test_dataset, first_five_indices)
test_data_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=5,
)

In [17]:
# Wrap the ensemble so the secml-torch metrics and attacks can drive it
model = BasePytorchClassifier(net)

# Accuracy on the unperturbed test samples
accuracy_metric = Accuracy()
accuracy = accuracy_metric(model, test_data_loader)
print("Accuracy:", accuracy.item())

Accuracy: 1.0


In [None]:
# PGD configuration: L-inf perturbation model with budget `epsilon`,
# a fixed step size, no random start and no target label.
perturbation_model = LpPerturbationModels.LINF
epsilon = 8 / 255
step_size = 0.005
num_steps = 100
y_target = None

# Quantities to record during the attack; they are bundled into one
# TensorBoard tracker that writes below example_data/logs/pgd.
trackers = [
    LossTracker(),
    PredictionTracker(),
    PerturbationNormTracker("linf"),
    GradientNormTracker(),
    SampleTracker(),
    ScoresTracker(),
    GradientsTracker(),
]
tensorboard_tracker = TensorboardTracker("example_data/logs/pgd", trackers)

# Build the attack with the native backend ...
pgd_settings = dict(
    perturbation_model=perturbation_model,
    epsilon=epsilon,
    num_steps=num_steps,
    step_size=step_size,
    random_start=False,
    y_target=y_target,
    backend=Backends.NATIVE,
    trackers=tensorboard_tracker,
)
native_attack = PGD(**pgd_settings)

# ... and run it against the wrapped ensemble on the test samples.
native_adv_ds = native_attack(model, test_data_loader)

In [None]:
# Load the TensorBoard notebook extension and open it on the directory the
# attack's TensorboardTracker was configured to write to (example_data/logs/pgd).
%load_ext tensorboard
%tensorboard --logdir example_data/logs/pgd

In [None]:
# Accuracy of the composed (ensemble) model on the adversarial examples
n_robust_accuracy = Accuracy()(model, native_adv_ds)
print("Robust Accuracy (PGD Native): ", n_robust_accuracy.item())

# Accuracy of every single model on the same adversarial examples.
# The per-model values are kept in their own dict (keyed by model name) so the
# ensemble result stored in `n_robust_accuracy` is not overwritten by the loop.
single_model_robust_accuracy = {}
for single_model, name in zip(models, model_names):
    single_model_robust_accuracy[name] = Accuracy()(
        BasePytorchClassifier(single_model), native_adv_ds
    )
    print(f"Robust Accuracy (PGD Native, {name}) ", single_model_robust_accuracy[name].item())





In [None]:
# prompt: find all images in native_adv_ds for which all three models fail to predict the right label and put them into a new dataloader

import torch
from torch.utils.data import DataLoader, Subset

# Collect the adversarial examples that every individual model misclassifies
# and expose them through a new DataLoader.

# `native_adv_ds` is consumed like a DataLoader by the accuracy metric, and a
# DataLoader cannot be indexed per sample; use the dataset behind it when there
# is one, otherwise treat the object itself as the dataset.
adv_dataset = getattr(native_adv_ds, "dataset", native_adv_ds)

failed_indices = []
for i in range(len(adv_dataset)):
    x, y = adv_dataset[i]
    true_label = int(y)
    all_models_failed = True
    # The loop variable is deliberately not called `model`, which is the name of
    # the wrapped ensemble used by the other cells.
    for single_model in models:
        # Run the sample on whatever device this model's weights live on
        # (falls back to `device` for a model without parameters).
        first_param = next(single_model.parameters(), None)
        model_device = device if first_param is None else first_param.device
        with torch.no_grad():
            batch = x.unsqueeze(0).to(model_device)
            prediction = single_model(batch).argmax(dim=1).item()
        if prediction == true_label:
            all_models_failed = False
            break
    if all_models_failed:
        failed_indices.append(i)

failed_dataset = Subset(adv_dataset, failed_indices)
failed_data_loader = DataLoader(failed_dataset, batch_size=5, shuffle=False)

print(f"Number of images where all three models failed: {len(failed_indices)}")
