In diesem Notebook wird die Pipeline aufgebaut.

In [None]:
%load_ext autoreload
%autoreload 2

import os
# Switch the working directory before the project imports in the following cells
# (presumably so that the `src.*` packages resolve — confirm).
if os.getcwd() != '/home/jovyan/work':
    # Local run: step up one directory from where the notebook was started.
    os.chdir("../")
else:
    # JupyterHub run: enter the project folder and run `make reqs` from there.
    os.chdir("24FS_I4DS27/main/")
    os.system("make reqs")

In [None]:
import torch
import wandb
import torchvision
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from src.utils.download import download_models
from src.utils.uap_helper import generate_adversarial_images_from_model_dataset, get_datamodule, get_model
from src.utils.metrics import metrics, Metrics
from src.utils.transform_perturbation import AddImagePerturbation
from src.data.mri import MRIDataModule
from src.data.covidx import COVIDXDataModule
from src.utils.evaluation import WeightsandBiasEval
from src.models.imageclassifier import ImageClassifier

from lightning.pytorch import Trainer
from lightning.pytorch import loggers as pl_loggers
from lightning.pytorch.callbacks import ModelCheckpoint

# Notebook-wide matplotlib defaults: figure resolution (dpi) and figure size.
plt.rcParams.update(
    {
        "figure.dpi": 200,
        "figure.figsize": (16, 8),
    }
)

In [None]:
# Entity and project identifiers handed to download_models() further down
# (presumably the Weights & Biases team and project names — confirm).
ENTITY = "24FS_I4DS27"
PROJECT = "baselines"
# Number of data-loading workers; forwarded to pipeline() as `num_workers`.
NUM_WORKERS = 8


def get_transform(perturbations: torch.Tensor = None, p: float = None, idx: int = None):
    """
    Builds the image transform handed to the datamodules.

    Every image is resized to 224x224 with antialiasing. If `perturbations`, `p` and `idx`
    are ALL provided, an `AddImagePerturbation(perturbations, p, idx)` step is appended after
    the resize. If any one of them is None, only the resize step is returned.

    Parameters:
    perturbations (torch.Tensor): Perturbations forwarded to AddImagePerturbation.
    p (float): Forwarded to AddImagePerturbation (presumably the probability of applying a perturbation - confirm).
    idx (int): Forwarded to AddImagePerturbation (presumably selects one of the perturbations - confirm).

    Returns:
    torchvision.transforms.Compose: The composed transform.
    """
    steps = [torchvision.transforms.Resize((224, 224), antialias=True)]

    # Identity checks on purpose: `None in (perturbations, p, idx)` falls back to `==`,
    # which invokes the perturbation object's own __eq__ and raises for array-likes
    # whose comparison is elementwise (e.g. numpy arrays).
    if perturbations is not None and p is not None and idx is not None:
        steps.append(AddImagePerturbation(perturbations, p, idx))

    return torchvision.transforms.Compose(steps)


# Pick the compute device: CUDA if available, otherwise Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [None]:
# Download the models for the configured entity/project if they are not present yet.
# The return value of download_models is kept in `models`.
models = download_models(ENTITY, PROJECT)

In [None]:
def _csv_logger(modelfolder: str, version: str):
    """Creates a CSVLogger rooted at "robustified_models" with the given name and version, flushing every step."""
    return pl_loggers.CSVLogger(
        "robustified_models",
        name=modelfolder,
        version=version,
        flush_logs_every_n_steps=1,
    )


def _build_datamodule(
    dataset: str,
    num_workers: int,
    seed: int,
    perturbations=None,
    transform_p=None,
    transform_idx=None,
):
    """Creates the datamodule for `dataset` (batch size 32) using get_transform(perturbations, transform_p, transform_idx)."""
    return get_datamodule(
        dataset=dataset,
        transform=get_transform(perturbations, transform_p, transform_idx),
        num_workers=num_workers,
        batch_size=32,
        seed=seed,
    )


def _evaluate_on_test(
    model,
    dataset: str,
    modelfolder: str,
    version: str,
    num_workers: int,
    seed: int,
    perturbations=None,
    transform_p=None,
    transform_idx=None,
):
    """Runs Trainer.test on the test dataloader of `dataset`, logging to a CSVLogger with the given version."""
    logger = _csv_logger(modelfolder, version)
    datamodule = _build_datamodule(
        dataset,
        num_workers,
        seed,
        perturbations=perturbations,
        transform_p=transform_p,
        transform_idx=transform_idx,
    )
    trainer = Trainer(
        logger=logger,
    )
    trainer.test(model, datamodule.test_dataloader())


def pipeline(
    modelname: str,
    dataset: str,
    n_robustifications: int,
    i: int,
    n: int,
    t: int,
    p: int,
    lambda_norm: float,
    r: float,
    eps: float,
    seed: int,
    num_workers: int,
    device: str,
    verbose: bool = False,
):
    """
    Runs the pipeline for generating Universal Adversarial Perturbations (UAP) and robustifying a model with them.

    The model is loaded once and then, for each of the `n_robustifications` rounds:
      1. UAPs are generated for the current model (logged as "UAPs_pre_robustification").
      2. The model is tested on the test data ("eval_unrobustified_test").
      3. The model is tested once per UAP with transform p=1 ("eval_unrobustified_test_uap_<idx>").
      4. The model is trained for up to 50 epochs ("robustify_model"); the checkpoint with the lowest
         "val_loss" is written to robustified_models/<modelfolder>/robustify_model/model.
      5. The trained model is tested on the test data ("eval_robustified_test").
      6. The trained model is tested once per UAP with transform p=0.5 ("eval_robustified_test_uap_<idx>").
    The same model object is carried over from one round to the next.

    NOTE(review): steps 4 and 5 request get_transform(perturbations, p=0.5) without an idx. get_transform
    returns the plain resize transform whenever any of its three arguments is None, so as written the
    robustification training runs on un-perturbed images - confirm this is intended.
    NOTE(review): step 3 uses p=1 while step 6 uses p=0.5 - confirm the asymmetry is intended.
    NOTE(review): the tests after training use the model as it is when fit() returns; nothing here
    reloads the saved best checkpoint.

    Parameters:
    modelname (str): The name of the model to be used for generating perturbations.
    dataset (str): The dataset to be used for training and evaluating the UAP.
    n_robustifications (int): The number of robustifications to apply.
    i (int): The number of UAPs to generate.
    n (int): The number of images to be used for generating UAPs.
    t (int): The number of retries to fool an image on the algorithm.
    p (int): The norm to be used for measuring perturbations (e.g., L2 norm, L∞ norm).
        NOTE(review): this notebook calls pipeline with p=0.5, so the int annotation looks too narrow - confirm.
    lambda_norm (float): The regularization parameter for the norm.
    r (float): The desired fooling rate, which when achieved, saves the UAP.
    eps (float): A small positive constant for numerical stability in the loss function.
    seed (int): The random seed for reproducibility.
    num_workers (int): Number of worker threads for data loading.
    device (str): The device to be used for computation (e.g., 'cpu' or 'cuda').
    verbose (bool): Whether to print additional information during the pipeline.

    Returns:
    None
    """
    model = get_model(modelname=modelname, dataset=dataset, output_size=1)

    for current_robustification in range(n_robustifications):
        modelfolder = f"{modelname}-{dataset}-n_{n}-robustification_{current_robustification}"

        # 1) generate UAPs for the current state of the model
        uap_logger = _csv_logger(modelfolder, "UAPs_pre_robustification")
        perturbations = generate_adversarial_images_from_model_dataset(
            model,
            modelname,
            dataset,
            logger=uap_logger,
            transform=get_transform(),
            i=i,
            n=n,
            r=r,
            p=p,
            lambda_norm=lambda_norm,
            t=t,
            eps=eps,
            seed=seed,
            num_workers=num_workers,
            device=device,
            verbose=verbose,
        )
        uap_logger.save()

        # 2) evaluate model on testdata
        _evaluate_on_test(
            model,
            dataset,
            modelfolder,
            "eval_unrobustified_test",
            num_workers,
            seed,
        )

        # 3) evaluate model on testdata + uaps
        for perturbation_idx in range(i):
            _evaluate_on_test(
                model,
                dataset,
                modelfolder,
                f"eval_unrobustified_test_uap_{perturbation_idx}",
                num_workers,
                seed,
                perturbations=perturbations,
                transform_p=1,
                transform_idx=perturbation_idx,
            )

        # 4) robustify model
        robustify_logger = _csv_logger(modelfolder, "robustify_model")
        # No idx is passed here, so get_transform falls back to the plain resize transform.
        datamodule = _build_datamodule(
            dataset,
            num_workers,
            seed,
            perturbations=perturbations,
            transform_p=0.5,
        )
        trainer = Trainer(
            max_epochs=50,
            log_every_n_steps=1,
            gradient_clip_val=0.5,
            accelerator="auto",
            logger=robustify_logger,
            fast_dev_run=False,  # set to True to test run
            enable_progress_bar=True,
            enable_model_summary=True,
            callbacks=[
                ModelCheckpoint(
                    monitor="val_loss",
                    mode="min",
                    save_top_k=1,  # keep only the best model
                    save_last=False,  # do not additionally save the last model
                    dirpath=f"robustified_models/{modelfolder}/robustify_model",
                    filename="model",
                )
            ],
        )
        trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader())

        # 5) evaluate robustified model on testdata
        # (no idx is passed, so this runs on the plain resize transform as well)
        _evaluate_on_test(
            model,
            dataset,
            modelfolder,
            "eval_robustified_test",
            num_workers,
            seed,
            perturbations=perturbations,
            transform_p=0.5,
        )

        # 6) evaluate robustified model on testdata + uaps
        for perturbation_idx in range(i):
            _evaluate_on_test(
                model,
                dataset,
                modelfolder,
                f"eval_robustified_test_uap_{perturbation_idx}",
                num_workers,
                seed,
                perturbations=perturbations,
                transform_p=0.5,
                transform_idx=perturbation_idx,
            )

In [7]:
# Model / dataset combination for this run.
modelname = "efficientnet_v2_m"
dataset = "covidx_data"
print(f"\n---\nModel: {modelname} - Dataset: {dataset}")

# Remaining settings, forwarded to pipeline() as keyword arguments.
run_kwargs = {
    "n_robustifications": 3,
    "i": 3,
    "n": 5,
    "t": 10,
    "p": 0.5,
    "lambda_norm": 0.1,
    "r": 0.1,
    "eps": 1e-6,
    "seed": 42,
    "num_workers": NUM_WORKERS,
    "device": device,
}
pipeline(modelname=modelname, dataset=dataset, **run_kwargs)