# 24FS\_I4DS27: Adversarial Attacks \\ Wie kann KI überlistet werden? <br> 03-Training

In [1]:
%load_ext autoreload
%autoreload 2

import os
import torch
import wandb
import warnings
import torchvision

from lightning import Trainer
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

# Move from the notebook's directory to its parent so that the `src` package
# imported below and the relative `data/...` / `models/...` paths resolve.
# Guarded so that re-running this cell does not climb one directory further
# each time (a bare os.chdir("..") is not idempotent).
if not os.path.isdir("src"):
    os.chdir("..")
from src.data.mri import MRIDataModule
from src.data.covidx import COVIDXDataModule
from src.models.imageclassifier import ImageClassifier

# Tell W&B which notebook this run originates from.
os.environ.update({"WANDB_NOTEBOOK_NAME": "notebooks/03-training.ipynb"})
# Silence UserWarning messages for the rest of the session.
warnings.filterwarnings(action="ignore", category=UserWarning)
# Select torch's "medium" float32 matmul precision setting.
torch.set_float32_matmul_precision("medium")

In [2]:
# --- Weights & Biases target ---
WANDB_ENTITY = "24FS_I4DS27"
WANDB_PROJECT = "baselines"

# --- Training / data-loading constants ---
BATCH_SIZE = 32   # offered to every sweep as its only batch_size value
OUTPUT_SIZE = 1   # forwarded to ImageClassifier as output_size
NUM_WORKERS = 8   # forwarded to both datamodules as num_workers

# --- Sweep axes: one sweep is created per (dataset, model) pair ---
# Architecture names handed to ImageClassifier(modelname=...), grouped by family.
models = [
    "alexnet",
    "vgg11", "vgg16",
    "resnet18", "resnet50",
    "densenet121", "densenet169",
    "efficientnet_v2_s",
    "vit_b_16",
]
# Dataset keys that train() maps to a datamodule.
datasets = ["covidx_data", "mri_data"]

# Preprocessing shared by both datamodules: resize every image to 224x224
# (antialiasing enabled).
transform = torchvision.transforms.Compose(
    [torchvision.transforms.Resize(size=(224, 224), antialias=True)]
)

In [3]:
def _build_datamodule(config):
    """Create and set up the datamodule selected by ``config.dataset``.

    Args:
        config: The W&B run config; ``dataset`` and ``batch_size`` are read.

    Returns:
        Whatever the datamodule's ``setup()`` returns. The caller uses it as
        the datamodule itself, so ``setup()`` presumably returns ``self`` in
        this project (stock Lightning ``setup()`` returns ``None``) -- confirm.

    Raises:
        ValueError: If ``config.dataset`` is not a known dataset key.
    """
    if config.dataset == "covidx_data":
        return COVIDXDataModule(
            path="data/raw/COVIDX-CXR4",
            transform=transform,
            num_workers=NUM_WORKERS,
            batch_size=config.batch_size,
            train_sample_size=0.05,  # presumably a 5% training subsample -- TODO confirm
            train_shuffle=True,
        ).setup()
    if config.dataset == "mri_data":
        return MRIDataModule(
            path="data/raw/Brain-Tumor-MRI",
            path_processed="data/processed/Brain-Tumor-MRI",
            transform=transform,
            num_workers=NUM_WORKERS,
            batch_size=config.batch_size,
            train_shuffle=True,
        ).setup()
    # Fail loudly instead of leaving the datamodule undefined.
    raise ValueError(
        f"Unknown dataset {config.dataset!r}; expected 'covidx_data' or 'mri_data'."
    )


def train():
    """Run a single W&B sweep trial: build model and data, fit, checkpoint.

    Meant to be passed to ``wandb.agent`` as the trial function. The
    hyperparameters ``model``, ``dataset``, ``p_dropout_classifier``,
    ``weight_decay``, ``batch_size`` and ``epochs`` are read from
    ``wandb.config`` after ``wandb.init()``.

    The W&B run is always closed: with the default exit code on success and
    with ``exit_code=1`` if anything raises, after which the exception is
    re-raised.

    Raises:
        ValueError: If ``config.dataset`` is not a known dataset key.
    """
    wandb.init()
    try:
        config = wandb.config

        wandb_logger = WandbLogger(log_model=True)

        # NOTE(review): the sweep also defines `lr`, but it is not forwarded
        # here, so the model uses its own default learning rate -- confirm
        # whether `config.lr` should be passed to ImageClassifier.
        model = ImageClassifier(
            modelname=config.model,
            output_size=OUTPUT_SIZE,
            p_dropout_classifier=config.p_dropout_classifier,
            weight_decay=config.weight_decay,
        )
        wandb_logger.watch(model, log_graph=False)

        datamodule = _build_datamodule(config)

        # EarlyStopping is currently disabled; only checkpointing is active.
        checkpoint_callback = ModelCheckpoint(
            monitor="val_loss",
            mode="min",
            save_top_k=1,    # keep the best checkpoint by val_loss
            save_last=True,  # additionally keep the last checkpoint
            dirpath=f"models/{model.modelname}",
            filename=f"{model.modelname}-lr{model.lr}-pdrop{model.p_dropout_classifier}-wd{model.weight_decay}",
        )
        trainer = Trainer(
            max_epochs=config.epochs,
            log_every_n_steps=1,
            gradient_clip_val=0.5,
            accelerator="auto",
            logger=wandb_logger,
            fast_dev_run=False,  # set to True to test run
            enable_progress_bar=True,
            enable_model_summary=True,
            callbacks=[checkpoint_callback],
        )

        # Train the model
        trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader())
        wandb_logger.experiment.unwatch(model)

        # Drop references to the large objects before the next trial starts.
        del config, wandb_logger, model, trainer
    except BaseException:
        # Close the run as failed so it is not left open, then propagate.
        wandb.finish(exit_code=1)
        raise
    wandb.finish()

In [None]:
# Create and run one grid sweep per (dataset, model) pair, datasets outermost.
for dataset_name, model_name in ((d, m) for d in datasets for m in models):
    # Single-value lists pin a setting; only dropout and weight decay vary,
    # giving a 2 x 2 grid per sweep.
    sweep_parameters = {
        name: {"values": values}
        for name, values in [
            ("model", [model_name]),
            ("dataset", [dataset_name]),
            ("lr", [1e-5]),
            ("p_dropout_classifier", [0.0, 0.2]),
            ("weight_decay", [0.0, 0.0001]),
            ("batch_size", [BATCH_SIZE]),
            ("epochs", [20]),
        ]
    }
    sweep_config = {
        "method": "grid",
        "metric": {"name": "val_loss", "goal": "minimize"},
        "parameters": sweep_parameters,
    }

    # Register the sweep, then let an agent execute its trials via train().
    sweep_id = wandb.sweep(sweep_config, entity=WANDB_ENTITY, project=WANDB_PROJECT)
    wandb.agent(sweep_id, function=train)

Create sweep with ID: f81jgkgb
Sweep URL: https://wandb.ai/24FS_I4DS27/baselines/sweeps/f81jgkgb


[34m[1mwandb[0m: Agent Starting Run: z5uy932o with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dataset: covidx_data
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	model: alexnet
[34m[1mwandb[0m: 	p_dropout_classifier: 0
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: Currently logged in as: [33mgabrieltorresgamez[0m ([33m24FS_I4DS27[0m). Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params
---------------------------------------------
0 | metrics | MetricCollection | 0     
1 | model   | AlexNet          | 57.0 M
---------------------------------------------
57.0 M    Trainable params
0         Non-trainable params
57.0 M    Total params
228.032   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


VBox(children=(Label(value='1304.855 MB of 1304.855 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train_BinaryAUROC,▃▄▇▇▆▅██▃▄▅▄▂▇▁▆▄▂▅▅
train_BinaryAccuracy,▁▇██████████████████
train_BinaryF1Score,▁▇██████████████████
train_BinaryPrecision,▁█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
train_BinaryRecall,▁▇██████████████████
train_BinarySpecificity,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
val_BinaryAUROC,▁███████████████████

0,1
epoch,19.0
train_BinaryAUROC,0.49423
train_BinaryAccuracy,0.84881
train_BinaryF1Score,0.91822
train_BinaryPrecision,0.84881
train_BinaryRecall,1.0
train_BinarySpecificity,0.0
train_loss,0.42661
trainer/global_step,2139.0
val_BinaryAUROC,0.5


[34m[1mwandb[0m: Agent Starting Run: 5o249s68 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dataset: covidx_data
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	model: alexnet
[34m[1mwandb[0m: 	p_dropout_classifier: 0
[34m[1mwandb[0m: 	weight_decay: 0.0001


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params
---------------------------------------------
0 | metrics | MetricCollection | 0     
1 | model   | AlexNet          | 57.0 M
---------------------------------------------
57.0 M    Trainable params
0         Non-trainable params
57.0 M    Total params
228.032   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


VBox(children=(Label(value='1304.855 MB of 1304.855 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train_BinaryAUROC,▆▇▃▃▃█▆▃▆▆▅▅▃▂▇▇▂▅▄▁
train_BinaryAccuracy,▁▅▆████▅▇██████▇████
train_BinaryF1Score,▁▅▆████▅███████▇████
train_BinaryPrecision,▅▄█▆▆▆▇▁▅▆▆▆▆▆▆▆▆▆▆▆
train_BinaryRecall,▁▅▆████▅▇██████▇████
train_BinarySpecificity,█▄▄▁▁▁▁▃▁▁▁▂▁▁▁▂▁▁▁▁
train_loss,█▄▂▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
val_BinaryAUROC,▇▇█▇▇▇▇▆▇▇▁▇▇▇▇▂▇▇▇▇

0,1
epoch,19.0
train_BinaryAUROC,0.47534
train_BinaryAccuracy,0.85116
train_BinaryF1Score,0.9196
train_BinaryPrecision,0.85116
train_BinaryRecall,1.0
train_BinarySpecificity,0.0
train_loss,0.42909
trainer/global_step,2139.0
val_BinaryAUROC,0.5


[34m[1mwandb[0m: Agent Starting Run: jnlxh1pq with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dataset: covidx_data
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	model: alexnet
[34m[1mwandb[0m: 	p_dropout_classifier: 0.2
[34m[1mwandb[0m: 	weight_decay: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params
---------------------------------------------
0 | metrics | MetricCollection | 0     
1 | model   | AlexNet          | 57.0 M
---------------------------------------------
57.0 M    Trainable params
0         Non-trainable params
57.0 M    Total params
228.032   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


VBox(children=(Label(value='2947.382 MB of 2947.382 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train_BinaryAUROC,▆▆▆█▆▅▄▇█▅▅▄▁▄▂▆▃▁▂▅
train_BinaryAccuracy,▁███████████████████
train_BinaryF1Score,▁███████████████████
train_BinaryPrecision,█▃▃▃▃▃▃▃▃▁▃▃▃▃▃▃▃▃▃▃
train_BinaryRecall,▁███████████████████
train_BinarySpecificity,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
val_BinaryAUROC,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_BinaryAUROC,0.49655
train_BinaryAccuracy,0.84704
train_BinaryF1Score,0.91719
train_BinaryPrecision,0.84704
train_BinaryRecall,1.0
train_BinarySpecificity,0.0
train_loss,0.42806
trainer/global_step,2139.0
val_BinaryAUROC,0.5


[34m[1mwandb[0m: Agent Starting Run: 8oenvvk4 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dataset: covidx_data
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	model: vgg11
[34m[1mwandb[0m: 	p_dropout_classifier: 0
[34m[1mwandb[0m: 	weight_decay: 0.0001


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params
---------------------------------------------
0 | metrics | MetricCollection | 0     
1 | model   | VGG              | 128 M 
---------------------------------------------
128 M     Trainable params
0         Non-trainable params
128 M     Total params
515.082   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


VBox(children=(Label(value='2947.382 MB of 2947.382 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train_BinaryAUROC,▄▆▄▇▅▇▆▅█▇▇▃▆▅▅▇▅▅▆▁
train_BinaryAccuracy,▁▄█▇▆███████████▅██▄
train_BinaryF1Score,▁▅█▇▆███████████▅██▅
train_BinaryPrecision,▁▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▄▇▇▄
train_BinaryRecall,▁▄█▇▆███████████▅██▄
train_BinarySpecificity,█▅▁▂▃▁▁▁▁▁▁▁▁▁▁▁▄▁▁▅
train_loss,█▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▃▁▁▆
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
val_BinaryAUROC,▅▅▁▅▅▅▅▆▅▅▅▅▅▅▅█▅▅▅▃

0,1
epoch,19.0
train_BinaryAUROC,0.46869
train_BinaryAccuracy,0.71677
train_BinaryF1Score,0.83018
train_BinaryPrecision,0.84103
train_BinaryRecall,0.81961
train_BinarySpecificity,0.1575
train_loss,13.41921
trainer/global_step,2139.0
val_BinaryAUROC,0.43506


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
