# Imports and definitions

In [1]:
import itertools
import pathlib
import shutil
import sys
import time

import flash
import flash.image
import pandas
import pl_bolts
import plotly
import plotly.express
import plotly.graph_objects
import plotly.subplots
import plotly.express.colors
import pytorch_lightning
import torch
import torchmetrics
import torchvision

In [2]:
# Reference ordering of the layer names of the ResNet-18 image classifier:
# the four stem layers, then stages layer1..layer4 with two blocks each, then
# the pooling layer and the head. Used as the reference list when sorting
# layer names.
image_classifier_cifar10_resnet18_layer_order = [
    *(
        f"adapter.backbone.{stem_layer}"
        for stem_layer in ("conv1", "bn1", "relu", "maxpool")
    ),
    *(
        f"adapter.backbone.layer{stage}.{block}.{block_layer}"
        for stage in (1, 2, 3, 4)
        for block in (0, 1)
        for block_layer in (
            "conv1",
            "bn1",
            "relu",
            "conv2",
            "bn2",
            # only the first block of stages 2, 3 and 4 lists downsample entries
            *(("downsample.0", "downsample.1") if stage != 1 and block == 0 else ()),
        )
    ),
    "adapter.backbone.avgpool",
    "adapter.head",
]

# Reference ordering of the layer names of the VGG-11 image classifier:
# backbone entries 0 through 20 in numeric order, followed by "adapter.head".
image_classifier_cifar10_vgg11_layer_order = [
    *map("adapter.backbone.{}".format, range(21)),
    "adapter.head",
]

def sorting(reference):
    """Create a sort ``key`` that ranks values by their position in ``reference``.

    Intended for sorting dataframe layer names, e.g.
    ``df.sort_values("layer", key=sorting(layer_order))``.

    Args:
        reference: sequence defining the desired order; it is queried with
            ``reference.index(value)`` for every value being sorted.

    Returns:
        A function taking a ``pandas.Series`` and returning a new
        ``pandas.Series`` (with a fresh default index) that holds, for each
        input value, its position in ``reference``.
    """
    def key(series):
        # reference.index fails for a value missing from `reference`
        # (ValueError when `reference` is a list).
        positions = list(map(reference.index, series))
        return pandas.Series(positions)
    return key

# strftime-style timestamp pattern: year_month_day__hour_minute_second_UTC-offset.
# NOTE(review): not referenced in the cells visible here - presumably used
# elsewhere in the notebook to name result artifacts; confirm before removing.
TIME_FORMAT = "%Y_%m_%d__%H_%M_%S_%z"

In [3]:
def compute_pruning_amount(epoch):
    """Return the pruning amount for the given epoch.

    No pruning happens during the first 30 epochs (0-29). From epoch 30 on,
    every epoch that is a multiple of 5 returns 0.05; every other epoch
    returns 0.

    Args:
        epoch: zero-based epoch number.

    Returns:
        ``0.05`` on pruning epochs (30, 35, 40, ...), otherwise ``0``.
    """
    # Guard clause: still warming up, or not on a five-epoch boundary.
    if epoch < 30 or epoch % 5 != 0:
        return 0
    return 0.05


class Model(pytorch_lightning.LightningModule):
    """Plain-Lightning classifier: a pretrained VGG-11 followed by a linear head.

    The torchvision VGG-11 produces 1000 outputs per image; ``head`` maps them
    to 10 classes. The same cross-entropy/accuracy computation is shared by the
    training, validation and test steps.

    NOTE(review): the visible cells train ``flash.image.ImageClassifier``
    rather than this class - confirm whether it is still needed.
    """

    def __init__(self):
        super().__init__()
        self.backbone = torchvision.models.vgg11(pretrained=True)
        self.head = torch.nn.Linear(1000, 10)
        self.accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=10)

    def forward(self, x):
        """Return the 10 class logits for an image batch.

        Args:
            x: image batch; assumed to be (batch, 3, height, width) as the
                torchvision VGG backbone expects - TODO confirm against the
                dataloader.
        """
        # The backbone is convolutional, so the batch must be passed through
        # un-flattened. Reshaping it to (batch, features) first makes the
        # backbone's first Conv2d reject the 2-D input.
        return self.head(self.backbone(x))

    def step(self, batch, batch_idx):
        """Compute loss and accuracy for one ``(inputs, targets)`` batch.

        Returns:
            Dict with keys ``"loss"`` (cross entropy) and ``"accuracy"``.
        """
        x, y = batch
        y_hat = self(x)
        loss = torch.nn.functional.cross_entropy(y_hat, y)
        accuracy = self.accuracy(y_hat, y)
        return {"loss": loss, "accuracy": accuracy}

    def training_step(self, batch, batch_idx):
        """Return the training loss for the batch."""
        return self.step(batch, batch_idx)["loss"]

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and ``val_accuracy`` for the batch."""
        values = self.step(batch, batch_idx)
        self.log("val_loss", values["loss"])
        self.log("val_accuracy", values["accuracy"])

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and ``test_accuracy`` for the batch."""
        values = self.step(batch, batch_idx)
        self.log("test_loss", values["loss"])
        self.log("test_accuracy", values["accuracy"])

    def configure_optimizers(self):
        """Optimize all parameters with Adam at a learning rate of 0.02."""
        return torch.optim.Adam(self.parameters(), lr=0.02)

In [4]:
# Seed the global RNGs (and, with workers=True, the dataloader workers) before
# anything random happens below, so the run can be repeated.
pytorch_lightning.seed_everything(seed=42, workers=True)    

# CIFAR-10 train and test splits from the shared dataset directory;
# download=True fetches the files only if they are not already present.
train_dataset = torchvision.datasets.CIFAR10(
    "/shared/ml/datasets/vision/CIFAR10",
    train=True,
    download=True,
)
test_dataset = torchvision.datasets.CIFAR10(
    "/shared/ml/datasets/vision/CIFAR10",
    train=False,
    download=True,
)
# Wrap both datasets in a Flash datamodule. No validation dataset is passed;
# val_split=0.2 is used instead to carve the validation set out of the
# training data.
datamodule = flash.image.ImageClassificationData.from_datasets(
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    batch_size=32,
    num_workers=64,
    transform_kwargs={
        "image_size": (32, 32),
        # Normalisation is left disabled (the values look like the usual
        # ImageNet statistics - confirm before re-enabling):
        # "mean": (0.485, 0.456, 0.406),
        # "std": (0.229, 0.224, 0.225)
    },
    val_split=0.2,
)

# The model that is actually trained below: Flash's ImageClassifier with a
# pretrained "vgg11" backbone and 10 output classes, learning rate 0.001.
model = flash.image.ImageClassifier(
    backbone="vgg11",
    learning_rate=0.001,
    num_classes=10,
    pretrained=True,
)


# Trainer for the VGG-11 / CIFAR-10 pruning run: one GPU (device index 1),
# 32-bit precision, deterministic mode, no epoch or step limit, with logs and
# checkpoints written under results/vgg11_cifar10.
trainer = flash.Trainer(
    accelerator="gpu",
    auto_lr_find=False,
    auto_scale_batch_size=False,
    benchmark=False,
    callbacks=[
        pytorch_lightning.callbacks.DeviceStatsMonitor(),
        # Early stopping on validation cross entropy. With patience=1000 it is
        # unlikely to trigger on lack of improvement; check_finite=True still
        # stops the run if the monitored value becomes NaN/inf.
        pytorch_lightning.callbacks.EarlyStopping(
            check_finite=True,
            check_on_train_epoch_end=False,
            divergence_threshold=None,
            min_delta=0.001,
            mode="min",
            monitor="val_cross_entropy",
            patience=1000,
            stopping_threshold=None,
            strict=True,
            verbose=True,
        ),
        pytorch_lightning.callbacks.LearningRateMonitor(
            log_momentum=True,
            logging_interval="epoch",
        ),
        # Checkpoint every epoch, keeping the 3 checkpoints with the lowest
        # validation cross entropy plus the most recent one.
        pytorch_lightning.callbacks.ModelCheckpoint(
            dirpath=None,
            every_n_epochs=1,
            every_n_train_steps=None,
            filename=None,
            mode="min",
            monitor="val_cross_entropy",
            save_last=True,
            save_top_k=3,
            save_weights_only=False,
            verbose=True,
        ),
        # Global L1 unstructured pruning of "weight" and "bias" parameters at
        # the end of training epochs. The amount is a callable of the epoch
        # (compute_pruning_amount): 0 skips pruning, 0.05 prunes.
        pytorch_lightning.callbacks.ModelPruning(
            amount=compute_pruning_amount,
            apply_pruning=True,
            make_pruning_permanent=True,
            parameter_names=("weight", "bias"),
            parameters_to_prune=None,
            prune_on_train_epoch_end=True,
            pruning_dim=None,
            # pruning_fn="random_unstructured",
            pruning_fn="l1_unstructured",
            pruning_norm=None,
            resample_parameters=True,
            use_global_unstructured=True,
            use_lottery_ticket_hypothesis=True,
            verbose=True,
        ),
        pytorch_lightning.callbacks.RichProgressBar(
#             refresh_rate=10,
        ),
    ],
    check_val_every_n_epoch=1,
    default_root_dir="results/vgg11_cifar10",
    detect_anomaly=False,
    deterministic=True,
    devices=[1],
    enable_checkpointing=True,
    enable_model_summary=True,
    enable_progress_bar=True,
    fast_dev_run=False,
    gradient_clip_algorithm=None,
    gradient_clip_val=None,
    ipus=None,
    log_every_n_steps=10,
    # TensorBoard logs go to the same directory as default_root_dir.
    logger=[
        pytorch_lightning.loggers.TensorBoardLogger(
            default_hp_metric=True,
            log_graph=True,
            name="default",
            prefix="",
            save_dir="results/vgg11_cifar10",
            version=None,
        ),
    ],
    # -1 for both limits: training has no epoch/step cap and ends only through
    # EarlyStopping or a manual interrupt.
    max_epochs=-1,
    max_steps=-1,
    max_time=None,
    min_epochs=1,
    min_steps=None,
    move_metrics_to_cpu=False,
    multiple_trainloader_mode="min_size",
    num_nodes=1,
    num_processes=1,
    num_sanity_val_steps=2,
    plugins=[],
    precision=32,
    profiler=None,
    reload_dataloaders_every_n_epochs=0,
    replace_sampler_ddp=True,
    strategy=None,
    sync_batchnorm=False,
    tpu_cores=None,
    track_grad_norm=-1,
    weights_save_path=None,
)

Global seed set to 42


Files already downloaded and verified
Files already downloaded and verified


Using 'vgg11' provided by PyTorch/torchvision (https://github.com/pytorch/vision).
  rank_zero_warn(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Train, then evaluate on the test split. The trainer is configured with
# max_epochs=-1, so fit() returns only when EarlyStopping fires or the run is
# interrupted. test() is given the in-memory `model` object, not a checkpoint
# path.
trainer.fit(model, datamodule=datamodule)
trainer.test(model, datamodule=datamodule)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Output()

  rank_zero_warn(
Metric val_cross_entropy improved. New best score: 0.676
Epoch 0, global step 1250: 'val_cross_entropy' reached 0.67576 (best 0.67576), saving model to 'results/vgg11_cifar10/default/version_1/checkpoints/epoch=0-step=1250.ckpt' as top 3
Metric val_cross_entropy improved by 0.055 >= min_delta = 0.001. New best score: 0.620
Epoch 1, global step 2500: 'val_cross_entropy' reached 0.62037 (best 0.62037), saving model to 'results/vgg11_cifar10/default/version_1/checkpoints/epoch=1-step=2500.ckpt' as top 3
Epoch 2, global step 3750: 'val_cross_entropy' reached 0.70054 (best 0.62037), saving model to 'results/vgg11_cifar10/default/version_1/checkpoints/epoch=2-step=3750.ckpt' as top 3
Epoch 3, global step 5000: 'val_cross_entropy' reached 0.67203 (best 0.62037), saving model to 'results/vgg11_cifar10/default/version_1/checkpoints/epoch=3-step=5000.ckpt' as top 3
Metric val_cross_entropy improved by 0.067 >= min_delta = 0.001. New best score: 0.554
Epoch 4, global step 6250: 