# Extending

You can extend FD-Shifts to benchmark your own models (including new confidence scores), your own datasets, and your own softmax-based confidence scoring functions. This tutorial shows how.

## Some Setup

First we import everything we need and create a config object. Make sure `EXPERIMENT_ROOT_DIR` and `DATASET_ROOT_DIR` point to suitable directories on your machine; the next cell sets them, so adjust the paths there before running it.

In [None]:
import os

# Both variables are read back later in this notebook via os.getenv().
# NOTE: the dataset path is machine-specific; adjust it to your setup.
os.environ.update(
    {
        "EXPERIMENT_ROOT_DIR": f"{os.getcwd()}/experiments",
        "DATASET_ROOT_DIR": "/home/t974t/Data",
    }
)

In [None]:
from typing import Callable, Optional
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks.progress.rich_progress import RichProgressBar
from rich import print
from torch import nn
from torchvision import datasets, transforms

from fd_shifts import analysis, configs, models, reporting
from fd_shifts.exec import test, train
from fd_shifts.loaders import dataset_collection
from fd_shifts.loaders.abstract_loader import AbstractDataLoader
from fd_shifts.utils import exp_utils

# Set up the FD-Shifts config module (presumably registers its config
# schemas -- TODO confirm against fd_shifts.configs).
configs.init()
# Start from the default configuration for the SVHN dataset.
config = configs.Config.with_defaults(data="svhn")

## Adding a New Model

Let's start with adding a new model. First we set up a model class inheriting from `LightningModule`. We have to implement the `training_step`, `validation_step` and `test_step` methods for the benchmark to work. We will also copy over `load_only_state_dict`, a helper method.

In [None]:
class MyModel(pl.LightningModule):
    """Small fully connected classifier used to demonstrate adding a model.

    The input is flattened, passed through two hidden layers of width 512 and
    classified by a final 10-way linear layer. During validation and testing
    the sum of the logits is reported as an additional confidence score under
    the ``"confid"`` key.
    """

    def __init__(self, cfg: configs.Config) -> None:
        super().__init__()

        self.cfg = cfg
        self.ext_confid_name = "my_fancy_confidence"

        # Number of features after flattening one input image.
        img_size = cfg.data.img_size
        num_inputs = img_size[0] * img_size[1] * img_size[2]
        hidden_dim = 512

        self.encoder = nn.Sequential(
            nn.Flatten(),
            nn.Linear(num_inputs, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        )
        self.classifier = nn.Linear(hidden_dim, 10)

    def _to_expected_channels(self, x):
        """Convert multi-channel input to grayscale if the config expects one channel."""
        if x.shape[1] <= 1 or self.cfg.data.img_size[2] != 1:
            return x
        return transforms.Grayscale()(x)

    def _compute_logits(self, x):
        """Run the encoder followed by the classification layer."""
        features = self.encoder(x)
        return self.classifier(features)

    def training_step(self, batch, batch_idx):
        """Return loss, softmax output and labels for one training batch."""
        inputs, targets = batch
        logits = self._compute_logits(inputs)
        return {
            "loss": nn.functional.cross_entropy(logits, targets),
            "softmax": logits.softmax(dim=1),
            "labels": targets,
        }

    def validation_step(self, batch, batch_idx, dataloader_idx=0):
        """Return loss, softmax output, labels and the extra confidence score."""
        inputs, targets = batch
        logits = self._compute_logits(self._to_expected_channels(inputs))
        return {
            "loss": nn.functional.cross_entropy(logits, targets),
            "softmax": logits.softmax(dim=1),
            "labels": targets,
            "confid": logits.sum(dim=1),
        }

    def test_step(self, batch, batch_idx, dataloader_idx=0):
        """Store logits, labels and the extra confidence score on the module.

        Nothing is returned; the outputs of the batch are written to
        ``self.test_results``.
        """
        inputs, targets = batch
        logits = self._compute_logits(self._to_expected_channels(inputs))
        self.test_results = {
            "logits": logits,
            "labels": targets,
            "confid": logits.sum(dim=1),
        }

    def load_only_state_dict(self, path):
        """Load the weights stored under ``"state_dict"`` in the checkpoint at ``path``."""
        checkpoint = torch.load(path)
        self.load_state_dict(checkpoint["state_dict"], strict=True)

    def configure_optimizers(self):
        """Use Adam with a cosine-annealing schedule spanning the configured epochs."""
        adam = torch.optim.Adam(self.parameters())
        cosine_schedule = torch.optim.lr_scheduler.CosineAnnealingLR(
            adam, T_max=self.cfg.trainer.num_epochs
        )
        return {"optimizer": adam, "lr_scheduler": cosine_schedule}

To use the model we have to tell FD-Shifts about it and update our configuration. We will also use this opportunity to update the default configuration with some information about our experiment. Afterwards we can train the model. In this case we train on SVHN since that is the dataset for which we loaded the default configuration.

In [None]:
# Make the model class available to FD-Shifts under the name "my_model".
models.register_model("my_model", MyModel)

config.model.name = "my_model"
# NOTE(review): MyModel sets self.ext_confid_name = "my_fancy_confidence"
# while the config uses "ext" here -- confirm which name the evaluation reads.
config.eval.ext_confid_name = "ext"

# Training length and batch size for this tutorial run.
config.trainer.num_epochs = 10
config.trainer.batch_size = 256

# Experiment directory layout:
#   <parent of the default group dir>/<dataset name>/<experiment name>
group_name = config.data.dataset
name = "my_first_experiment"
group_dir = config.exp.group_dir.parent / group_name
exp_dir = group_dir / name
exp_dir.mkdir(exist_ok=True, parents=True)
# Presumably the next unused version number inside exp_dir -- verify in exp_utils.
version = exp_utils.get_next_version(exp_dir)

# Replace the default experiment settings with ones pointing at exp_dir.
# All test outputs are placed in exp_dir / "test_results".
config.exp = configs.ExperimentConfig(
    group_name=group_name,
    name=name,
    mode=configs.Mode.train,
    fold=0,
    crossval_n_folds=0,
    global_seed=1234,
    version=version,
    work_dir=os.getcwd(),
    data_root_dir=os.getenv("DATASET_ROOT_DIR"),
    group_dir=group_dir,
    dir=exp_dir,
    version_dir=exp_dir / f"version_{version}",
    output_paths=configs.OutputPathsPerMode(
        test=configs.OutputPathsConfig(
            raw_output=exp_dir / "test_results" / "raw_logits.npz",
            raw_output_dist=exp_dir / "test_results" / "raw_logits_dist.npz",
            external_confids=exp_dir / "test_results" / "external_confids.npz",
            external_confids_dist=exp_dir
            / "test_results"
            / "external_confids_dist.npz",
        )
    ),
)

# Show the final configuration before training starts.
print(config)

# Start training; the RichProgressBar instance is handed to train() as its
# second argument.
train(config, RichProgressBar())

Now let's set up some information about the testing and test on SVHN as well as some additional datasets.

In [None]:
# Test settings for the experiment trained above. Results go to
# exp_dir / "test_results", and the checkpoint used is latest.ckpt inside the
# version directory of the run that was just trained.
config.test = configs.TestConfig(
    name="test_results",
    dir=exp_dir / "test_results",
    cf_path=exp_dir / "hydra/config.yaml",
    selection_criterion="latest",
    best_ckpt_path=exp_dir / f"version_{version}/latest.ckpt",
    only_latest_version=True,
    devries_repro_ood_split=False,
    assim_ood_norm_flag=False,
    iid_set_split="devries",
    raw_output_path="raw_logits.npz",
    external_confids_output_path="external_confids.npz",
    selection_mode="max",
    output_precision=64,
)


# Show the configuration including the new test section.
print(config)

# Run the test stage with this configuration.
test(config)

All computed metrics are now found in various `csv` files in the experiment folder. Let's load and preprocess them and then render a benchmark table similar to the paper.

In [None]:
# Load every metrics CSV found in a "test_results" folder below
# EXPERIMENT_ROOT_DIR, labelling each file with the name of the directory
# three levels above it, then run the reporting preprocessing steps in order.
data = (
    pd.concat(
        [
            reporting.load_file(
                csv_path,
                experiment_override=str(csv_path.parent.parent.parent.stem),
            )
            for csv_path in Path(os.getenv("EXPERIMENT_ROOT_DIR")).glob(
                "**/test_results/*.csv"
            )
        ]
    )
    # Prefix each study name with the experiment it belongs to.
    .assign(study=lambda frame: frame.experiment + "_" + frame.study)
    .pipe(reporting.assign_hparams_from_names)
    .pipe(reporting.rename_confids)
    .pipe(reporting.rename_studies)
    .pipe(reporting.tables.aggregate_over_runs)
    .pipe(reporting.str_format_metrics)
)

# Build the AURC table; the bare expression at the end displays it.
results_table = reporting.tables.build_results_table(
    data=data,
    metric="aurc",
    original_mode=False,
    paper_filter=False,
)
results_table

## Adding a New Dataset

We can also evaluate our model (or one of the built-in ones) on a custom dataset. For simplicity, let's define a dataset that is just a wrapper around MNIST and tell FD-Shifts about it.

In [None]:
class MyDataset(datasets.MNIST):
    """Custom dataset for the tutorial: MNIST under a different name.

    The constructor accepts the same arguments as the MNIST base class and
    forwards them unchanged.
    """

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super().__init__(
            root,
            train=train,
            transform=transform,
            target_transform=target_transform,
            download=download,
        )


# Register the class under the name that is later used as `dataset` in the
# data configuration.
dataset_collection.register_dataset("mydataset", MyDataset)

In [None]:
# Show the configuration as it stands before switching to the new dataset
# (`print` here is the one imported from `rich`).
print(config)

We now have to update our configuration with the new dataset. We will also update the list of datasets to additionally test on. Since we train on MNIST we could argue that SVHN provides a sub-class shift, while CIFAR-10 might be a good choice for a new-class shift.

In [None]:
from copy import deepcopy
# Keep an independent copy of the SVHN configuration; it is needed again when
# the analysis is re-run for the SVHN experiment further down.
config_svhn = deepcopy(config)

# Replace the data section with settings for the custom dataset. Images are
# configured as 32x32 with a single channel, and the dataset directory is a
# sibling of the previous one named "mydataset".
config.data = configs.DataConfig(
    dataset="mydataset",
    data_dir=config.data.data_dir.parent / "mydataset",
    pin_memory=True,
    img_size=(32, 32, 1),
    num_workers=12,
    num_classes=10,
    reproduce_confidnet_splits=True,
    # Per-split augmentation pipelines (presumably applied in the listed
    # order -- verify against the FD-Shifts data loader).
    augmentations={
        "train": {
            "to_tensor": None,
            "random_crop": [32, 4],
            "normalize": [[0.5], [0.5]],
        },
        "val": {
            "to_tensor": None,
            "resize": 32,
            "normalize": [[0.5], [0.5]],
        },
        "test": {
            "to_tensor": None,
            "resize": 32,
            "normalize": [[0.5], [0.5]],
        },
    },
)
# Datasets to evaluate on: the new dataset as the i.i.d. study, no noise
# study, SVHN as in-class study and CIFAR-10 as new-class study.
config.eval.query_studies.iid_study = "mydataset"
config.eval.query_studies.noise_study = []
config.eval.query_studies.in_class_study = ["svhn"]
config.eval.query_studies.new_class_study = ["cifar10"]
print(config)

Let's take a look at the data loader this configuration will create.

In [None]:
# Build the data module for the current configuration.
datamodule = AbstractDataLoader(config)

# NOTE(review): Lightning itself calls prepare_data() before setup(); confirm
# that calling them in this order is intended for AbstractDataLoader.
datamodule.setup()
datamodule.prepare_data()

# Fetch a single batch (inputs x, targets y) from the training loader.
x, y = next(iter(datamodule.train_dataloader()))


def tensor_to_image(t: torch.Tensor) -> np.ndarray:
    """Convert a channel-first tensor into a channel-last NumPy array.

    Args:
        t: Tensor with exactly three dimensions. Its first dimension is moved
            to the end, e.g. ``(C, H, W)`` becomes ``(H, W, C)``.

    Returns:
        The tensor's data as a NumPy array with axes reordered to ``(1, 2, 0)``.
    """
    # detach() makes this work for tensors that require grad as well;
    # calling .numpy() on such a tensor directly raises a RuntimeError.
    return t.detach().cpu().numpy().transpose(1, 2, 0)


# Display the first sample of the batch after converting it to channel-last
# layout.
fig, ax = plt.subplots()
ax.imshow(tensor_to_image(x[0]))
fig.show()

We can now train and test on this dataset and recompute our results table. It will now display both SVHN and our new dataset.

In [None]:
# Same directory scheme as for the first experiment. group_name follows
# config.data.dataset, so this run gets a group directory of its own even
# though the experiment name is reused.
group_name = config.data.dataset
name = "my_first_experiment"
group_dir = config.exp.group_dir.parent / group_name
exp_dir = group_dir / name
exp_dir.mkdir(exist_ok=True, parents=True)
# Presumably the next unused version number inside exp_dir -- verify in exp_utils.
version = exp_utils.get_next_version(exp_dir)

# Experiment settings pointing at the new exp_dir; all test outputs are
# placed in exp_dir / "test_results".
config.exp = configs.ExperimentConfig(
    group_name=group_name,
    name=name,
    mode=configs.Mode.train,
    fold=0,
    crossval_n_folds=0,
    global_seed=1234,
    version=version,
    work_dir=os.getcwd(),
    data_root_dir=os.getenv("DATASET_ROOT_DIR"),
    group_dir=group_dir,
    dir=exp_dir,
    version_dir=exp_dir / f"version_{version}",
    output_paths=configs.OutputPathsPerMode(
        test=configs.OutputPathsConfig(
            raw_output=exp_dir / "test_results" / "raw_logits.npz",
            raw_output_dist=exp_dir / "test_results" / "raw_logits_dist.npz",
            external_confids=exp_dir / "test_results" / "external_confids.npz",
            external_confids_dist=exp_dir
            / "test_results"
            / "external_confids_dist.npz",
        )
    ),
)

# Show the final configuration before training starts.
print(config)

# Train with the updated configuration.
train(config, RichProgressBar())

# Test settings for the run that was just trained. Results go to
# exp_dir / "test_results", and the checkpoint used is latest.ckpt inside that
# run's version directory.
config.test = configs.TestConfig(
    name="test_results",
    dir=exp_dir / "test_results",
    cf_path=exp_dir / "hydra/config.yaml",
    selection_criterion="latest",
    best_ckpt_path=exp_dir / f"version_{version}/latest.ckpt",
    only_latest_version=True,
    devries_repro_ood_split=False,
    assim_ood_norm_flag=False,
    iid_set_split="devries",
    raw_output_path="raw_logits.npz",
    external_confids_output_path="external_confids.npz",
    selection_mode="max",
    output_precision=64,
)


# Show the configuration including the new test section.
print(config)

# Run the test stage with this configuration.
test(config)

# Load every metrics CSV found in a "test_results" folder below
# EXPERIMENT_ROOT_DIR, labelling each file with the name of the directory
# three levels above it, then run the reporting preprocessing steps in order.
data = (
    pd.concat(
        [
            reporting.load_file(
                csv_path,
                experiment_override=str(csv_path.parent.parent.parent.stem),
            )
            for csv_path in Path(os.getenv("EXPERIMENT_ROOT_DIR")).glob(
                "**/test_results/*.csv"
            )
        ]
    )
    # Prefix each study name with the experiment it belongs to.
    .assign(study=lambda frame: frame.experiment + "_" + frame.study)
    .pipe(reporting.assign_hparams_from_names)
    .pipe(reporting.rename_confids)
    .pipe(reporting.rename_studies)
    .pipe(reporting.tables.aggregate_over_runs)
    .pipe(reporting.str_format_metrics)
)

# Build the AURC table; the bare expression at the end displays it.
results_table = reporting.tables.build_results_table(
    data=data,
    metric="aurc",
    original_mode=False,
    paper_filter=False,
)
results_table

## Adding a New CSF

We can also add a new softmax-based confidence scoring function; we just have to tell FD-Shifts about it. Afterwards we need to rerun the analysis for both of our experiments and update the results table.

In [None]:
@analysis.confid_scores.register_confid_func("my_csf")
def my_fancy_csf(softmax):
    """Toy confidence score: one minus the smallest softmax value along axis 1.

    The decorator registers this function under the name ``"my_csf"``.
    """
    smallest_probability = np.min(softmax, axis=1)
    return 1 - smallest_probability


# Add the new CSF to the confidence measures evaluated at test time.
# NOTE: append() runs on every execution, so re-running this cell adds
# "my_csf" to the list again.
config.eval.confidence_measures.test.append("my_csf")

# Re-run the analysis for the current experiment. Input and output directory
# are the same test directory.
analysis.main(
    in_path=config.test.dir,
    out_path=config.test.dir,
    query_studies=config.eval.query_studies,
    add_val_tuning=config.eval.val_tuning,
    threshold_plot_confid=None,
    cf=config,
)

# Repeat both steps for the SVHN experiment, using the configuration copy
# that was saved before switching datasets.
config_svhn.eval.confidence_measures.test.append("my_csf")

analysis.main(
    in_path=config_svhn.test.dir,
    out_path=config_svhn.test.dir,
    query_studies=config_svhn.eval.query_studies,
    add_val_tuning=config_svhn.eval.val_tuning,
    threshold_plot_confid=None,
    cf=config_svhn,
)

In [None]:
# Load every metrics CSV found in a "test_results" folder below
# EXPERIMENT_ROOT_DIR, labelling each file with the name of the directory
# three levels above it, then run the reporting preprocessing steps in order.
data = (
    pd.concat(
        [
            reporting.load_file(
                csv_path,
                experiment_override=str(csv_path.parent.parent.parent.stem),
            )
            for csv_path in Path(os.getenv("EXPERIMENT_ROOT_DIR")).glob(
                "**/test_results/*.csv"
            )
        ]
    )
    # Prefix each study name with the experiment it belongs to.
    .assign(study=lambda frame: frame.experiment + "_" + frame.study)
    .pipe(reporting.assign_hparams_from_names)
    .pipe(reporting.rename_confids)
    .pipe(reporting.rename_studies)
    .pipe(reporting.tables.aggregate_over_runs)
    .pipe(reporting.str_format_metrics)
)

# Build the AURC table; the bare expression at the end displays it.
results_table = reporting.tables.build_results_table(
    data=data,
    metric="aurc",
    original_mode=False,
    paper_filter=False,
)
results_table