# Unified Experiment Code
> Resistance is futile.

In [None]:
# default_exp unified_experiment

In [None]:
# hide
import blackhc.project.script

Import the modules and functions we are going to use.

In [None]:
# exports

import dataclasses
import traceback
from dataclasses import dataclass
from typing import Type, Union, Optional

import torch
import torch.utils.data
from blackhc.project import is_run_from_ipython
from blackhc.project.experiment import embedded_experiments

import batchbald_redux.acquisition_functions as acquisition_functions
from batchbald_redux.acquisition_functions import (
    CandidateBatchComputer,
    EvalCandidateBatchComputer,
)
from batchbald_redux.black_box_model_training import evaluate
from batchbald_redux.dataset_challenges import (
    get_base_dataset_index,
    get_target,
)
from batchbald_redux.di import DependencyInjection
from batchbald_redux.experiment_data import ExperimentData, ExperimentDataConfig, OoDDatasetConfig
from batchbald_redux.models import MnistModelTrainer
from batchbald_redux.resnet_models import Cifar10ModelTrainer
from batchbald_redux.train_eval_model import (
    TrainEvalModel,
    TrainSelfDistillationEvalModel,
)
from batchbald_redux.trained_model import ModelTrainer

In [None]:
# exports


@dataclass
class UnifiedExperiment:
    """Configuration and driver for one active-learning experiment.

    The dataclass fields double as the configuration namespace: ``vars(self)``
    is handed to ``DependencyInjection`` to build the data configuration, the
    acquisition function, the model trainer and the eval-model trainer.
    ``run`` executes the train / evaluate / acquire loop and records what
    happened in the ``store`` mapping passed to it.
    """

    # Seed handed to `torch.manual_seed` at the start of `run`.
    seed: int

    # In-distribution dataset name, and the optional out-of-distribution (OoD)
    # dataset name. `None` means no `OoDDatasetConfig` is created.
    id_dataset_name: str
    ood_dataset_name: Optional[str]
    # With an OoD dataset: True trains with a KL-divergence loss and acquires
    # every candidate; False drops candidates whose label equals -1.
    ood_exposure: bool
    # Also used in `run` to derive the iteration cap.
    initial_training_set_size: int = 20
    validation_set_size: int = 1024
    # A falsy value makes the eval model use the pool dataset instead of the
    # dedicated evaluation dataset.
    evaluation_set_size: int = 1024
    id_repetitions: float = 1
    ood_repetitions: float = 1
    add_dataset_noise: bool = False
    validation_split_random_state: int = 0

    # Together with `initial_training_set_size` these bound the loop in `run`.
    acquisition_size: int = 5
    max_training_set: int = 200

    max_training_epochs: int = 300

    num_pool_samples: int = 100
    # Passed as `num_samples` to `evaluate` on the test loader.
    num_validation_samples: int = 20
    num_training_samples: int = 1

    device: str = "cuda"
    # The three component types below are instantiated through dependency injection.
    acquisition_function: Union[
        Type[CandidateBatchComputer], Type[EvalCandidateBatchComputer]
    ] = acquisition_functions.BALD
    train_eval_model: Type[TrainEvalModel] = TrainSelfDistillationEvalModel
    model_trainer_factory: Type[ModelTrainer] = Cifar10ModelTrainer

    # Not read directly in this class; presumably consumed by an injected
    # component (e.g. an acquisition function) - TODO confirm.
    temperature: float = 0.0

    def load_experiment_data(self) -> ExperimentData:
        """Build the data configuration from this experiment's fields and load it.

        The OoD dataset config is only created when ``ood_dataset_name`` is set;
        otherwise ``None`` is passed as ``ood_dataset_config``.
        """
        injector = DependencyInjection(vars(self), [])

        ood_config: Optional[OoDDatasetConfig] = None
        if self.ood_dataset_name is not None:
            ood_config = injector.create_dataclass_type(OoDDatasetConfig)

        data_config: ExperimentDataConfig = injector.create_dataclass_type(
            ExperimentDataConfig, ood_dataset_config=ood_config
        )
        return data_config.load()

    # Simple Dependency Injection
    def create_acquisition_function(self):
        """Instantiate ``self.acquisition_function`` from this experiment's fields."""
        return DependencyInjection(vars(self)).create_dataclass_type(self.acquisition_function)

    def create_train_eval_model(self, runtime_config) -> TrainEvalModel:
        """Instantiate ``self.train_eval_model``.

        ``runtime_config`` entries are merged over the experiment fields, so a
        runtime value wins when both define the same key.
        """
        injector = DependencyInjection({**vars(self), **runtime_config}, [])
        return injector.create_dataclass_type(self.train_eval_model)

    def create_model_trainer(self) -> ModelTrainer:
        """Instantiate ``self.model_trainer_factory`` from this experiment's fields."""
        return DependencyInjection(vars(self)).create_dataclass_type(self.model_trainer_factory)

    def _compute_candidate_batch(
        self, acquisition_function, data, model_trainer, trained_model, pool_loader, validation_loader, iteration_log
    ):
        """Score the pool and return the candidate batch for this iteration.

        A ``CandidateBatchComputer`` only needs the trained model. An
        ``EvalCandidateBatchComputer`` additionally needs an eval model, which is
        trained here and logged under ``iteration_log["eval_training"]``.

        Raises:
            ValueError: if the acquisition function is neither of the two kinds.
        """
        if isinstance(acquisition_function, CandidateBatchComputer):
            return acquisition_function.compute_candidate_batch(trained_model, pool_loader, self.device)

        if not isinstance(acquisition_function, EvalCandidateBatchComputer):
            raise ValueError(f"Unknown acquisition function {acquisition_function}!")

        # Fall back to the pool when no dedicated evaluation set was requested.
        eval_dataset = data.evaluation_dataset if self.evaluation_set_size else data.active_learning.pool_dataset

        runtime_config = dict(
            model_trainer=model_trainer,
            training_dataset=data.active_learning.training_dataset,
            train_augmentations=data.train_augmentations,
            eval_dataset=eval_dataset,
            validation_loader=validation_loader,
            trained_model=trained_model,
            dataset_device=data.device,
        )
        train_eval_model = self.create_train_eval_model(runtime_config)

        iteration_log["eval_training"] = {}
        trained_eval_model = train_eval_model(device=self.device, training_log=iteration_log["eval_training"])

        return acquisition_function.compute_candidate_batch(
            trained_model, trained_eval_model, pool_loader, device=self.device
        )

    def run(self, store):
        """Run the active-learning loop and record results in ``store``.

        Keys written to ``store``: ``dataset_info``, ``initial_training_set_indices``,
        ``evaluation_set_indices`` and ``active_learning_steps`` (one dict per
        iteration with ``training``, ``evaluation_metrics`` and, unless the loop
        stopped in that iteration, ``acquisition`` plus optionally ``eval_training``).

        The loop stops once the training set has reached ``max_training_set`` or
        after ``max_iterations`` acquisition rounds, whichever comes first.
        """
        torch.manual_seed(self.seed)

        # Active Learning setup
        data = self.load_experiment_data()
        store["dataset_info"] = {
            "training": repr(data.active_learning.base_dataset),
            "test": repr(data.test_dataset),
        }
        store["initial_training_set_indices"] = data.initial_training_set_indices
        store["evaluation_set_indices"] = data.evaluation_set_indices

        model_trainer = self.create_model_trainer()

        train_loader = model_trainer.get_train_dataloader(data.active_learning.training_dataset)
        pool_loader = model_trainer.get_evaluation_dataloader(data.active_learning.pool_dataset)
        validation_loader = model_trainer.get_evaluation_dataloader(data.validation_dataset)
        test_loader = model_trainer.get_evaluation_dataloader(data.test_dataset)

        # Assign first, then read back: the step list (and each step dict below)
        # is always fetched from its container rather than kept as a local literal.
        # NOTE(review): presumably because the store may wrap assigned values - confirm.
        store["active_learning_steps"] = []
        active_learning_steps = store["active_learning_steps"]

        acquisition_function = self.create_acquisition_function()

        # Cap at 1.5x the nominal number of acquisition rounds; an iteration is
        # counted even when fewer than `acquisition_size` samples are acquired
        # (which can happen when label -1 candidates are filtered out below).
        max_iterations = int(1.5 * (self.max_training_set - self.initial_training_set_size) / self.acquisition_size)
        num_iterations = 0

        # Active Training Loop
        while True:
            training_set_size = len(data.active_learning.training_dataset)
            print(f"Training set size {training_set_size}:")

            active_learning_steps.append({})
            iteration_log = active_learning_steps[-1]
            iteration_log["training"] = {}

            # TODO: this is a hack! :(
            # Only OoD exposure trains with a KL-divergence loss; every other
            # setup shares a single NLL loss for training and validation.
            if self.ood_dataset_name is not None and self.ood_exposure:
                loss = torch.nn.KLDivLoss(log_target=False, reduction="batchmean")
                validation_loss = torch.nn.NLLLoss()
            else:
                validation_loss = torch.nn.NLLLoss()
                loss = validation_loss

            trained_model = model_trainer.get_trained(
                train_loader=train_loader,
                train_augmentations=data.train_augmentations,
                validation_loader=validation_loader,
                log=iteration_log["training"],
                loss=loss,
                validation_loss=validation_loss,
            )

            evaluation_metrics = evaluate(
                model=trained_model,
                num_samples=self.num_validation_samples,
                loader=test_loader,
                device=self.device,
                storage_device="cpu",
            )
            iteration_log["evaluation_metrics"] = evaluation_metrics
            print(f"Perf after training {evaluation_metrics}")

            # The stop check comes after training, so the final training set
            # still gets a trained-and-evaluated entry in the log.
            budget_reached = training_set_size >= self.max_training_set
            if budget_reached or num_iterations >= max_iterations:
                print("Done.")
                break

            candidate_batch = self._compute_candidate_batch(
                acquisition_function,
                data,
                model_trainer,
                trained_model,
                pool_loader,
                validation_loader,
                iteration_log,
            )

            # Translate pool indices into (dataset kind, base-dataset index)
            # pairs and collect the labels of the candidates.
            pool_dataset = data.active_learning.pool_dataset
            candidate_global_dataset_indices = []
            candidate_labels = []
            for pool_index in candidate_batch.indices:
                base_entry = get_base_dataset_index(pool_dataset, pool_index)
                origin = "ood" if base_entry.dataset == data.ood_dataset else "id"
                candidate_global_dataset_indices.append((origin, base_entry.index))
                candidate_labels.append(get_target(pool_dataset, pool_index).cpu().numpy())

            iteration_log["acquisition"] = dict(
                indices=candidate_global_dataset_indices,
                labels=candidate_labels,
                scores=candidate_batch.scores,
            )

            print(candidate_batch)
            print(candidate_global_dataset_indices)

            if self.ood_dataset_name is None or self.ood_exposure:
                acquired_indices = candidate_batch.indices
            else:
                # Without OoD exposure, candidates labelled -1 are not added to
                # the training set.
                acquired_indices = [
                    index for index, label in zip(candidate_batch.indices, candidate_labels) if label != -1
                ]
            data.active_learning.acquire(acquired_indices)

            ls = ", ".join(f"{label} ({score:.4})" for label, score in zip(candidate_labels, candidate_batch.scores))
            print(f"Acquiring (label, score)s: {ls}")

            num_iterations += 1

## CIFAR-10 vs SVHN Coverage

In [None]:
# experiment
# OOD experiment (ood_exposure=True)

# Short run: one training epoch, two pool samples and a training-set budget of
# 20 + 10, with CIFAR-10 as in-distribution data and SVHN as OoD data that the
# model is exposed to during training (ood_exposure=True).
experiment = UnifiedExperiment(
    seed=1,
    # Data
    id_dataset_name="CIFAR-10",
    ood_dataset_name="SVHN",
    ood_exposure=True,
    evaluation_set_size=100,
    # Acquisition
    acquisition_function=acquisition_functions.EvalBALD,
    acquisition_size=10,
    num_pool_samples=2,
    max_training_set=20 + 10,
    # Training
    max_training_epochs=1,
    device="cuda",
)

# `run` fills the dict in place; the bare name on the last line makes the
# notebook display it.
results = dict()
experiment.run(results)
results

Creating: OoDDatasetConfig(
	ood_dataset_name=SVHN,
	ood_repetitions=1,
	ood_exposure=True
)
Creating: ExperimentDataConfig(
	id_dataset_name=CIFAR-10,
	id_repetitions=1,
	initial_training_set_size=0,
	validation_set_size=1024,
	validation_split_random_state=0,
	evaluation_set_size=100,
	add_dataset_noise=False,
	device=cuda,
	ood_dataset_config=OoDDatasetConfig(ood_dataset_name='SVHN', ood_repetitions=1, ood_exposure=True)
)
Files already downloaded and verified
Files already downloaded and verified
Using downloaded and verified file: data/SVHN/train_32x32.mat
Using downloaded and verified file: data/SVHN/test_32x32.mat
Creating: Cifar10ModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=1
)
Creating: EvalBALD(
	acquisition_size=10,
	num_pool_samples=2
)
Training set size 0:
Cosine Annealing


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.0991, 'crossentropy': tensor(2.3396)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f4e0402d250>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f4e0402dcd0>,
	trained_model=TrainedBayesianModel(model=BayesianResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2)

get_predictions_labels:   0%|          | 0/200 [00:00<?, ?it/s]

Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.099609375, 'crossentropy': 2.4185454845428467}


get_predictions_labels:   0%|          | 0/244266 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/244266 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122133 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122133 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122133 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122133 [00:00<?, ?it/s]

CandidateBatch(scores=[0.16170918941497803, 0.1460709571838379, 0.13792598247528076, 0.1345287561416626, 0.12914717197418213, 0.12680065631866455, 0.12444841861724854, 0.12151336669921875, 0.12119269371032715, 0.11997759342193604], indices=[27777, 39432, 33443, 10304, 27398, 48250, 27779, 7104, 43814, 16946])
[('id', 27836), ('id', 39513), ('id', 33509), ('id', 10328), ('id', 27457), ('id', 48350), ('id', 27838), ('id', 7124), ('id', 43902), ('id', 16982)]
Acquiring (label, score)s: tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) (0.1617), tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) (0.1461), tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]) (0.1379), tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]) (0.1345), tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) (0.1291), tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) (0.1268), tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]) (0.1244), tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]) (0.1215), tensor([1., 0., 0., 0., 0., 0., 0., 0., 

100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1083984375, 'crossentropy': 10.67159652709961}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.112, 'crossentropy': tensor(8.4377)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f4e0402d250>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f4e0402dcd0>,
	trained_model=TrainedBayesianModel(model=BayesianResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2):

get_predictions_labels:   0%|          | 0/220 [00:00<?, ?it/s]

Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1005859375, 'crossentropy': 6.751611948013306}


get_predictions_labels:   0%|          | 0/244246 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/244246 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122123 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122123 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122123 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122123 [00:00<?, ?it/s]

CandidateBatch(scores=[0.13110750913619995, 0.12008911371231079, 0.12005770206451416, 0.11774098873138428, 0.11513710021972656, 0.11465287208557129, 0.11355888843536377, 0.11210590600967407, 0.11198091506958008, 0.11182057857513428], indices=[108357, 16410, 111919, 36436, 19854, 28490, 94016, 29013, 72081, 53570])
[('ood', 59491), ('id', 16446), ('ood', 63053), ('id', 36516), ('id', 19897), ('id', 28556), ('ood', 45150), ('id', 29080), ('ood', 23215), ('ood', 4704)]
Acquiring (label, score)s: tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000]) (0.1311), tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]) (0.1201), tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000]) (0.1201), tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]) (0.1177), tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]) (0.1151), tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]) (0.1147), tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.10

100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1435546875, 'crossentropy': 4.0553059577941895}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1433, 'crossentropy': tensor(3.9053)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f4e0402d250>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f4e0402dcd0>,
	trained_model=TrainedBayesianModel(model=BayesianResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2)

get_predictions_labels:   0%|          | 0/240 [00:00<?, ?it/s]

Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.13671875, 'crossentropy': 4.364100933074951}


get_predictions_labels:   0%|          | 0/244226 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/244226 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122113 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122113 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122113 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122113 [00:00<?, ?it/s]

CandidateBatch(scores=[0.22851800918579102, 0.22053802013397217, 0.2160869836807251, 0.2160542607307434, 0.21418005228042603, 0.21359804272651672, 0.21166038513183594, 0.20974905788898468, 0.2081349492073059, 0.2079445719718933], indices=[14278, 4426, 39970, 40479, 38313, 17622, 40042, 30875, 37625, 40974])
[('id', 14312), ('id', 4433), ('id', 40064), ('id', 40574), ('id', 38402), ('id', 17662), ('id', 40136), ('id', 30948), ('id', 37713), ('id', 41071)]
Acquiring (label, score)s: tensor([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]) (0.2285), tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]) (0.2205), tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) (0.2161), tensor([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]) (0.2161), tensor([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]) (0.2142), tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]) (0.2136), tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]) (0.2117), tensor([0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]) (0.2097), tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.

100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1630859375, 'crossentropy': 3.1745386123657227}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1488, 'crossentropy': tensor(3.0583)}
Done.


{'dataset_info': {'training': "'CIFAR-10 (Train, seed=0, 48976 samples)' | one_hot_targets{'num_classes': 10} + 'SVHN (Train, seed=0, 73257 samples)' | uniform_targets{'num_classes': 10}",
  'test': "'CIFAR-10 (Test)'"},
 'initial_training_set_indices': [],
 'evaluation_set_indices': [12980,
  44617,
  3812,
  42704,
  6729,
  38942,
  48125,
  16968,
  5652,
  4045,
  6984,
  21168,
  37164,
  33354,
  47307,
  17878,
  26665,
  40819,
  14805,
  201,
  33976,
  35571,
  15578,
  36667,
  5551,
  23088,
  32496,
  5705,
  23255,
  25559,
  33058,
  43729,
  47518,
  36303,
  18452,
  34447,
  24821,
  36157,
  48089,
  25120,
  26944,
  24745,
  11001,
  6995,
  10899,
  36881,
  7002,
  19049,
  13388,
  40737,
  66,
  14046,
  45656,
  5604,
  9134,
  35979,
  19757,
  43627,
  35248,
  23566,
  46542,
  39478,
  25443,
  45862,
  30730,
  9611,
  43077,
  23902,
  9541,
  38859,
  6000,
  5915,
  21547,
  47739,
  13909,
  624,
  25621,
  30173,
  37807,
  7053,
  39360,
  20774,
 

In [None]:
# experiment
# OOD experiment (ood_exposure=False)

# Short run: one training epoch, two pool samples and a training-set budget of
# 20 + 10, with CIFAR-10 as in-distribution data and SVHN as OoD data. With
# ood_exposure=False, candidates whose label is -1 are not acquired.
experiment = UnifiedExperiment(
    seed=1,
    # Data
    id_dataset_name="CIFAR-10",
    ood_dataset_name="SVHN",
    ood_exposure=False,
    evaluation_set_size=100,
    # Acquisition
    acquisition_function=acquisition_functions.EvalBALD,
    acquisition_size=10,
    num_pool_samples=2,
    max_training_set=20 + 10,
    # Training
    max_training_epochs=1,
    device="cuda",
)

# `run` fills the dict in place; the bare name on the last line makes the
# notebook display it.
results = dict()
experiment.run(results)
results

Creating: OoDDatasetConfig(
	ood_dataset_name=SVHN,
	ood_repetitions=1,
	ood_exposure=False
)
Creating: ExperimentDataConfig(
	id_dataset_name=CIFAR-10,
	id_repetitions=1,
	initial_training_set_size=0,
	validation_set_size=1024,
	validation_split_random_state=0,
	evaluation_set_size=100,
	add_dataset_noise=False,
	device=cuda,
	ood_dataset_config=OoDDatasetConfig(ood_dataset_name='SVHN', ood_repetitions=1, ood_exposure=False)
)
Files already downloaded and verified
Files already downloaded and verified
Using downloaded and verified file: data/SVHN/train_32x32.mat
Using downloaded and verified file: data/SVHN/test_32x32.mat
Creating: Cifar10ModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=1
)
Creating: EvalBALD(
	acquisition_size=10,
	num_pool_samples=2
)
Training set size 0:
Cosine Annealing


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.0991, 'crossentropy': tensor(2.3396)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f4e4fe8aca0>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f4e4fe8a4f0>,
	trained_model=TrainedBayesianModel(model=BayesianResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2)

get_predictions_labels:   0%|          | 0/200 [00:00<?, ?it/s]

Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.099609375, 'crossentropy': 2.418884754180908}


get_predictions_labels:   0%|          | 0/244266 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/244266 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122133 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122133 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122133 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122133 [00:00<?, ?it/s]

CandidateBatch(scores=[0.16103601455688477, 0.14529132843017578, 0.137229323387146, 0.1340651512145996, 0.12953388690948486, 0.12592089176177979, 0.12433218955993652, 0.12192320823669434, 0.12090826034545898, 0.119803786277771], indices=[27777, 39432, 33443, 10304, 27398, 48250, 27779, 7104, 43814, 16946])
[('id', 27836), ('id', 39513), ('id', 33509), ('id', 10328), ('id', 27457), ('id', 48350), ('id', 27838), ('id', 7124), ('id', 43902), ('id', 16982)]
Acquiring (label, score)s: 0 (0.161), 0 (0.1453), 6 (0.1372), 6 (0.1341), 0 (0.1295), 0 (0.1259), 6 (0.1243), 3 (0.1219), 0 (0.1209), 1 (0.1198)
Training set size 10:
Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.10546875, 'crossentropy': 10.567780017852783}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1096, 'crossentropy': tensor(8.3496)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f4e4fe8aca0>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f4e4fe8a4f0>,
	trained_model=TrainedBayesianModel(model=BayesianResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2)

get_predictions_labels:   0%|          | 0/220 [00:00<?, ?it/s]

Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1005859375, 'crossentropy': 7.0234081745147705}


get_predictions_labels:   0%|          | 0/244246 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/244246 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122123 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122123 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122123 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122123 [00:00<?, ?it/s]

CandidateBatch(scores=[0.13051056861877441, 0.13019627332687378, 0.12381798028945923, 0.12050777673721313, 0.11997151374816895, 0.11890137195587158, 0.11812090873718262, 0.117886483669281, 0.11775517463684082, 0.1174015998840332], indices=[108693, 75658, 16410, 71458, 29110, 111919, 107843, 38796, 8566, 10414])
[('ood', 59827), ('ood', 26792), ('id', 16446), ('ood', 22592), ('id', 29177), ('ood', 63053), ('ood', 58977), ('id', 38881), ('id', 8588), ('id', 10440)]
Acquiring (label, score)s: -1 (0.1305), -1 (0.1302), 2 (0.1238), -1 (0.1205), 4 (0.12), -1 (0.1189), -1 (0.1181), 3 (0.1179), 2 (0.1178), 5 (0.1174)
Training set size 15:
Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1328125, 'crossentropy': 4.310318470001221}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.143, 'crossentropy': tensor(3.9590)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f4e4fe8aca0>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f4e4fe8a4f0>,
	trained_model=TrainedBayesianModel(model=BayesianResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2):

get_predictions_labels:   0%|          | 0/230 [00:00<?, ?it/s]

Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1298828125, 'crossentropy': 4.2707507610321045}


get_predictions_labels:   0%|          | 0/244236 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/244236 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122118 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122118 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122118 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122118 [00:00<?, ?it/s]

CandidateBatch(scores=[0.34349530935287476, 0.3363794684410095, 0.33509504795074463, 0.33454418182373047, 0.3339540958404541, 0.33392608165740967, 0.329687237739563, 0.32836365699768066, 0.32585471868515015, 0.3249916434288025], indices=[113291, 43299, 88518, 105442, 10227, 46289, 76690, 68508, 84017, 91928])
[('ood', 64430), ('id', 43398), ('ood', 39657), ('ood', 56581), ('id', 10253), ('id', 46397), ('ood', 27829), ('ood', 19647), ('ood', 35156), ('ood', 43067)]
Acquiring (label, score)s: -1 (0.3435), 4 (0.3364), -1 (0.3351), -1 (0.3345), 3 (0.334), 6 (0.3339), -1 (0.3297), -1 (0.3284), -1 (0.3259), -1 (0.325)
Training set size 18:
Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.158203125, 'crossentropy': 3.7756258249282837}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1572, 'crossentropy': tensor(3.5865)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f4e4fe8aca0>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f4e4fe8a4f0>,
	trained_model=TrainedBayesianModel(model=BayesianResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2)

get_predictions_labels:   0%|          | 0/236 [00:00<?, ?it/s]

Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1328125, 'crossentropy': 4.023433804512024}


get_predictions_labels:   0%|          | 0/244230 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/244230 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122115 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122115 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/122115 [00:00<?, ?it/s]

Entropy:   0%|          | 0/122115 [00:00<?, ?it/s]

CandidateBatch(scores=[0.2958311438560486, 0.18454515933990479, 0.17109745740890503, 0.16570615768432617, 0.1645340919494629, 0.16295921802520752, 0.15645307302474976, 0.15490907430648804, 0.1431180238723755, 0.14293724298477173], indices=[68669, 115230, 110251, 92773, 60583, 118951, 99599, 88297, 98646, 8231])
[('ood', 19811), ('ood', 66372), ('ood', 61393), ('ood', 43915), ('ood', 11725), ('ood', 70093), ('ood', 50741), ('ood', 39439), ('ood', 49788), ('id', 8253)]
Acquiring (label, score)s: -1 (0.2958), -1 (0.1845), -1 (0.1711), -1 (0.1657), -1 (0.1645), -1 (0.163), -1 (0.1565), -1 (0.1549), -1 (0.1431), 3 (0.1429)
Training set size 19:
Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.15625, 'crossentropy': 4.910492897033691}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1531, 'crossentropy': tensor(4.3525)}
Done.


{'dataset_info': {'training': "('CIFAR-10 (Train, seed=0, 48976 samples)') + 'SVHN (Train, seed=0, 73257 samples)' | constant_target{'target': tensor(-1), 'num_classes': 10}",
  'test': "'CIFAR-10 (Test)'"},
 'initial_training_set_indices': [],
 'evaluation_set_indices': [12980,
  44617,
  3812,
  42704,
  6729,
  38942,
  48125,
  16968,
  5652,
  4045,
  6984,
  21168,
  37164,
  33354,
  47307,
  17878,
  26665,
  40819,
  14805,
  201,
  33976,
  35571,
  15578,
  36667,
  5551,
  23088,
  32496,
  5705,
  23255,
  25559,
  33058,
  43729,
  47518,
  36303,
  18452,
  34447,
  24821,
  36157,
  48089,
  25120,
  26944,
  24745,
  11001,
  6995,
  10899,
  36881,
  7002,
  19049,
  13388,
  40737,
  66,
  14046,
  45656,
  5604,
  9134,
  35979,
  19757,
  43627,
  35248,
  23566,
  46542,
  39478,
  25443,
  45862,
  30730,
  9611,
  43077,
  23902,
  9541,
  38859,
  6000,
  5915,
  21547,
  47739,
  13909,
  624,
  25621,
  30173,
  37807,
  7053,
  39360,
  20774,
  23760,
  265

## MNIST vs FashionMNIST Coverage

In [None]:
# experiment
# MNIST OOD experiment (ood_exposure=True)

# Smoke run: MNIST is the in-distribution dataset and FashionMNIST is the
# out-of-distribution dataset, with OoD exposure switched on.
# Deliberately tiny settings: a single training epoch and one acquisition of
# 10 samples, growing the training set from the default 20 to 30.
results = {}

experiment = UnifiedExperiment(
    seed=1,
    # Data
    id_dataset_name="MNIST",
    ood_dataset_name="FashionMNIST",
    ood_exposure=True,
    evaluation_set_size=100,
    # Acquisition
    acquisition_function=acquisition_functions.EvalBALD,
    acquisition_size=10,
    num_pool_samples=2,
    max_training_set=20 + 10,
    # Training
    model_trainer_factory=MnistModelTrainer,
    max_training_epochs=1,
    device="cuda",
)
experiment.run(results)

# Last expression of the cell: show what the run stored.
results

Creating: OoDDatasetConfig(
	ood_dataset_name=FashionMNIST,
	ood_repetitions=1,
	ood_exposure=True
)
Creating: ExperimentDataConfig(
	id_dataset_name=MNIST,
	id_repetitions=1,
	initial_training_set_size=20,
	validation_set_size=1024,
	validation_split_random_state=0,
	evaluation_set_size=100,
	add_dataset_noise=False,
	device=cuda,
	ood_dataset_config=OoDDatasetConfig(ood_dataset_name='FashionMNIST', ood_repetitions=1, ood_exposure=True)
)
Creating: MnistModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=1
)
Creating: EvalBALD(
	acquisition_size=10,
	num_pool_samples=2
)
Training set size 20:


100%|##########| 1/1 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.49609375, 'crossentropy': 1.8818157464265823}
RestoringEarlyStopping: Restoring best parameters. (Score: 0.49609375)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.5149, 'crossentropy': tensor(1.5674)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f76bfd837c0>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f76bfd838e0>,
	trained_model=TrainedBayesianModel(model=BayesianMNISTCNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv1_drop): ConsistentMCDropout2d(p=0.5)
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): ConsistentMCDropout2d(p=0.5)
  (fc1): Linear(in_features=1024, out_features=128, bias=True)
  (fc1_drop): ConsistentMCDropout(p=0.5)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)),
	model_trainer=MnistModelTrainer(device='cuda', num_training_samples=1, num_validation_samples=20, num_patience_epochs=20, max_training_epochs=1, min_samples_per_epoch=1024, num_training_batch_size=64, num_evaluation

get_predictions_labels:   0%|          | 0/240 [00:00<?, ?it/s]

100%|##########| 1/1 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.189453125, 'crossentropy': 2.203241378068924}
RestoringEarlyStopping: Restoring best parameters. (Score: 0.189453125)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/237712 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/237712 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/118856 [00:00<?, ?it/s]

Entropy:   0%|          | 0/118856 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/118856 [00:00<?, ?it/s]

Entropy:   0%|          | 0/118856 [00:00<?, ?it/s]

CandidateBatch(scores=[0.3815857172012329, 0.3783942461013794, 0.37222814559936523, 0.36560869216918945, 0.3644925355911255, 0.363120436668396, 0.36171412467956543, 0.3588372468948364, 0.34592437744140625, 0.3444235324859619], indices=[16434, 36841, 51768, 50553, 53011, 55169, 25615, 1811, 42960, 5497])
[('id', 16463), ('id', 36906), ('id', 51872), ('id', 50655), ('id', 53118), ('id', 55282), ('id', 25655), ('id', 1817), ('id', 43036), ('id', 5509)]
Acquiring (label, score)s: [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.] (0.3816), [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.] (0.3784), [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.] (0.3722), [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.] (0.3656), [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.] (0.3645), [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.] (0.3631), [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.] (0.3617), [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.] (0.3588), [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.] (0.3459), [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.] (0.3444)
Training set size 30:


100%|##########| 1/1 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.3564453125, 'crossentropy': 2.0059348344802856}
RestoringEarlyStopping: Restoring best parameters. (Score: 0.3564453125)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.3921, 'crossentropy': tensor(1.7375)}
Done.


{'dataset_info': {'training': "'MNIST (Train, seed=0, 58976 samples)' | one_hot_targets{'num_classes': 10} + 'FashionMNIST (Train, seed=0, 60000 samples)' | uniform_targets{'num_classes': 10}",
  'test': "'MNIST (Test)'"},
 'initial_training_set_indices': [53434,
  8533,
  14640,
  39579,
  30392,
  58125,
  37915,
  3091,
  57520,
  43803,
  44119,
  52296,
  58226,
  40334,
  46037,
  22015,
  22304,
  43812,
  12640,
  53689],
 'evaluation_set_indices': [29974,
  55573,
  35472,
  44048,
  48031,
  5616,
  10110,
  47420,
  56990,
  34198,
  3792,
  5715,
  15969,
  32775,
  19757,
  34588,
  28991,
  47417,
  26501,
  12108,
  5573,
  48032,
  40646,
  43252,
  2404,
  36797,
  29079,
  40018,
  37047,
  41512,
  45567,
  801,
  10664,
  52801,
  42890,
  32972,
  45974,
  20801,
  23496,
  5803,
  10508,
  46870,
  49549,
  306,
  38725,
  13074,
  19689,
  27135,
  16068,
  18137,
  2728,
  43321,
  29950,
  380,
  27254,
  50466,
  31965,
  24052,
  44454,
  20076,
  21423,
  58

## MNIST only

In [None]:
# experiment
# MNIST experiment (ood_exposure=False)

# Smoke run: MNIST only, no out-of-distribution dataset and no OoD exposure.
# Small budget: at most 5 training epochs and one acquisition of 10 samples,
# growing the training set from the default 20 to 30.
results = {}

experiment = UnifiedExperiment(
    seed=1,
    # Data
    id_dataset_name="MNIST",
    ood_dataset_name=None,
    ood_exposure=False,
    evaluation_set_size=100,
    # Acquisition
    acquisition_function=acquisition_functions.EvalBALD,
    acquisition_size=10,
    num_pool_samples=2,
    max_training_set=20 + 10,
    # Training
    model_trainer_factory=MnistModelTrainer,
    max_training_epochs=5,
    device="cuda",
)
experiment.run(results)

# Last expression of the cell: show what the run stored.
results

Creating: ExperimentDataConfig(
	id_dataset_name=MNIST,
	id_repetitions=1,
	initial_training_set_size=20,
	validation_set_size=1024,
	validation_split_random_state=0,
	evaluation_set_size=100,
	add_dataset_noise=False,
	device=cuda,
	ood_dataset_config=None
)
Creating: MnistModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=5
)
Creating: EvalBALD(
	acquisition_size=10,
	num_pool_samples=2
)
Training set size 20:


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.494140625, 'crossentropy': 1.8813515156507492}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.6201171875, 'crossentropy': 1.7661588490009308}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.6220703125, 'crossentropy': 2.036451295018196}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.6337890625, 'crossentropy': 2.231146290898323}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.625, 'crossentropy': 2.4396891593933105}
RestoringEarlyStopping: 1 / 20
RestoringEarlyStopping: Restoring best parameters. (Score: 0.6337890625)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.6714, 'crossentropy': tensor(1.1195)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f76bf926e80>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f76bfa85b20>,
	trained_model=TrainedBayesianModel(model=BayesianMNISTCNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv1_drop): ConsistentMCDropout2d(p=0.5)
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): ConsistentMCDropout2d(p=0.5)
  (fc1): Linear(in_features=1024, out_features=128, bias=True)
  (fc1_drop): ConsistentMCDropout(p=0.5)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)),
	model_trainer=MnistModelTrainer(device='cuda', num_training_samples=1, num_validation_samples=20, num_patience_epochs=20, max_training_epochs=5, min_samples_per_epoch=1024, num_training_batch_size=64, num_evaluation

get_predictions_labels:   0%|          | 0/240 [00:00<?, ?it/s]

 20%|##        | 1/5 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.3828125, 'crossentropy': 2.0286512225866318}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.6669921875, 'crossentropy': 1.411254107952118}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.67578125, 'crossentropy': 1.2591123133897781}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.6552734375, 'crossentropy': 1.2496468126773834}
RestoringEarlyStopping: 1 / 20


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.6533203125, 'crossentropy': 1.259070336818695}
RestoringEarlyStopping: 2 / 20
RestoringEarlyStopping: Restoring best parameters. (Score: 0.67578125)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/117712 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/117712 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/58856 [00:00<?, ?it/s]

Entropy:   0%|          | 0/58856 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/58856 [00:00<?, ?it/s]

Entropy:   0%|          | 0/58856 [00:00<?, ?it/s]

CandidateBatch(scores=[0.6789409071207047, 0.6772562861442566, 0.676706463098526, 0.6762979924678802, 0.6751218438148499, 0.6746721714735031, 0.6742521692067385, 0.6724546253681183, 0.6713075637817383, 0.6703185513615608], indices=[4114, 37301, 19195, 21932, 31354, 43856, 30432, 49115, 49941, 2109])
[('id', 4125), ('id', 37368), ('id', 19225), ('id', 21967), ('id', 31407), ('id', 43936), ('id', 30485), ('id', 49211), ('id', 50042), ('id', 2115)]
Acquiring (label, score)s: 0 (0.6789), 0 (0.6773), 9 (0.6767), 5 (0.6763), 8 (0.6751), 8 (0.6747), 9 (0.6743), 8 (0.6725), 0 (0.6713), 8 (0.6703)
Training set size 30:


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.2568359375, 'crossentropy': 2.17490616440773}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.5087890625, 'crossentropy': 1.939666286110878}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.587890625, 'crossentropy': 1.9768947660923004}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.595703125, 'crossentropy': 2.080392748117447}


[1/16]   6%|6          [00:00<?]

Epoch metrics: {'accuracy': 0.6123046875, 'crossentropy': 2.385873556137085}
RestoringEarlyStopping: Restoring best parameters. (Score: 0.6123046875)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.6413, 'crossentropy': tensor(1.1311)}
Done.


{'dataset_info': {'training': "'MNIST (Train, seed=0, 58976 samples)'",
  'test': "'MNIST (Test)'"},
 'initial_training_set_indices': [53434,
  8533,
  14640,
  39579,
  30392,
  58125,
  37915,
  3091,
  57520,
  43803,
  44119,
  52296,
  58226,
  40334,
  46037,
  22015,
  22304,
  43812,
  12640,
  53689],
 'evaluation_set_indices': [29974,
  55573,
  35472,
  44048,
  48031,
  5616,
  10110,
  47420,
  56990,
  34198,
  3792,
  5715,
  15969,
  32775,
  19757,
  34588,
  28991,
  47417,
  26501,
  12108,
  5573,
  48032,
  40646,
  43252,
  2404,
  36797,
  29079,
  40018,
  37047,
  41512,
  45567,
  801,
  10664,
  52801,
  42890,
  32972,
  45974,
  20801,
  23496,
  5803,
  10508,
  46870,
  49549,
  306,
  38725,
  13074,
  19689,
  27135,
  16068,
  18137,
  2728,
  43321,
  29950,
  380,
  27254,
  50466,
  31965,
  24052,
  44454,
  20076,
  21423,
  58741,
  27145,
  38430,
  37354,
  49986,
  4321,
  12610,
  34482,
  35794,
  396,
  50036,
  46861,
  57811,
  53831,
  4

## CIFAR-10 only

In [None]:
# experiment
# CIFAR-10 experiment (ood_exposure=False)

# Smoke run: CIFAR-10 only, no out-of-distribution dataset and no OoD exposure.
# Small budget: at most 5 training epochs and one acquisition of 10 samples,
# growing the training set from the default 20 to 30.
results = {}

experiment = UnifiedExperiment(
    seed=1,
    # Data
    id_dataset_name="CIFAR-10",
    ood_dataset_name=None,
    ood_exposure=False,
    evaluation_set_size=100,
    # Acquisition
    acquisition_function=acquisition_functions.EvalBALD,
    acquisition_size=10,
    num_pool_samples=2,
    max_training_set=20 + 10,
    # Training
    model_trainer_factory=Cifar10ModelTrainer,
    max_training_epochs=5,
    device="cuda",
)
experiment.run(results)

# Last expression of the cell: show what the run stored.
results

Creating: ExperimentDataConfig(
	id_dataset_name=CIFAR-10,
	id_repetitions=1,
	initial_training_set_size=20,
	validation_set_size=1024,
	validation_split_random_state=0,
	evaluation_set_size=100,
	add_dataset_noise=False,
	device=cuda,
	ood_dataset_config=None
)
Files already downloaded and verified
Files already downloaded and verified
Creating: Cifar10ModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=5
)
Creating: EvalBALD(
	acquisition_size=10,
	num_pool_samples=2
)
Training set size 20:
Cosine Annealing


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1552734375, 'crossentropy': 5.018630504608154}


[1/39]   3%|2          [00:00<?]

Epoch 2 metrics: {'accuracy': 0.193359375, 'crossentropy': 10.327968120574951}


[1/39]   3%|2          [00:00<?]

Epoch 3 metrics: {'accuracy': 0.1767578125, 'crossentropy': 9.977657794952393}


[1/39]   3%|2          [00:00<?]

Epoch 4 metrics: {'accuracy': 0.1943359375, 'crossentropy': 9.833641529083252}


[1/39]   3%|2          [00:00<?]

Epoch 5 metrics: {'accuracy': 0.1982421875, 'crossentropy': 10.224448680877686}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1683, 'crossentropy': tensor(8.5987)}
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2,
	training_dataset=<torch.utils.data.dataset.Subset object at 0x7f76bfa85970>,
	eval_dataset=Evaluation Set (100 samples),
	validation_loader=<torch.utils.data.dataloader.DataLoader object at 0x7f76c4341c10>,
	trained_model=TrainedBayesianModel(model=BayesianResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2)

get_predictions_labels:   0%|          | 0/240 [00:00<?, ?it/s]

Cosine Annealing


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1376953125, 'crossentropy': 3.423586845397949}


[1/39]   3%|2          [00:00<?]

Epoch 2 metrics: {'accuracy': 0.1640625, 'crossentropy': 5.1844401359558105}


[1/39]   3%|2          [00:00<?]

Epoch 3 metrics: {'accuracy': 0.189453125, 'crossentropy': 4.292270660400391}


[1/39]   3%|2          [00:00<?]

Epoch 4 metrics: {'accuracy': 0.2001953125, 'crossentropy': 4.472563028335571}


[1/39]   3%|2          [00:00<?]

Epoch 5 metrics: {'accuracy': 0.201171875, 'crossentropy': 4.777093410491943}


get_predictions_labels:   0%|          | 0/97712 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/97712 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/48856 [00:00<?, ?it/s]

Entropy:   0%|          | 0/48856 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/48856 [00:00<?, ?it/s]

Entropy:   0%|          | 0/48856 [00:00<?, ?it/s]

CandidateBatch(scores=[0.6896737881470472, 0.6686933115124702, 0.6678923796862364, 0.6654589455574751, 0.6643337719142437, 0.6628233063966036, 0.6582828722894192, 0.6546936221420765, 0.6541411019861698, 0.6524237096309662], indices=[30151, 2907, 25105, 23892, 25872, 9983, 39989, 24603, 24766, 27787])
[('id', 30226), ('id', 2913), ('id', 25169), ('id', 23953), ('id', 25939), ('id', 10011), ('id', 40085), ('id', 24664), ('id', 24829), ('id', 27858)]
Acquiring (label, score)s: 0 (0.6897), 2 (0.6687), 8 (0.6679), 8 (0.6655), 0 (0.6643), 2 (0.6628), 8 (0.6583), 0 (0.6547), 8 (0.6541), 9 (0.6524)
Training set size 30:
Cosine Annealing


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1474609375, 'crossentropy': 2.3566828966140747}


[1/39]   3%|2          [00:00<?]

Epoch 2 metrics: {'accuracy': 0.1337890625, 'crossentropy': 9.67826795578003}


[1/39]   3%|2          [00:00<?]

Epoch 3 metrics: {'accuracy': 0.1875, 'crossentropy': 7.330405235290527}


[1/39]   3%|2          [00:00<?]

Epoch 4 metrics: {'accuracy': 0.2119140625, 'crossentropy': 8.059671401977539}


[1/39]   3%|2          [00:00<?]

Epoch 5 metrics: {'accuracy': 0.2080078125, 'crossentropy': 8.315721988677979}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.2031, 'crossentropy': tensor(6.3103)}
Done.


{'dataset_info': {'training': "'CIFAR-10 (Train, seed=0, 48976 samples)'",
  'test': "'CIFAR-10 (Test)'"},
 'initial_training_set_indices': [12980,
  44617,
  6984,
  21168,
  33976,
  35571,
  33058,
  43729,
  26944,
  24745,
  66,
  14046,
  46542,
  39478,
  6000,
  5915,
  39360,
  20774,
  27084,
  44464],
 'evaluation_set_indices': [3812,
  42704,
  6729,
  38942,
  48125,
  16968,
  5652,
  4045,
  10740,
  19606,
  37164,
  33354,
  47307,
  17878,
  26665,
  40819,
  14805,
  201,
  47956,
  44739,
  15578,
  36667,
  5551,
  23088,
  32496,
  5705,
  23255,
  25559,
  11975,
  44032,
  47518,
  36303,
  18452,
  34447,
  24821,
  36157,
  48089,
  25120,
  44689,
  6509,
  11001,
  6995,
  10899,
  36881,
  7002,
  19049,
  13388,
  40737,
  9210,
  22684,
  45656,
  5604,
  9134,
  35979,
  19757,
  43627,
  35248,
  23566,
  727,
  34909,
  25443,
  45862,
  30730,
  9611,
  43077,
  23902,
  9541,
  38859,
  13973,
  27923,
  21547,
  47739,
  13909,
  624,
  25621,
  301

In [None]:
# exports

def _build_configs():
    """Enumerate the experiment grid in a fixed, job-id-stable order.

    The grid is the cross product of (outermost to innermost loop):
    3 seeds x 2 batch acquisition functions x 2 OoD-exposure settings
    x 2 (in-distribution, out-of-distribution) dataset pairs = 24 configs.
    The evaluation set size and the number of pool samples are held fixed.
    """
    evaluation_set_size = 1024
    num_pool_samples = 100
    batch_acquisition_functions = [acquisition_functions.BatchEvalBALD, acquisition_functions.BatchBALD]
    dataset_pairs = [("CIFAR-10", "SVHN"), ("SVHN", "CIFAR-10")]

    grid = []
    for seed_offset in range(3):
        for acquisition_function in batch_acquisition_functions:
            for ood_exposure in [True, False]:
                for id_dataset_name, ood_dataset_name in dataset_pairs:
                    grid.append(
                        UnifiedExperiment(
                            seed=seed_offset + 1234,
                            ood_exposure=ood_exposure,
                            acquisition_function=acquisition_function,
                            acquisition_size=5,
                            num_pool_samples=num_pool_samples,
                            evaluation_set_size=evaluation_set_size,
                            id_dataset_name=id_dataset_name,
                            ood_dataset_name=ood_dataset_name,
                        )
                    )
    return grid


# Indexed by job id in the script entry point, so the order must stay stable.
configs = _build_configs()

# Script entry point: only runs when executed as a script outside IPython.
# `embedded_experiments` hands out (job_id, store) pairs; presumably one per
# job assigned to this process -- confirm against blackhc.project.
if not is_run_from_ipython() and __name__ == "__main__":
    num_jobs = len(configs)
    for job_id, store in embedded_experiments(__file__, num_jobs):
        job_config = configs[job_id]
        # Shift the configured seed by the job index before running.
        job_config.seed = job_config.seed + job_id
        print(job_config)

        # Record the exact configuration next to an (initially empty) log.
        store["log"] = {}
        store["config"] = dataclasses.asdict(job_config)

        try:
            job_config.run(store=store)
        except Exception:
            # Keep the traceback in the store, then let the failure propagate.
            store["exception"] = traceback.format_exc()
            raise

In [None]:
# Sanity check of the grid size: 3 seeds x 2 acquisition functions
# x 2 ood_exposure settings x 2 dataset pairs = 24 configurations.
len(configs)

24

In [None]:
# slow
import prettyprinter

# Enable prettyprinter's "dataclasses" extra before printing the configs.
prettyprinter.install_extras(include={"dataclasses"})

# Pretty-print the full experiment grid for manual inspection.
prettyprinter.pprint(configs)

[
    OodExperiment(
        seed=1234,
        uniform_ood=True,
        id_dataset_name='CIFAR-10',
        ood_dataset_name='SVHN',
        num_pool_samples=100,
        # class
        acquisition_function=batchbald_redux.acquisition_functions.BatchEvalBALD
    ),
    OodExperiment(
        seed=1234,
        uniform_ood=True,
        id_dataset_name='SVHN',
        ood_dataset_name='CIFAR-10',
        num_pool_samples=100,
        # class
        acquisition_function=batchbald_redux.acquisition_functions.BatchEvalBALD
    ),
    OodExperiment(
        seed=1234,
        uniform_ood=False,
        id_dataset_name='CIFAR-10',
        ood_dataset_name='SVHN',
        num_pool_samples=100,
        # class
        acquisition_function=batchbald_redux.acquisition_functions.BatchEvalBALD
    ),
    OodExperiment(
        seed=1234,
        uniform_ood=False,
        id_dataset_name='SVHN',
        ood_dataset_name='CIFAR-10',
        num_pool_samples=100,
        # class
        acquisit