# Unified Experiment Code
> Resistance is futile.

In [None]:
# default_exp unified_experiment

In [None]:
# hide
import blackhc.project.script

Appended /home/blackhc/PycharmProjects/bald-ical/src to paths
Switched to directory /home/blackhc/PycharmProjects/bald-ical
%load_ext autoreload
%autoreload 2


Import the modules and functions we are going to use.

In [None]:
# exports

import dataclasses
import traceback
from dataclasses import dataclass
from typing import Optional, Type, Union

import torch
import torch.utils.data
from blackhc.project import is_run_from_ipython
from blackhc.project.experiment import embedded_experiments

from batchbald_redux import acquisition_functions
from batchbald_redux import baseline_acquisition_functions
from batchbald_redux.acquisition_functions import (
    CandidateBatchComputer,
    EvalDatasetBatchComputer,
    EvalModelBatchComputer,
)
from batchbald_redux.black_box_model_training import evaluate
from batchbald_redux.dataset_challenges import get_base_dataset_index, get_target
from batchbald_redux.di import DependencyInjection
from batchbald_redux.experiment_data import (
    ExperimentData,
    ExperimentDataConfig,
    OoDDatasetConfig,
    StandardExperimentDataConfig,
)
from batchbald_redux.models import MnistModelTrainer
from batchbald_redux.resnet_models import Cifar10ModelTrainer
from batchbald_redux.train_eval_model import (
    TrainEvalModel,
    TrainSelfDistillationEvalModel,
)
from batchbald_redux.trained_model import ModelTrainer, BayesianEnsembleModelTrainer

In [None]:
# exports


@dataclass
class ActiveLearner:
    """Pool-based active learning loop: train, evaluate on the test set, acquire, repeat.

    Calling an instance with a dict-like ``log`` runs the whole loop. The seed is stored under
    ``log["seed"]`` and one entry per iteration is appended to ``log["active_learning_steps"]``.
    """

    # Divides the remaining sample budget to bound the number of loop iterations.
    acquisition_size: int
    # The loop stops once the training set holds at least this many samples.
    max_training_set: int

    # Forwarded to `evaluate` as `num_samples`.
    num_validation_samples: int

    acquisition_function: Union[CandidateBatchComputer, EvalModelBatchComputer]
    train_eval_model: TrainEvalModel
    model_trainer: ModelTrainer
    data: ExperimentData

    # When True, `data.train_augmentations` is ignored and `None` is passed to training instead.
    disable_training_augmentations: bool

    device: Optional

    @staticmethod
    def _make_losses(data):
        """Return ``(loss, validation_loss)`` for one training round.

        With an OoD dataset *and* OoD exposure the training loss is a KL divergence while validation
        keeps using NLL; in every other case one shared NLL loss instance serves both roles.
        """
        # TODO: this is a hack! :(
        if data.ood_dataset is not None and data.ood_exposure:
            return torch.nn.KLDivLoss(log_target=False, reduction="batchmean"), torch.nn.NLLLoss()

        shared_loss = torch.nn.NLLLoss()
        return shared_loss, shared_loss

    def _compute_candidate_batch(self, trained_model, pool_loader, validation_loader, train_augmentations, step_log):
        """Dispatch on the kind of acquisition function and return its candidate batch.

        Raises:
            ValueError: if the acquisition function is none of the three supported computer types.
        """
        data = self.data
        acquirer = self.acquisition_function

        if isinstance(acquirer, CandidateBatchComputer):
            return acquirer.compute_candidate_batch(trained_model, pool_loader, self.device)

        if isinstance(acquirer, EvalDatasetBatchComputer):
            # Fall back to the pool itself when no separate evaluation set is available.
            if len(data.evaluation_dataset) > 0:
                eval_loader = self.model_trainer.get_evaluation_dataloader(data.evaluation_dataset)
            else:
                eval_loader = pool_loader

            return acquirer.compute_candidate_batch(
                model=trained_model, pool_loader=pool_loader, eval_loader=eval_loader, device=self.device
            )

        if isinstance(acquirer, EvalModelBatchComputer):
            # Same fallback as above, but on the dataset level: an extra eval model is trained first.
            if len(data.evaluation_dataset) > 0:
                eval_dataset = data.evaluation_dataset
            else:
                eval_dataset = data.active_learning.pool_dataset

            step_log["eval_training"] = {}
            trained_eval_model = self.train_eval_model(
                model_trainer=self.model_trainer,
                training_dataset=data.active_learning.training_dataset,
                train_augmentations=train_augmentations,
                eval_dataset=eval_dataset,
                validation_loader=validation_loader,
                trained_model=trained_model,
                storage_device=data.device,
                device=self.device,
                training_log=step_log["eval_training"],
            )

            return acquirer.compute_candidate_batch(trained_model, trained_eval_model, pool_loader, device=self.device)

        raise ValueError(f"Unknown acquisition function {acquirer}!")

    def _resolve_candidates(self, candidate_batch):
        """Map pool indices of a candidate batch to ``(("id"|"ood", base index), ...)`` and labels."""
        data = self.data

        global_indices = []
        labels = []
        for pool_index in candidate_batch.indices:
            origin = get_base_dataset_index(data.active_learning.pool_dataset, pool_index)
            if origin.dataset == data.ood_dataset:
                source_tag = "ood"
            else:
                source_tag = "id"
            global_indices.append((source_tag, origin.index))
            labels.append(get_target(data.active_learning.pool_dataset, pool_index).tolist())

        return global_indices, labels

    def __call__(self, log):
        """Run the active learning loop and record everything into ``log``.

        Each entry of ``log["active_learning_steps"]`` receives the keys ``training`` and
        ``evaluation_metrics``; iterations that acquire also get ``acquisition`` (and
        ``eval_training`` when an eval model is trained).
        """
        # NOTE: per the PyTorch docs, torch.seed() reseeds the global RNG with a non-deterministic
        # value and returns it, so an earlier torch.manual_seed() no longer governs what follows.
        # Presumably the value is logged so a run can be replayed - confirm this is intended.
        log["seed"] = torch.seed()

        data = self.data
        trainer = self.model_trainer

        if self.disable_training_augmentations:
            train_augmentations = None
        else:
            train_augmentations = data.train_augmentations

        train_loader = trainer.get_train_dataloader(data.active_learning.training_dataset)
        pool_loader = trainer.get_evaluation_dataloader(data.active_learning.pool_dataset)
        validation_loader = trainer.get_evaluation_dataloader(data.validation_dataset)
        test_loader = trainer.get_evaluation_dataloader(data.test_dataset)

        # Containers are assigned into the log and then read back before use (here and below).
        # Presumably the log may wrap assigned values - keep this pattern; verify against the store type.
        log["active_learning_steps"] = []
        steps = log["active_learning_steps"]

        # Allow 50% more iterations than an exact fill would need: an iteration can add fewer than
        # `acquisition_size` samples because candidates labelled -1 may be dropped below.
        initial_size = len(data.active_learning.training_dataset)
        iteration_budget = int(1.5 * (self.max_training_set - initial_size) / self.acquisition_size)

        acquisitions_done = 0
        while True:
            training_set_size = len(data.active_learning.training_dataset)
            print(f"Training set size {training_set_size}:")

            steps.append({})
            step_log = steps[-1]
            step_log["training"] = {}

            loss, validation_loss = self._make_losses(data)

            trained_model = trainer.get_trained(
                train_loader=train_loader,
                train_augmentations=train_augmentations,
                validation_loader=validation_loader,
                log=step_log["training"],
                loss=loss,
                validation_loss=validation_loss,
            )

            metrics = evaluate(
                model=trained_model,
                num_samples=self.num_validation_samples,
                loader=test_loader,
                device=self.device,
                storage_device="cpu",
            )
            step_log["evaluation_metrics"] = metrics
            print(f"Perf after training {metrics}")

            # Stop after the final evaluation: either the set is full or the iteration budget is spent.
            if not (training_set_size < self.max_training_set and acquisitions_done < iteration_budget):
                print("Done.")
                break

            candidate_batch = self._compute_candidate_batch(
                trained_model, pool_loader, validation_loader, train_augmentations, step_log
            )

            candidate_global_dataset_indices, candidate_labels = self._resolve_candidates(candidate_batch)

            step_log["acquisition"] = {
                "indices": candidate_global_dataset_indices,
                "labels": candidate_labels,
                "scores": candidate_batch.scores,
            }

            print(candidate_batch)
            print(candidate_global_dataset_indices)

            if data.ood_dataset is None or data.ood_exposure:
                indices_to_acquire = candidate_batch.indices
            else:
                # Without OoD exposure, candidates whose label is -1 are not added to the training set.
                indices_to_acquire = [
                    pool_index
                    for pool_index, label in zip(candidate_batch.indices, candidate_labels)
                    if label != -1
                ]
            data.active_learning.acquire(indices_to_acquire)

            ls = ", ".join(f"{label} ({score:.4})" for label, score in zip(candidate_labels, candidate_batch.scores))
            print(f"Acquiring (label, score)s: {ls}")

            acquisitions_done += 1


@dataclass
class UnifiedExperiment:
    """Configuration and entry point of one active learning experiment.

    The fields double as the pool of keyword arguments from which the acquisition function, the
    model trainer and the eval-model trainer are built via `DependencyInjection`.
    """

    seed: int

    experiment_data_config: ExperimentDataConfig

    acquisition_size: int = 5
    max_training_set: int = 200

    max_training_epochs: int = 300

    num_pool_samples: int = 100
    num_validation_samples: int = 20
    num_training_samples: int = 1

    device: str = "cuda"
    # NOTE(review): defaults to None, so callers presumably have to pass a type such as
    # acquisition_functions.BALD - confirm how DependencyInjection treats None.
    acquisition_function: Union[Type[CandidateBatchComputer], Type[EvalModelBatchComputer]] = None
    train_eval_model: Type[TrainEvalModel] = TrainSelfDistillationEvalModel
    # NOTE(review): defaults to None as well; e.g. Cifar10ModelTrainer or MnistModelTrainer.
    model_trainer_factory: Type[ModelTrainer] = None
    # Values above 1 wrap the model trainer in a BayesianEnsembleModelTrainer (see `run`).
    ensemble_size: int = 1

    temperature: float = 1.0
    coldness: float = 1.0
    stochastic_mode: acquisition_functions.StochasticMode = None
    epig_bootstrap_type: acquisition_functions.BootstrapType = acquisition_functions.BootstrapType.NO_BOOTSTRAP
    epig_bootstrap_factor: float = 1.0
    epig_dtype: torch.dtype = torch.double
    disable_training_augmentations: bool = False
    cache_explicit_eval_model: bool = False
    resnet18_dropout_head: bool = True

    def load_experiment_data(self) -> ExperimentData:
        """Print the data configuration and load the experiment data onto `self.device`."""
        config = self.experiment_data_config
        print(config)
        return config.load(self.device)

    # Simple Dependency Injection
    def _create_from_fields(self, dataclass_type):
        """Instantiate `dataclass_type` through `DependencyInjection` fed with this instance's attributes."""
        return DependencyInjection(vars(self)).create_dataclass_type(dataclass_type)

    def create_acquisition_function(self):
        """Build the configured acquisition function."""
        return self._create_from_fields(self.acquisition_function)

    def create_train_eval_model(self) -> TrainEvalModel:
        """Build the configured eval-model trainer."""
        return self._create_from_fields(self.train_eval_model)

    def create_model_trainer(self) -> ModelTrainer:
        """Build the configured model trainer (without any ensemble wrapping)."""
        return self._create_from_fields(self.model_trainer_factory)

    def run(self, store):
        """Seed torch, load the data, assemble an `ActiveLearner` and run it, logging into `store`."""
        torch.manual_seed(self.seed)

        # Active Learning setup
        data = self.load_experiment_data()
        store["dataset_info"] = {
            "training": repr(data.active_learning.base_dataset),
            "test": repr(data.test_dataset),
        }
        store["initial_training_set_indices"] = data.initial_training_set_indices
        store["evaluation_set_indices"] = data.evaluation_set_indices

        # Creation order is kept: acquisition function, model trainer, then eval-model trainer.
        acquisition_function = self.create_acquisition_function()

        model_trainer = self.create_model_trainer()
        if self.ensemble_size > 1:
            model_trainer = BayesianEnsembleModelTrainer(model_trainer=model_trainer, ensemble_size=self.ensemble_size)

        train_eval_model = self.create_train_eval_model()

        learner = ActiveLearner(
            data=data,
            model_trainer=model_trainer,
            train_eval_model=train_eval_model,
            acquisition_function=acquisition_function,
            acquisition_size=self.acquisition_size,
            max_training_set=self.max_training_set,
            num_validation_samples=self.num_validation_samples,
            disable_training_augmentations=self.disable_training_augmentations,
            device=self.device,
        )
        learner(store)

## CIFAR-10 vs SVHN Coverage

In [None]:
# experiment
# OOD experiment (ood_exposure=True)

# Smoke-test sized run: one training epoch and a budget of 10 acquired samples (20 -> 30).
experiment = UnifiedExperiment(
    experiment_data_config=StandardExperimentDataConfig(
        id_dataset_name="CIFAR-10",
        id_repetitions=1,
        initial_training_set_size=20,
        validation_set_size=4096,
        validation_split_random_state=0,
        evaluation_set_size=100,
        add_dataset_noise=False,
        ood_dataset_config=OoDDatasetConfig(ood_dataset_name="SVHN", ood_repetitions=1, ood_exposure=True),
    ),
    seed=1,
    max_training_epochs=1,
    max_training_set=20 + 10,
    acquisition_function=acquisition_functions.EvalBALD,
    acquisition_size=10,
    # UnifiedExperiment.model_trainer_factory defaults to None, so the CIFAR-10 trainer has to be
    # requested explicitly (the MNIST cells do the same with MnistModelTrainer).
    model_trainer_factory=Cifar10ModelTrainer,
    num_pool_samples=2,
    device="cuda",
)

results = {}
experiment.run(results)
# Bare expression so the notebook displays the collected log.
results

StandardExperimentDataConfig(id_dataset_name='CIFAR-10', id_repetitions=1, initial_training_set_size=20, validation_set_size=4096, validation_split_random_state=0, evaluation_set_size=100, add_dataset_noise=False, ood_dataset_config=OoDDatasetConfig(ood_dataset_name='SVHN', ood_repetitions=1, ood_exposure=True))
Files already downloaded and verified


KeyboardInterrupt: 

In [None]:
# experiment
# OOD experiment (ood_exposure=False)

# Smoke-test sized run: one training epoch and at most one acquisition step of 10 candidates.
experiment = UnifiedExperiment(
    experiment_data_config=StandardExperimentDataConfig(
        id_dataset_name="CIFAR-10",
        id_repetitions=1,
        initial_training_set_size=20,
        validation_set_size=4096,
        validation_split_random_state=0,
        evaluation_set_size=100,
        add_dataset_noise=False,
        ood_dataset_config=OoDDatasetConfig(ood_dataset_name="SVHN", ood_repetitions=1, ood_exposure=False),
    ),
    seed=1,
    max_training_epochs=1,
    max_training_set=20 + 10,
    acquisition_function=acquisition_functions.EvalBALD,
    acquisition_size=10,
    # UnifiedExperiment.model_trainer_factory defaults to None, so the CIFAR-10 trainer has to be
    # requested explicitly (the MNIST cells do the same with MnistModelTrainer).
    model_trainer_factory=Cifar10ModelTrainer,
    num_pool_samples=2,
    device="cuda",
)

results = {}
experiment.run(results)
# Bare expression so the notebook displays the collected log.
results

Files already downloaded and verified


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)


Files already downloaded and verified
Using downloaded and verified file: data/SVHN/train_32x32.mat
Using downloaded and verified file: data/SVHN/test_32x32.mat
Creating: EvalBALD(
	acquisition_size=10,
	num_pool_samples=2
)
Creating: Cifar10ModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=1
)
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2
)
Training set size 20:
Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.157958984375, 'crossentropy': 2.860942095518112}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1588, 'crossentropy': tensor(2.6745)}


get_predictions_labels:   0%|          | 0/240 [00:00<?, ?it/s]

Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.1494140625, 'crossentropy': 2.807897835969925}


get_predictions_labels:   0%|          | 0/238082 [00:00<?, ?it/s]

get_predictions_labels:   0%|          | 0/238082 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/119041 [00:00<?, ?it/s]

Entropy:   0%|          | 0/119041 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/119041 [00:00<?, ?it/s]

Entropy:   0%|          | 0/119041 [00:00<?, ?it/s]

CandidateBatch(scores=[0.5664071440696716, 0.5583854913711548, 0.5552188754081726, 0.5509648323059082, 0.5495696663856506, 0.5474484264850616, 0.5472981631755829, 0.5447407066822052, 0.5361950695514679, 0.5273618102073669], indices=[107452, 17624, 68416, 40016, 107706, 107707, 107454, 62234, 107453, 107705])
[('ood', 61668), ('id', 17671), ('ood', 22632), ('id', 40123), ('ood', 61922), ('ood', 61923), ('ood', 61670), ('ood', 16450), ('ood', 61669), ('ood', 61921)]
Acquiring (label, score)s: -1 (0.5664), 2 (0.5584), -1 (0.5552), 4 (0.551), -1 (0.5496), -1 (0.5474), -1 (0.5473), -1 (0.5447), -1 (0.5362), -1 (0.5274)
Training set size 22:
Cosine Annealing


100%|##########| 1/1 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.11181640625, 'crossentropy': 3.289741963148117}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1117, 'crossentropy': tensor(2.8899)}
Done.


{'dataset_info': {'training': "('CIFAR-10 (Train, seed=0, 45904 samples)') + 'SVHN (Train, seed=0, 73257 samples)' | constant_target{'target': tensor(-1), 'num_classes': 10}",
  'test': "'CIFAR-10 (Test)'"},
 'initial_training_set_indices': [29863,
  22519,
  21079,
  37521,
  15583,
  23405,
  44362,
  35084,
  29380,
  26994,
  39909,
  29333,
  527,
  31668,
  43591,
  12293,
  39247,
  36879,
  7979,
  33280],
 'evaluation_set_indices': [4291,
  37709,
  11949,
  12149,
  17798,
  32245,
  235,
  38778,
  32864,
  39123,
  22797,
  43486,
  5813,
  40306,
  4570,
  15667,
  36937,
  25830,
  2404,
  30096,
  19730,
  11343,
  17034,
  43455,
  15770,
  7894,
  36848,
  11067,
  12968,
  29454,
  33432,
  21672,
  38040,
  259,
  29571,
  41639,
  43971,
  13338,
  42364,
  11109,
  18814,
  21474,
  10681,
  40169,
  9997,
  12784,
  22626,
  36949,
  33130,
  13688,
  6466,
  28263,
  45060,
  10422,
  7005,
  3143,
  26143,
  28581,
  13207,
  10038,
  6750,
  21221,
  9097,
  26

## MNIST vs FashionMNIST Coverage

In [None]:
# experiment
# MNIST OOD experiment (ood_exposure=True)

# Data: MNIST as in-distribution pool, FashionMNIST mixed in as OoD data with exposure enabled.
data_config = StandardExperimentDataConfig(
    id_dataset_name="MNIST",
    id_repetitions=1,
    initial_training_set_size=20,
    validation_set_size=4096,
    validation_split_random_state=0,
    evaluation_set_size=100,
    add_dataset_noise=False,
    ood_dataset_config=OoDDatasetConfig(
        ood_dataset_name="FashionMNIST",
        ood_repetitions=1,
        ood_exposure=True,
    ),
)

# Smoke-test sized run: one training epoch and a budget of 10 acquired samples (20 -> 30).
experiment = UnifiedExperiment(
    seed=1,
    device="cuda",
    experiment_data_config=data_config,
    model_trainer_factory=MnistModelTrainer,
    acquisition_function=acquisition_functions.BALD,
    acquisition_size=10,
    max_training_set=20 + 10,
    max_training_epochs=1,
    num_pool_samples=2,
)

results = {}
experiment.run(results)
# Bare expression so the notebook displays the collected log.
results

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)


Creating: BALD(
	acquisition_size=10,
	num_pool_samples=2
)
Creating: MnistModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=1
)
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2
)
Training set size 20:


100%|##########| 1/1 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.554931640625, 'crossentropy': 1.8601678125560284}
RestoringEarlyStopping: Restoring best parameters. (Score: 0.554931640625)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.5582, 'crossentropy': tensor(1.6005)}


get_predictions_labels:   0%|          | 0/231568 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/115784 [00:00<?, ?it/s]

Entropy:   0%|          | 0/115784 [00:00<?, ?it/s]

CandidateBatch(scores=[0.5286269187927246, 0.5206483602523804, 0.5205633640289307, 0.5157134532928467, 0.5151408314704895, 0.513818621635437, 0.5130301117897034, 0.5124313831329346, 0.5110619068145752, 0.5096436738967896], indices=[19319, 24059, 30835, 46776, 43171, 49218, 47110, 22783, 6099, 43495])
[('id', 19365), ('id', 24112), ('id', 30906), ('id', 46876), ('id', 43266), ('id', 49321), ('id', 47210), ('id', 22832), ('id', 6113), ('id', 43590)]
Acquiring (label, score)s: [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] (0.5286), [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] (0.5206), [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] (0.5206), [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] (0.5157), [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] (0.5151), [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] (0.5138), [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] (0.513), [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] (0.5124), [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

100%|##########| 1/1 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.349853515625, 'crossentropy': 1.974664144217968}
RestoringEarlyStopping: Restoring best parameters. (Score: 0.349853515625)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.3325, 'crossentropy': tensor(1.9364)}
Done.


{'dataset_info': {'training': "'MNIST (Train, seed=0, 55904 samples)' | one_hot_targets{'num_classes': 10} + 'FashionMNIST (Train, seed=0, 60000 samples)' | uniform_targets{'num_classes': 10}",
  'test': "'MNIST (Test)'"},
 'initial_training_set_indices': [47227,
  11511,
  18383,
  41080,
  32837,
  24393,
  23904,
  11784,
  20439,
  35043,
  27367,
  30426,
  32361,
  26116,
  24386,
  4689,
  44895,
  24211,
  17212,
  3478],
 'evaluation_set_indices': [49,
  12425,
  22159,
  22739,
  701,
  11289,
  4629,
  53972,
  52127,
  6277,
  18804,
  25364,
  18841,
  12957,
  10232,
  42477,
  35057,
  43089,
  48907,
  15149,
  24587,
  52160,
  19050,
  30188,
  50376,
  17140,
  26654,
  28683,
  36031,
  32477,
  32792,
  52660,
  52712,
  49702,
  40661,
  10190,
  18143,
  16617,
  54012,
  38376,
  1924,
  39518,
  1625,
  7896,
  1701,
  28972,
  3717,
  42815,
  6274,
  44154,
  44312,
  10944,
  14937,
  6358,
  1182,
  55213,
  9292,
  33572,
  13252,
  30667,
  31242,
  51852

## MNIST only

In [None]:
# experiment
# MNIST experiment (ood_exposure=False)

# Data: MNIST only, no OoD dataset is configured.
data_config = StandardExperimentDataConfig(
    id_dataset_name="MNIST",
    id_repetitions=1,
    initial_training_set_size=20,
    validation_set_size=4096,
    validation_split_random_state=0,
    evaluation_set_size=100,
    add_dataset_noise=False,
    ood_dataset_config=None,
)

# Smoke-test sized run: up to five training epochs and a budget of 10 acquired samples (20 -> 30).
experiment = UnifiedExperiment(
    seed=1,
    device="cuda",
    experiment_data_config=data_config,
    model_trainer_factory=MnistModelTrainer,
    acquisition_function=acquisition_functions.BALD,
    acquisition_size=10,
    max_training_set=20 + 10,
    max_training_epochs=5,
    num_pool_samples=2,
)

results = {}
experiment.run(results)
# Bare expression so the notebook displays the collected log.
results

Creating: BALD(
	acquisition_size=10,
	num_pool_samples=2
)
Creating: MnistModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=5
)
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2
)
Training set size 20:


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.603759765625, 'crossentropy': 1.8048731945455074}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.679931640625, 'crossentropy': 1.5223335921764374}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.679443359375, 'crossentropy': 1.623240053653717}
RestoringEarlyStopping: 1 / 20


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.690673828125, 'crossentropy': 1.8084253370761871}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.685302734375, 'crossentropy': 1.965613130480051}
RestoringEarlyStopping: 1 / 20
RestoringEarlyStopping: Restoring best parameters. (Score: 0.690673828125)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.696, 'crossentropy': tensor(0.9803)}


get_predictions_labels:   0%|          | 0/111568 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/55784 [00:00<?, ?it/s]

Entropy:   0%|          | 0/55784 [00:00<?, ?it/s]

CandidateBatch(scores=[0.6911463737487793, 0.6911274120211601, 0.6905933320522308, 0.6904804110527039, 0.6904016882181168, 0.6902305968105793, 0.6902124583721161, 0.6899457620456815, 0.6899281330406666, 0.6897666100412607], indices=[14719, 272, 7182, 27445, 43809, 7177, 11858, 45104, 22187, 36008])
[('id', 14750), ('id', 274), ('id', 7199), ('id', 27510), ('id', 43904), ('id', 7194), ('id', 11885), ('id', 45202), ('id', 22235), ('id', 36091)]
Acquiring (label, score)s: 1 (0.6911), 0 (0.6911), 8 (0.6906), 7 (0.6905), 7 (0.6904), 9 (0.6902), 2 (0.6902), 7 (0.6899), 1 (0.6899), 9 (0.6898)
Training set size 30:


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.45263671875, 'crossentropy': 1.9057771041989326}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.709716796875, 'crossentropy': 1.3960129246115685}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.74609375, 'crossentropy': 1.32473498955369}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.7431640625, 'crossentropy': 1.4398422054946423}
RestoringEarlyStopping: 1 / 20


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.7548828125, 'crossentropy': 1.4579520002007484}
RestoringEarlyStopping: Restoring best parameters. (Score: 0.7548828125)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.751, 'crossentropy': tensor(0.8279)}
Done.


{'dataset_info': {'training': "'MNIST (Train, seed=0, 55904 samples)'",
  'test': "'MNIST (Test)'"},
 'initial_training_set_indices': [47227,
  11511,
  18383,
  41080,
  32837,
  24393,
  23904,
  11784,
  20439,
  35043,
  27367,
  30426,
  32361,
  26116,
  24386,
  4689,
  44895,
  24211,
  17212,
  3478],
 'evaluation_set_indices': [49,
  12425,
  22159,
  22739,
  701,
  11289,
  4629,
  53972,
  52127,
  6277,
  18804,
  25364,
  18841,
  12957,
  10232,
  42477,
  35057,
  43089,
  48907,
  15149,
  24587,
  52160,
  19050,
  30188,
  50376,
  17140,
  26654,
  28683,
  36031,
  32477,
  32792,
  52660,
  52712,
  49702,
  40661,
  10190,
  18143,
  16617,
  54012,
  38376,
  1924,
  39518,
  1625,
  7896,
  1701,
  28972,
  3717,
  42815,
  6274,
  44154,
  44312,
  10944,
  14937,
  6358,
  1182,
  55213,
  9292,
  33572,
  13252,
  30667,
  31242,
  51852,
  33800,
  42491,
  31258,
  42348,
  42109,
  47515,
  1844,
  9314,
  18329,
  15263,
  52934,
  25225,
  25735,
  131

## MNIST only + BADGE

In [None]:
# experiment
# MNIST-only experiment (no OoD dataset) using the BADGE baseline acquisition function

data_config = StandardExperimentDataConfig(
    id_dataset_name="MNIST",
    id_repetitions=1,
    initial_training_set_size=20,
    validation_set_size=4096,
    validation_split_random_state=0,
    evaluation_set_size=100,
    add_dataset_noise=False,
    ood_dataset_config=None,
)

# Smoke-test sized run: up to five training epochs and a budget of 10 acquired samples (20 -> 30).
experiment = UnifiedExperiment(
    seed=1,
    device="cuda",
    experiment_data_config=data_config,
    model_trainer_factory=MnistModelTrainer,
    acquisition_function=baseline_acquisition_functions.BADGE,
    acquisition_size=10,
    max_training_set=20 + 10,
    max_training_epochs=5,
    num_pool_samples=2,
)

results = {}
experiment.run(results)
# Bare expression so the notebook displays the collected log.
results

Creating: BADGE(
	acquisition_size=10
)
Creating: MnistModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=5
)
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2
)
Training set size 20:


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.512451171875, 'crossentropy': 1.9132713787257671}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.63818359375, 'crossentropy': 1.6632042974233627}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.68994140625, 'crossentropy': 1.746895831078291}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.66845703125, 'crossentropy': 2.059821903705597}
RestoringEarlyStopping: 1 / 20


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.677001953125, 'crossentropy': 2.216190990060568}
RestoringEarlyStopping: 2 / 20
RestoringEarlyStopping: Restoring best parameters. (Score: 0.68994140625)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.6936, 'crossentropy': tensor(0.9935)}


get_grad_embeddings:   0%|          | 0/55784 [00:00<?, ?it/s]

#Samps	Total Distance
1	5031186.862045288
2	4363374.027002335
3	3427368.4897241592
4	3124580.681898117
5	3120869.71295166
6	3018759.9706284828
7	2842746.541076125
8	2612532.148223342
9	2498573.2148422613
CandidateBatch(scores=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], indices=[115, 1318, 9981, 10663, 12903, 54210, 36623, 25029, 31107, 24184])
[('id', 116), ('id', 1322), ('id', 10001), ('id', 10685), ('id', 12931), ('id', 54329), ('id', 36706), ('id', 25086), ('id', 31178), ('id', 24238)]
Acquiring (label, score)s: 0 (0.0), 2 (0.0), 9 (0.0), 9 (0.0), 0 (0.0), 2 (0.0), 9 (0.0), 2 (0.0), 5 (0.0), 3 (0.0)
Training set size 30:


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.296630859375, 'crossentropy': 2.080603674054146}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.596923828125, 'crossentropy': 1.576156985014677}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.69287109375, 'crossentropy': 1.5450268387794495}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.715576171875, 'crossentropy': 1.5268513858318329}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.69384765625, 'crossentropy': 1.828642327338457}
RestoringEarlyStopping: 1 / 20
RestoringEarlyStopping: Restoring best parameters. (Score: 0.715576171875)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.7307, 'crossentropy': tensor(0.8910)}
Done.


{'dataset_info': {'training': "'MNIST (Train, seed=0, 55904 samples)'",
  'test': "'MNIST (Test)'"},
 'initial_training_set_indices': [47227,
  11511,
  18383,
  41080,
  32837,
  24393,
  23904,
  11784,
  20439,
  35043,
  27367,
  30426,
  32361,
  26116,
  24386,
  4689,
  44895,
  24211,
  17212,
  3478],
 'evaluation_set_indices': [49,
  12425,
  22159,
  22739,
  701,
  11289,
  4629,
  53972,
  52127,
  6277,
  18804,
  25364,
  18841,
  12957,
  10232,
  42477,
  35057,
  43089,
  48907,
  15149,
  24587,
  52160,
  19050,
  30188,
  50376,
  17140,
  26654,
  28683,
  36031,
  32477,
  32792,
  52660,
  52712,
  49702,
  40661,
  10190,
  18143,
  16617,
  54012,
  38376,
  1924,
  39518,
  1625,
  7896,
  1701,
  28972,
  3717,
  42815,
  6274,
  44154,
  44312,
  10944,
  14937,
  6358,
  1182,
  55213,
  9292,
  33572,
  13252,
  30667,
  31242,
  51852,
  33800,
  42491,
  31258,
  42348,
  42109,
  47515,
  1844,
  9314,
  18329,
  15263,
  52934,
  25225,
  25735,
  131

## MNIST only + (naive) EPIG

In [None]:
# experiment
# MNIST-only run (no OoD dataset is configured) using the EPIG acquisition function.
# The training set starts at 20 samples and is capped at 20 + 10, with 10 samples
# requested per acquisition.

epig_mnist_data_config = StandardExperimentDataConfig(
    id_dataset_name="MNIST",
    id_repetitions=1,
    initial_training_set_size=20,
    validation_set_size=4096,
    validation_split_random_state=0,
    evaluation_set_size=0,
    add_dataset_noise=False,
    ood_dataset_config=None,
)

experiment = UnifiedExperiment(
    seed=1,
    device="cuda",
    experiment_data_config=epig_mnist_data_config,
    model_trainer_factory=MnistModelTrainer,
    acquisition_function=acquisition_functions.EPIG,
    acquisition_size=10,
    num_pool_samples=100,
    max_training_epochs=5,
    max_training_set=20 + 10,
)

# The run is handed a plain dict to fill in; the bare name on the last line makes
# the notebook display its contents.
results = dict()
experiment.run(results)
results

StandardExperimentDataConfig(id_dataset_name='MNIST', id_repetitions=1, initial_training_set_size=20, validation_set_size=4096, validation_split_random_state=0, evaluation_set_size=0, add_dataset_noise=False, ood_dataset_config=None)
Creating: EPIG(
	acquisition_size=10,
	num_pool_samples=100,
	epig_bootstrap_type=BootstrapType.NO_BOOTSTRAP,
	epig_bootstrap_factor=1.0,
	epig_dtype=torch.float64
)
Creating: MnistModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=5
)
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=100
)
Training set size 20:


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.52490234375, 'crossentropy': 1.84434649720788}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.702392578125, 'crossentropy': 1.4298735782504082}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.703857421875, 'crossentropy': 1.5751474872231483}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.71630859375, 'crossentropy': 1.7003575265407562}


[1/16]   6%|6          [00:00<?]

[1/32]   3%|3          [00:00<?]

Epoch metrics: {'accuracy': 0.717529296875, 'crossentropy': 1.8638241328299046}
RestoringEarlyStopping: Restoring best parameters. (Score: 0.717529296875)
RestoringEarlyStopping: Restoring optimizer.


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.7301, 'crossentropy': tensor(0.9447)}


get_predictions_labels:   0%|          | 0/5588400 [00:00<?, ?it/s]

Entropy:   0%|          | 0/55884 [00:00<?, ?it/s]

Entropy:   0%|          | 0/55884 [00:00<?, ?it/s]

Evaluation Set:   0%|          | 0/55884 [00:00<?, ?it/s]

KeyboardInterrupt: 

## CIFAR-10 only

In [None]:
# experiment
# CIFAR-10-only run (no OoD dataset is configured) using the BALD acquisition function.
# The training set starts at 20 samples and is capped at 20 + 10, with 10 samples
# requested per acquisition.

cifar10_data_config = StandardExperimentDataConfig(
    id_dataset_name="CIFAR-10",
    id_repetitions=1,
    initial_training_set_size=20,
    validation_set_size=4096,
    validation_split_random_state=0,
    evaluation_set_size=100,
    add_dataset_noise=False,
    ood_dataset_config=None,
)

experiment = UnifiedExperiment(
    seed=1,
    device="cuda",
    experiment_data_config=cifar10_data_config,
    model_trainer_factory=Cifar10ModelTrainer,
    acquisition_function=acquisition_functions.BALD,
    acquisition_size=10,
    num_pool_samples=2,
    max_training_epochs=5,
    max_training_set=20 + 10,
)

# The run is handed a plain dict to fill in; the bare name on the last line makes
# the notebook display its contents.
results = dict()
experiment.run(results)
results

Files already downloaded and verified


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)


Files already downloaded and verified
Creating: BALD(
	acquisition_size=10,
	num_pool_samples=2
)
Creating: Cifar10ModelTrainer(
	device=cuda,
	num_training_samples=1,
	num_validation_samples=20,
	max_training_epochs=5
)
Creating: TrainSelfDistillationEvalModel(
	num_pool_samples=2
)
Training set size 20:
Cosine Annealing


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.132080078125, 'crossentropy': 3.556436240673065}


[1/39]   3%|2          [00:00<?]

Epoch 2 metrics: {'accuracy': 0.147705078125, 'crossentropy': 11.288517951965332}


[1/39]   3%|2          [00:00<?]

Epoch 3 metrics: {'accuracy': 0.1484375, 'crossentropy': 9.785008907318115}


[1/39]   3%|2          [00:00<?]

Epoch 4 metrics: {'accuracy': 0.15869140625, 'crossentropy': 9.548845410346985}


[1/39]   3%|2          [00:00<?]

Epoch 5 metrics: {'accuracy': 0.151611328125, 'crossentropy': 9.943562984466553}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1561, 'crossentropy': tensor(7.4005)}


get_predictions_labels:   0%|          | 0/91568 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/45784 [00:00<?, ?it/s]

Entropy:   0%|          | 0/45784 [00:00<?, ?it/s]

CandidateBatch(scores=[0.6368825323879719, 0.600792333483696, 0.5738895609974861, 0.5720301643013954, 0.5641722083091736, 0.5616957247257233, 0.540523573756218, 0.5393854081630707, 0.5310562998056412, 0.5249305069446564], indices=[13423, 26613, 32359, 9927, 25536, 1967, 12669, 11213, 15695, 13329])
[('id', 13464), ('id', 26684), ('id', 32447), ('id', 9952), ('id', 25604), ('id', 1971), ('id', 12705), ('id', 11244), ('id', 15739), ('id', 13370)]
Acquiring (label, score)s: 0 (0.6369), 0 (0.6008), 0 (0.5739), 0 (0.572), 1 (0.5642), 2 (0.5617), 0 (0.5405), 8 (0.5394), 0 (0.5311), 6 (0.5249)
Training set size 30:
Cosine Annealing


 20%|##        | 1/5 [00:00<?, ?it/s]

[1/39]   3%|2          [00:00<?]

Epoch 1 metrics: {'accuracy': 0.14208984375, 'crossentropy': 3.128549039363861}


[1/39]   3%|2          [00:00<?]

Epoch 2 metrics: {'accuracy': 0.176513671875, 'crossentropy': 3.5860735476017}


[1/39]   3%|2          [00:00<?]

Epoch 3 metrics: {'accuracy': 0.18994140625, 'crossentropy': 5.4360968470573425}


[1/39]   3%|2          [00:00<?]

Epoch 4 metrics: {'accuracy': 0.193115234375, 'crossentropy': 5.482205808162689}


[1/39]   3%|2          [00:00<?]

Epoch 5 metrics: {'accuracy': 0.181640625, 'crossentropy': 5.63788229227066}


get_predictions_labels:   0%|          | 0/200000 [00:00<?, ?it/s]

Perf after training {'accuracy': 0.1767, 'crossentropy': tensor(4.1496)}
Done.


{'dataset_info': {'training': "'CIFAR-10 (Train, seed=0, 45904 samples)'",
  'test': "'CIFAR-10 (Test)'"},
 'initial_training_set_indices': [29863,
  22519,
  21079,
  37521,
  15583,
  23405,
  44362,
  35084,
  29380,
  26994,
  39909,
  29333,
  527,
  31668,
  43591,
  12293,
  39247,
  36879,
  7979,
  33280],
 'evaluation_set_indices': [4291,
  37709,
  11949,
  12149,
  17798,
  32245,
  235,
  38778,
  32864,
  39123,
  22797,
  43486,
  5813,
  40306,
  4570,
  15667,
  36937,
  25830,
  2404,
  30096,
  19730,
  11343,
  17034,
  43455,
  15770,
  7894,
  36848,
  11067,
  12968,
  29454,
  33432,
  21672,
  38040,
  259,
  29571,
  41639,
  43971,
  13338,
  42364,
  11109,
  18814,
  21474,
  10681,
  40169,
  9997,
  12784,
  22626,
  36949,
  33130,
  13688,
  6466,
  28263,
  45060,
  10422,
  7005,
  3143,
  26143,
  28581,
  13207,
  10038,
  6750,
  21221,
  9097,
  26989,
  26153,
  13087,
  28498,
  39840,
  26870,
  24298,
  24804,
  28038,
  17767,
  31439,
  1247

In [None]:
# exports

def _build_config(
    *,
    seed,
    acquisition_function,
    evaluation_set_size,
    num_pool_samples,
    ood_exposure,
    id_dataset_name,
    ood_dataset_name,
):
    """Assemble the `UnifiedExperiment` for a single point of the experiment grid.

    The seed is offset by 1234; every setting not passed in (initial training set
    size, validation split, acquisition size, ...) is fixed for the whole grid.
    """
    shifted_seed = seed + 1234
    ood_config = OoDDatasetConfig(
        ood_dataset_name=ood_dataset_name, ood_repetitions=1, ood_exposure=ood_exposure
    )
    data_config = StandardExperimentDataConfig(
        id_dataset_name=id_dataset_name,
        id_repetitions=1,
        initial_training_set_size=20,
        validation_set_size=4096,
        validation_split_random_state=0,
        evaluation_set_size=evaluation_set_size,
        add_dataset_noise=False,
        ood_dataset_config=ood_config,
    )
    return UnifiedExperiment(
        seed=shifted_seed,
        experiment_data_config=data_config,
        acquisition_function=acquisition_function,
        acquisition_size=5,
        num_pool_samples=num_pool_samples,
    )


# Full grid: 3 seeds x 2 acquisition functions x 2 OoD exposure settings x 2 dataset
# pairs. The loop order fixes each config's position in the list, and the list is
# indexed by job id when run as a script, so it must not be reordered.
configs = [
    _build_config(
        seed=seed,
        acquisition_function=acquisition_function,
        evaluation_set_size=evaluation_set_size,
        num_pool_samples=num_pool_samples,
        ood_exposure=ood_exposure,
        id_dataset_name=id_dataset_name,
        ood_dataset_name=ood_dataset_name,
    )
    for seed in range(3)
    for acquisition_function in [acquisition_functions.BatchEvalBALD, acquisition_functions.BatchBALD]
    for evaluation_set_size in [1024]
    for num_pool_samples in [100]
    for ood_exposure in [True, False]
    for id_dataset_name, ood_dataset_name in [("CIFAR-10", "SVHN"), ("SVHN", "CIFAR-10")]
]

# Script entry point: only runs when executed as a program outside of IPython.
if not is_run_from_ipython() and __name__ == "__main__":
    num_jobs = len(configs)
    # Each iteration provides the index of the config to run and the store that
    # receives its outputs.
    for job_id, store in embedded_experiments(__file__, num_jobs):
        job_config = configs[job_id]
        # Shift the seed by the job id on top of the offset baked into the config.
        job_config.seed = job_config.seed + job_id
        print(job_config)

        # Record the exact configuration next to the (initially empty) log.
        store["config"] = dataclasses.asdict(job_config)
        store["log"] = dict()

        try:
            job_config.run(store=store)
        except Exception:
            # Keep the traceback in the store, then let the failure propagate.
            formatted_traceback = traceback.format_exc()
            store["exception"] = formatted_traceback
            raise

In [None]:
# Sanity check: number of generated experiment configurations
# (3 seeds x 2 acquisition functions x 2 OoD exposure settings x 2 dataset pairs).
len(configs)

24

In [None]:
# slow
import prettyprinter

# Register prettyprinter's optional "dataclasses" extra before printing
# (presumably so the dataclass-based configs are rendered field by field).
prettyprinter.install_extras(include={"dataclasses"})

# Dump every generated config for visual inspection.
prettyprinter.pprint(configs)

[
    OodExperiment(
        seed=1234,
        uniform_ood=True,
        id_dataset_name='CIFAR-10',
        ood_dataset_name='SVHN',
        num_pool_samples=100,
        # class
        acquisition_function=batchbald_redux.acquisition_functions.BatchEvalBALD
    ),
    OodExperiment(
        seed=1234,
        uniform_ood=True,
        id_dataset_name='SVHN',
        ood_dataset_name='CIFAR-10',
        num_pool_samples=100,
        # class
        acquisition_function=batchbald_redux.acquisition_functions.BatchEvalBALD
    ),
    OodExperiment(
        seed=1234,
        uniform_ood=False,
        id_dataset_name='CIFAR-10',
        ood_dataset_name='SVHN',
        num_pool_samples=100,
        # class
        acquisition_function=batchbald_redux.acquisition_functions.BatchEvalBALD
    ),
    OodExperiment(
        seed=1234,
        uniform_ood=False,
        id_dataset_name='SVHN',
        ood_dataset_name='CIFAR-10',
        num_pool_samples=100,
        # class
        acquisit