In [None]:
from models_code.utilities import load_model
from models_code.experiments import softmax
from models_code.experiments import softmax2d_ensemble
from models_code.experiments import test_eval
from models_code.experiments import not_mnist_predictions
from models_code.experiments import load_omniglot
from models_code.experiments import load_cifar_bw
from models_code.experiments import load_notmnist
from models_code.experiments import non_distribution
from models_code.mnist import Mnist
from models_code.mnist import train
from models_code.mnist import test
from utilities.metric import predictive_entropy
from models_code.experiments import correlation_test_error_uncertainty_variational


from models_code.utilities import dump_results

In [None]:
import torch
import torch.utils.data
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss


# --- Run configuration ------------------------------------------------------
batch_size = 128     # mini-batch size shared by every loader in this notebook
log_interval = 100   # forwarded to train()/test() in the (disabled) training cell
epochs = 12          # number of epochs used by the (disabled) training cell

# Seed both the CPU and the CUDA generators before any model or loader is built.
torch.manual_seed(9)
torch.cuda.manual_seed(9)

# DataLoader options: a single worker process and pinned (page-locked) host memory.
kwargs = {'num_workers': 1, 'pin_memory': True}

# Both splits use the same preprocessing: pad 2 pixels on every side, then
# convert the image to a tensor.
mnist_transform = transforms.Compose([
    transforms.Pad(2),
    transforms.ToTensor()])

# Training split: downloaded on demand and reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Test split: kept in dataset order so per-model outputs line up sample by sample.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)

# Derived from the dataset and `batch_size` instead of the former literal
# `60000 / 128`, so it cannot drift when the batch size is changed. True
# division is kept on purpose: the value stays the same float (468.75).
num_batches = len(train_loader.dataset) / batch_size

In [None]:
# Five independently initialised networks on the GPU, each paired with its own
# Adadelta optimizer (optimizers[k] owns the parameters of models[k]).
models = [Mnist().cuda() for _ in range(5)]
optimizers = [optim.Adadelta(net.parameters()) for net in models]

# Single criterion object shared by all ensemble members.
loss_function = nn.CrossEntropyLoss()


In [None]:
# Training loop, intentionally left disabled: the notebook restores the five
# ensemble members from checkpoints under models/mnist_lenet/ instead.
# Uncomment to retrain; each epoch runs one train pass and one test pass per member.
# for i in range(epochs):
#     for model_index in range(5):
#         train(i, models[model_index], train_loader, optimizers[model_index], loss_function, log_interval, num_batches)
#         test(i, models[model_index], test_loader, optimizers[model_index], loss_function, log_interval)

In [None]:
# Checkpoint export, intentionally left disabled. Uncomment after retraining to
# overwrite the five state-dict files (de1..de5) that the loading cell reads back.
# torch.save(models[0].state_dict(), 'models/mnist_lenet/de1.torch')
# torch.save(models[1].state_dict(), 'models/mnist_lenet/de2.torch')
# torch.save(models[2].state_dict(), 'models/mnist_lenet/de3.torch')
# torch.save(models[3].state_dict(), 'models/mnist_lenet/de4.torch')
# torch.save(models[4].state_dict(), 'models/mnist_lenet/de5.torch')

In [None]:
# Replace every freshly initialised member, in place, with the network restored
# from its checkpoint. Checkpoint files are numbered from 1, list slots from 0.
for i in range(5):
    checkpoint_path = 'models/mnist_lenet/de{}.torch'.format(i + 1)
    models[i] = load_model(Mnist, checkpoint_path)

# Compute accuracy and NLL

In [None]:
# Run each of the five ensemble members over the MNIST test loader. test_eval
# returns a 3-tuple per model, unpacked below into the numbered
# (preds, labels, probs) names that the following cells rely on.
member_outputs = [test_eval(member, test_loader) for member in models[:5]]
(
    (test_preds1, test_labels1, test_probs1),
    (test_preds2, test_labels2, test_probs2),
    (test_preds3, test_labels3, test_probs3),
    (test_preds4, test_labels4, test_probs4),
    (test_preds5, test_labels5, test_probs5),
) = member_outputs

In [None]:
# Stack the per-member outputs along a new leading axis (one slice per model).
per_member_probs = (test_probs1, test_probs2, test_probs3, test_probs4, test_probs5)
test_probs = np.stack(per_member_probs, axis=0)

In [None]:
# Ensemble accuracy: average the member outputs over the model axis, take the
# highest-scoring class per sample, and compare against the test labels.
ensemble_mean = test_probs.mean(axis=0)
ensemble_pred = ensemble_mean.argmax(axis=1)
accuracy_score(test_labels1, ensemble_pred)

In [None]:
# Log loss (NLL) of the ensemble on the MNIST test labels.
# NOTE(review): softmax is applied to the member average here, whereas later
# cells pass test_probs through softmax2d_ensemble — confirm the two are meant
# to differ.
averaged_output = test_probs.mean(axis=0)
log_loss(test_labels1, softmax(averaged_output))

### Omniglot experiment

In [None]:
# Omniglot loader, built with the same batch size as the MNIST loaders.
# NOTE(review): presumably serves as out-of-distribution data for the
# MNIST ensemble — confirm against load_omniglot.
omniglot_loader = load_omniglot(batch_size)

In [None]:
# Score the Omniglot loader with all five ensemble members; the helper hands
# back three values, bound here as truth, probs and images.
ensemble_members = models[:5]
omniglot_truth, omniglot_probs, omniglot_images = not_mnist_predictions(
    ensemble_members, omniglot_loader)

In [None]:
# MNIST test set vs. Omniglot, compared through predictive entropy.
# Sample counts are read from the arrays rather than hard-coded, so the cell
# keeps working when either dataset changes size.
n_mnist = test_probs.shape[1]  # test_probs is stacked as (model, sample, class)
mnist_entropy = predictive_entropy(softmax2d_ensemble(test_probs)).reshape(n_mnist, 1)
omniglot_entropy = predictive_entropy(omniglot_probs).reshape(-1, 1)
n_omniglot = omniglot_entropy.shape[0]

roc, ac, fpr, tpr, pr, re = non_distribution(
    test_probs,
    mnist_entropy,
    omniglot_entropy,
    n_mnist + n_omniglot,  # sum of both counts (formerly the literal 42460 = 10000 + 32460)
    n_mnist                # MNIST test count (formerly the literal 10000)
)

In [None]:
# First value returned by non_distribution for the Omniglot comparison.
# NOTE(review): presumably the ROC AUC — confirm in models_code.experiments.
roc

In [None]:
# Second value returned by non_distribution for the Omniglot comparison.
# NOTE(review): the meaning of `ac` is not visible here — confirm in models_code.experiments.
ac

### CIFAR-bw experiment

In [None]:
# CIFAR-bw loader, built with the same batch size as the MNIST loaders.
# NOTE(review): "bw" presumably means the images are converted to a single
# grey channel — confirm against load_cifar_bw.
cifar_loader = load_cifar_bw(batch_size)

In [None]:
# Score the CIFAR-bw loader with all five ensemble members; the helper hands
# back three values, bound here as truth, probs and images.
ensemble_members = models[:5]
cifar_truth, cifar_probs, cifar_images = not_mnist_predictions(
    ensemble_members, cifar_loader)

In [None]:
# MNIST test set vs. CIFAR-bw, compared through predictive entropy.
# Sample counts are read from the arrays rather than hard-coded, so the cell
# keeps working when either dataset changes size.
n_mnist = test_probs.shape[1]  # test_probs is stacked as (model, sample, class)
mnist_entropy = predictive_entropy(softmax2d_ensemble(test_probs)).reshape(n_mnist, 1)
cifar_entropy = predictive_entropy(cifar_probs).reshape(-1, 1)
n_cifar = cifar_entropy.shape[0]

roc, ac, fpr, tpr, pr, re = non_distribution(
    test_probs,
    mnist_entropy,
    cifar_entropy,
    n_mnist + n_cifar,  # sum of both counts (formerly the literal 60000 = 10000 + 50000)
    n_mnist             # MNIST test count (formerly the literal 10000)
)

In [None]:
# First value returned by non_distribution for the CIFAR-bw comparison.
# NOTE(review): presumably the ROC AUC — confirm in models_code.experiments.
roc

In [None]:
# Second value returned by non_distribution for the CIFAR-bw comparison.
# NOTE(review): the meaning of `ac` is not visible here — confirm in models_code.experiments.
ac

### notMNIST experiment

In [None]:
# notMNIST loader, built with the same batch size as the MNIST loaders.
# NOTE(review): presumably serves as out-of-distribution data for the
# MNIST ensemble — confirm against load_notmnist.
notmnist_loader = load_notmnist(batch_size)

In [None]:
# Score the notMNIST loader with all five ensemble members; the helper hands
# back three values, bound here as truth, probs and images.
ensemble_members = models[:5]
notmnist_truth, notmnist_probs, notmnist_images = not_mnist_predictions(
    ensemble_members, notmnist_loader)

In [None]:
# MNIST test set vs. notMNIST, compared through predictive entropy.
# Sample counts are read from the arrays rather than hard-coded, so the cell
# keeps working when either dataset changes size.
n_mnist = test_probs.shape[1]  # test_probs is stacked as (model, sample, class)
mnist_entropy = predictive_entropy(softmax2d_ensemble(test_probs)).reshape(n_mnist, 1)
notmnist_entropy = predictive_entropy(notmnist_probs).reshape(-1, 1)
n_notmnist = notmnist_entropy.shape[0]

roc, ac, fpr, tpr, pr, re = non_distribution(
    test_probs,
    mnist_entropy,
    notmnist_entropy,
    n_mnist + n_notmnist,  # sum of both counts (formerly the literal 28724 = 10000 + 18724)
    n_mnist                # MNIST test count (formerly the literal 10000)
)

In [None]:
# First value returned by non_distribution for the notMNIST comparison.
# NOTE(review): presumably the ROC AUC — confirm in models_code.experiments.
roc

In [None]:
# Second value returned by non_distribution for the notMNIST comparison.
# NOTE(review): the meaning of `ac` is not visible here — confirm in models_code.experiments.
ac

# Test error vs uncertainty

In [None]:
# Relate the ensemble's predictive entropy on the MNIST test set to its errors.
# The uncertainty function itself is passed in (not its result), followed by the
# ensemble output of softmax2d_ensemble and the test labels.
ensemble_probs = softmax2d_ensemble(test_probs)
roc, ac, fpr, tpr, pr, re = correlation_test_error_uncertainty_variational(
    predictive_entropy, ensemble_probs, test_labels1)

In [None]:
# First value returned by correlation_test_error_uncertainty_variational.
# NOTE(review): presumably the ROC AUC — confirm in models_code.experiments.
roc

In [None]:
# Second value returned by correlation_test_error_uncertainty_variational.
# NOTE(review): the meaning of `ac` is not visible here — confirm in models_code.experiments.
ac