In [None]:
from models_code.utilities import create_model
from models_code.utilities import dump_results

from models_code.experiments import correlation_test_error_uncertainty
from models_code.experiments import correlation_test_error_uncertainty_variational
from models_code.experiments import load_lfw
from models_code.experiments import not_mnist_predictions
from models_code.experiments import not_mnist_prediction_variational
from models_code.experiments import prediction_variational
from models_code.experiments import non_distribution
from models_code.experiments import test_eval
from models_code.experiments import test_eval_variational
from models_code.experiments import softmax
from models_code.experiments import softmax2d

from models_code.mnist import perform_training
from models_code.mnist import MonteCarloDropout

from models_code.cifar import load_data
from models_code.cifar import load_svhn
from models_code.cifar import Cifar
from models_code.cifar import MCCifar
from models_code.cifar import ISCifar

from models_code.bayesbybackprop import BBPMnist
from models_code.bayesbybackprop import BBPCifar
from models_code.bayesbybackprop import train_bbp
from models_code.bayesbybackprop import test_bbp

from models_code.utilities import load_model

from utilities.metric import predictive_entropy
from utilities.metric import entropy

import torch

import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss

In [None]:
def set_same_seed(seed=9):
    """Seed PyTorch's CPU and CUDA random number generators.

    Args:
        seed: Integer passed to both ``torch.manual_seed`` and
            ``torch.cuda.manual_seed``. Defaults to 9, the value this
            notebook has always used, so existing ``set_same_seed()`` calls
            behave exactly as before.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

In [None]:
# Settings shared by the cells below: loader batch size, plus the logging
# interval and epoch count handed to the training helpers.
batch_size, log_interval, epochs = 64, 100, 100

# Default model

In [None]:
# Re-seed torch, then build the CIFAR train/test loaders.
set_same_seed()
train_loader, test_loader = load_data(batch_size)

In [None]:
# Training of the baseline model is commented out; this cell only restores the
# saved checkpoint. Uncomment the block below to retrain and overwrite it.
# standard, optimizer, cross_entropy  = create_model(Cifar)

# perform_training(
#     epochs,
#     standard,
#     train_loader,
#     test_loader,
#     optimizer,
#     cross_entropy,
#     log_interval,
#     './models/cifar_lenet/standard.torch',
#     60000 // batch_size + 1,
#     channels=3
# )

standard = load_model(Cifar, './models/cifar_lenet/standard.torch')

### First experiment - test accuracy

In [None]:
# test_eval returns three values, unpacked here as predicted classes, labels
# and per-class outputs for the CIFAR test loader.
test_preds, test_labels, test_probs = test_eval(standard, test_loader, channels=3)

In [None]:
# Accuracy of the baseline model on the test set.
accuracy_score(test_labels, test_preds)

In [None]:
# Log-loss of the baseline model; test_probs is normalised with softmax first.
# NOTE(review): the next experiment normalises the same array with softmax2d -
# confirm that softmax treats a 2-D batch row by row.
log_loss(test_labels, softmax(test_probs))

### Second experiment  - test error vs uncertainty


In [None]:
# Use the entropy of the softmax2d-normalised outputs as the uncertainty score
# and relate it to the test labels. The six returned values are unpacked as
# roc, ac, fpr, tpr, pr, re.
roc, ac, fpr, tpr, pr, re = correlation_test_error_uncertainty(
    lambda x: entropy(x),
    softmax2d(test_probs),
    test_labels
)

# Alternative score (disabled): negated maximum class probability.
# roc, ac, fpr, tpr, pr, re = correlation_test_error_uncertainty(
#     lambda x: -np.max(x, axis=1),
#     softmax2d(test_probs),
#     test_labels
# )

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Hand the four curve arrays to dump_results together with the output path for
# the baseline model.
dump_results(fpr, tpr, pr, re, './results/cifar/standard.pickle')

### Third experiment - CIFAR vs SVHN

In [None]:
# SVHN loader, used as the second dataset in this experiment.
svhn_loader = load_svhn(batch_size)

In [None]:
# Run the baseline model on SVHN; same three-value return as for the test set.
svhn_preds, svhn_labels, svhn_probs = test_eval(standard, svhn_loader, channels=3)

In [None]:
# Alternative score (disabled): entropy of the softmax outputs.
# roc, ac, fpr, tpr, pr, re = non_distribution(
#     test_probs,
#     entropy(softmax(test_probs)).reshape(10000,1),
#     entropy(softmax(svhn_probs)).reshape(73257,1),
#     83257,
#     10000
# )

# Active score: maximum softmax probability per sample. 10000 and 73257 are
# the hard-coded lengths the two score arrays are reshaped to; 83257 is their
# sum.
# NOTE(review): the disabled baseline in the test-error experiment negates the
# max probability (-np.max) while this call passes it un-negated - confirm
# which sign non_distribution expects.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_probs,
    np.max(softmax(test_probs), axis=1).reshape(10000,1),
    np.max(softmax(svhn_probs), axis=1).reshape(73257,1),
    83257,
    10000
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Hand the curve arrays to dump_results with the SVHN output path for the
# baseline model.
dump_results(fpr, tpr, pr, re, './results/svhn/standard.pickle')

### CIFAR vs LFW-a

In [None]:
# LFW loader, used as the second dataset in this experiment.
lfw_loader = load_lfw(batch_size)

In [None]:
# Run the baseline model on LFW.
lfw_preds, lfw_labels, lfw_probs = test_eval(standard, lfw_loader, channels=3)

In [None]:
# Check the shape of the LFW outputs; the reshape below hard-codes 1054 rows.
lfw_probs.shape

In [None]:
# First variant: entropy of the softmax outputs as the score. 11054 is the sum
# of the two hard-coded lengths, 10000 and 1054.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_probs,
    entropy(softmax(test_probs)).reshape(10000,1),
    entropy(softmax(lfw_probs)).reshape(1054,1),
    11054,
    10000
)

# Printed here because the second call below overwrites roc and ac.
print(roc)
print(ac)

# Second variant: maximum softmax probability as the score. Its results are
# what the following display cells show.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_probs,
    np.max(softmax(test_probs), axis=1).reshape(10000,1),
    np.max(softmax(lfw_probs), axis=1).reshape(1054,1),
    11054,
    10000
)

In [None]:
# Display roc from the max-probability variant.
roc

In [None]:
# Display ac from the max-probability variant.
ac

# Deep ensembles

In [None]:
# Rebuild the CIFAR and SVHN loaders with shuffle=False for the ensemble
# section, after re-seeding torch.
# NOTE(review): lfw_loader is not rebuilt here and is reused from the baseline
# section - confirm its sample order is stable across passes.
set_same_seed()
train_loader, test_loader = load_data(batch_size, shuffle=False)
svhn_loader = load_svhn(batch_size, shuffle=False)

In [None]:
# Accumulators for the deep-ensemble evaluation: one entry per ensemble member
# is appended to each *_probs_all list by the evaluation cell, and the
# *_labels_all names are rebound there to the labels of the evaluated data.
# torch is already imported in the notebook's first cell, so it is not
# re-imported here.
test_labels_all = []
test_probs_all = []
svhn_labels_all = []
svhn_probs_all = []
lfw_labels_all = []
lfw_probs_all = []


In [None]:
# Evaluate every ensemble member on the CIFAR test set, SVHN and LFW.
#
# Each checkpoint is loaded once and run on all three loaders, instead of being
# reloaded in three separate loops.
ensemble_size = 5

# Start from empty lists so that re-running this cell does not append a second
# copy of every member's outputs.
test_probs_all = []
svhn_probs_all = []
lfw_probs_all = []

for i in range(ensemble_size):
    print(i)
    de = load_model(Cifar, './models/cifar_lenet/de{}.torch'.format(i))
    torch.cuda.empty_cache()

    # CIFAR test predictions
    test_preds, test_labels, test_probs = test_eval(de, test_loader, channels=3)
    test_probs_all.append(test_probs)

    # SVHN predictions
    svhn_preds, svhn_labels, svhn_probs = test_eval(de, svhn_loader, channels=3)
    svhn_probs_all.append(svhn_probs)

    # LFW predictions
    # NOTE(review): lfw_loader comes from an earlier section and is not rebuilt
    # with shuffle=False like the other two loaders - confirm its sample order
    # is identical on every pass, otherwise the stacked outputs are misaligned.
    lfw_preds, lfw_labels, lfw_probs = test_eval(de, lfw_loader, channels=3)
    lfw_probs_all.append(lfw_probs)

# Keep the labels returned by the last member's pass; this replaces the
# hard-coded `i == 4` check and follows the ensemble size automatically.
test_labels_all = test_labels
svhn_labels_all = svhn_labels
lfw_labels_all = lfw_labels

### First experiment - test accuracy

In [None]:
# Normalise every member's outputs with softmax2d and stack the results along
# a new leading axis (one slice per ensemble member).
test_probs_stacked = np.stack(list(map(softmax2d, test_probs_all)))
svhn_probs_stacked = np.stack(list(map(softmax2d, svhn_probs_all)))
lfw_probs_stacked = np.stack(list(map(softmax2d, lfw_probs_all)))

In [None]:
# Ensemble accuracy: average the members' outputs over axis 0, then take the
# arg-max over axis 1 as the predicted class.
accuracy_score(test_labels_all, np.argmax(np.mean(test_probs_stacked, axis=0), axis=1))

In [None]:
# Ensemble log-loss on the member-averaged outputs.
log_loss(test_labels_all, np.mean(test_probs_stacked, axis=0))

### Second experiment  - test error vs uncertainty


In [None]:
# Relate the ensemble's predictive entropy to the test labels.
# The labels come from test_labels_all, the array kept for the ensemble and
# already used by the accuracy and log-loss cells, rather than from the loop
# variable left behind by the evaluation cell.
roc, ac, fpr, tpr, pr, re = correlation_test_error_uncertainty_variational(
    predictive_entropy,
    test_probs_stacked,
    test_labels_all
)

In [None]:
# Display the first value returned by the previous cell.
roc

In [None]:
# Display the second value returned by the previous cell.
ac

In [None]:
# Hand the curve arrays to dump_results with the deep-ensemble output path.
dump_results(fpr, tpr, pr, re, './results/cifar/de.pickle')

### Third experiment - CIFAR vs SVHN

In [None]:
# CIFAR vs SVHN for the ensemble, scored by predictive entropy of the stacked
# outputs. 83257 is the sum of the hard-coded lengths 10000 and 73257.
# NOTE(review): the first argument is test_preds, which at this point holds the
# value returned for the last ensemble member only; the baseline section passes
# test_probs in this position - confirm what non_distribution does with it.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_preds,
    predictive_entropy(test_probs_stacked).reshape(10000,1),
    predictive_entropy(svhn_probs_stacked).reshape(73257,1),
    83257,
    10000
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Hand the curve arrays to dump_results with the SVHN path for the ensemble.
dump_results(fpr, tpr, pr, re, './results/svhn/de.pickle')

In [None]:
# CIFAR vs LFW for the ensemble; 11054 is the sum of 10000 and 1054. The
# results of this call are displayed below but not passed to dump_results.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_preds,
    predictive_entropy(test_probs_stacked).reshape(10000,1),
    predictive_entropy(lfw_probs_stacked).reshape(1054,1),
    11054,
    10000
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

# Monte Carlo Dropout

In [None]:
# Re-seed torch and rebuild the CIFAR loaders with load_data's default
# arguments (the ensemble section had passed shuffle=False).
set_same_seed()
train_loader, test_loader = load_data(batch_size)

In [None]:
# Training of the MC-dropout model is commented out; this cell only restores
# the saved checkpoint.
# mc, optimizer, cross_entropy  = create_model(MCCifar)

# perform_training(
#     epochs + 50,
#     mc,
#     train_loader,
#     test_loader,
#     optimizer,
#     cross_entropy,
#     log_interval,
#     './models/cifar_lenet/mc.torch',
#     60000 // batch_size + 1,
#     channels=3
# )

# NOTE(review): the disabled training block builds MCCifar, but the checkpoint
# is loaded into Cifar here - confirm the two classes share a state layout.
mc = load_model(Cifar, './models/cifar_lenet/mc.torch')

In [None]:
# Replace the loaded model's dropout2 attribute with a MonteCarloDropout(0.25)
# instance.
mc.dropout2 = MonteCarloDropout(0.25)

In [None]:
# test_eval_variational is called with 50 as its third argument and returns two
# values in (labels, predictions) order - note that test_eval returns
# predictions first. test_preds is averaged over axis 0 below, so it presumably
# carries one slice per stochastic pass - TODO confirm.
test_labels, test_preds = test_eval_variational(mc, test_loader, 50, channels=3)

In [None]:
# Accuracy of the axis-0 average of the predictions.
accuracy_score(test_labels, np.argmax(np.mean(test_preds, axis=0), axis=1))

In [None]:
# Log-loss of the axis-0 average of the predictions.
log_loss(test_labels, np.mean(test_preds, axis=0))

### Second experiment

In [None]:
# Relate the predictive entropy of the MC-dropout predictions to the test
# labels.
roc, ac, fpr, tpr, pr, re = correlation_test_error_uncertainty_variational(
    predictive_entropy,
    test_preds,
    test_labels
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Hand the curve arrays to dump_results with the MC-dropout output path.
dump_results(fpr, tpr, pr, re, './results/cifar/mc.pickle')

### Third experiment


In [None]:
# Rebuild the SVHN loader with load_svhn's default arguments.
svhn_loader = load_svhn(batch_size)

In [None]:
# Predictions of the MC-dropout model on SVHN. The notmnist_* names are a
# leftover: the data passed in is svhn_loader.
notmnist_labels, notmnist_probs, notmnist_images = prediction_variational(mc, svhn_loader, 50,
                                                                                    channels=3)

In [None]:
# CIFAR vs SVHN scored by predictive entropy; 83257 is the sum of the
# hard-coded lengths 10000 and 73257.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_preds,
    predictive_entropy(test_preds).reshape(10000,1),
    predictive_entropy(notmnist_probs).reshape(73257,1),
    83257,
    10000
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Hand the curve arrays to dump_results with the SVHN path for MC dropout.
dump_results(fpr, tpr, pr, re, './results/svhn/mc.pickle')

### LFW-a

In [None]:
# Rebuild the LFW loader and collect the MC-dropout model's predictions on it.
lfw_loader = load_lfw(batch_size)

lfw_labels, lfw_probs, lfw_images = prediction_variational(mc, lfw_loader, 50, channels=3)

# CIFAR vs LFW scored by predictive entropy; 11054 is the sum of 10000 and
# 1054. The results are displayed below but not passed to dump_results.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_preds,
    predictive_entropy(test_preds).reshape(10000,1),
    predictive_entropy(lfw_probs).reshape(1054,1),
    11054,
    10000
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

# Bayes by Backprop

In [None]:
# Re-seed torch and rebuild the CIFAR loaders.
set_same_seed()
train_loader, test_loader = load_data(batch_size)

In [None]:
# Training of the Bayes-by-Backprop model is commented out; this cell only
# restores the saved checkpoint.
# bbp, optimizer, cross_entropy  = create_model(BBPCifar)


# for epoch in range(1, epochs):
#     train_bbp(
#         bbp,
#         optimizer,
#         train_loader,
#         cross_entropy,
#         batch_size,
#         log_interval,
#         60000 // batch_size + 1,
#         epoch,
#         channels=3
#     )
#     test_bbp(
#         bbp,
#         test_loader,
#         cross_entropy,
#         batch_size,
#         epoch,
#         channels=3
#     )


bbp = load_model(BBPCifar, './models/cifar_lenet/bbp.torch')

In [None]:

# Write the state dict of the model loaded above to a second file, bbp02.torch.
# This runs on every execution of the notebook and overwrites that file.
torch.save(bbp.state_dict(), './models/cifar_lenet/bbp02.torch')


In [None]:
# Same call as in the MC-dropout section but with 10 as the third argument;
# the return order is (labels, predictions).
test_labels, test_preds = test_eval_variational(bbp, test_loader, 10, channels=3)

In [None]:
# Accuracy of the axis-0 average of the predictions.
accuracy_score(test_labels, np.argmax(np.mean(test_preds, axis=0), axis=1))

In [None]:
# Log-loss of the axis-0 average of the predictions.
log_loss(test_labels, np.mean(test_preds, axis=0))

### Second experiment

In [None]:
# Relate the predictive entropy of the Bayes-by-Backprop predictions to the
# test labels.
roc, ac, fpr, tpr, pr, re = correlation_test_error_uncertainty_variational(
    predictive_entropy,
    test_preds,
    test_labels
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Hand the curve arrays to dump_results with the Bayes-by-Backprop output path.
dump_results(fpr, tpr, pr, re, './results/cifar/bbp.pickle')

### Third experiment

In [None]:
# Rebuild the SVHN loader.
svhn_loader = load_svhn(batch_size)

In [None]:
# Predictions of the Bayes-by-Backprop model on SVHN. The notmnist_* names are
# a leftover: the data passed in is svhn_loader.
notmnist_labels, notmnist_probs, notmnist_images = prediction_variational(bbp, svhn_loader, 10,
                                                                                    channels=3)

In [None]:
# CIFAR vs SVHN scored by predictive entropy; 83257 is the sum of the
# hard-coded lengths 10000 and 73257.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_preds,
    predictive_entropy(test_preds).reshape(10000,1),
    predictive_entropy(notmnist_probs).reshape(73257,1),
    83257,
    10000
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Hand the curve arrays to dump_results with the SVHN path for
# Bayes by Backprop.
dump_results(fpr, tpr, pr, re, './results/svhn/bbp.pickle')

### LFW-a

In [None]:
# Rebuild the LFW loader.
lfw_loader = load_lfw(batch_size)

In [None]:
# Predictions of the Bayes-by-Backprop model on LFW.
lfw_labels, lfw_probs, lfw_images = prediction_variational(bbp, lfw_loader, 10, channels=3)

In [None]:
# CIFAR vs LFW scored by predictive entropy; 11054 is the sum of 10000 and
# 1054. The results are displayed below but not passed to dump_results.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_preds,
    predictive_entropy(test_preds).reshape(10000,1),
    predictive_entropy(lfw_probs).reshape(1054,1),
    11054,
    10000
)

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

# Inhibited softmax

In [None]:
# Re-seed torch and rebuild the CIFAR loaders.
set_same_seed()
train_loader, test_loader = load_data(batch_size)

In [None]:
# create_model returns three values, unpacked as the model, its optimizer and
# the base loss. cross_entropy is read as a global by is_loss below.
is_, optimizer, cross_entropy  = create_model(ISCifar)

def is_loss(model):
    """Build the training loss for the inhibited-softmax model.

    The returned callable takes ``(pred, y)`` and adds two terms to
    ``cross_entropy(pred, y)``:

    * ``0.01`` times the sum of squared ``model.dense3`` weights, and
    * ``0.000001`` times the sum of ``pred``.

    The weight penalty reads ``model.dense3.weight`` itself rather than
    ``.weight.data``. ``.data`` is detached from autograd, so a penalty built
    on it changes the reported loss value but never contributes a gradient,
    i.e. it does not regularise anything.

    Args:
        model: Module exposing a ``dense3`` layer with a ``weight`` parameter.

    Returns:
        A function ``(pred, y) -> loss``. ``cross_entropy`` is looked up in the
        notebook's global namespace each time the loss is evaluated.
    """
    return (
        lambda pred, y: cross_entropy(pred, y)
        + 0.01 * (model.dense3.weight ** 2).sum()
        + 0.000001 * pred.sum()
    )

# Unlike the other sections, this one trains on every run instead of loading a
# checkpoint. The model path passed in is is2.torch, while the disabled
# load_model line below refers to is.torch.
# NOTE(review): `60000 // batch_size + 1` - 60000 looks carried over from a
# differently sized training set; confirm what perform_training uses it for.
perform_training(
    epochs,
    is_,
    train_loader,
    test_loader,
    optimizer,
    is_loss(is_),
    log_interval,
    './models/cifar_lenet/is2.torch',
    60000 // batch_size + 1,
    channels=3
)

# is_ = load_model(ISCifar, './models/cifar_lenet/is.torch')

In [None]:
# Evaluate the inhibited-softmax model on the CIFAR test set.
test_preds, test_labels, test_probs = test_eval(is_, test_loader, channels=3)

In [None]:
# Test accuracy of the inhibited-softmax model.
accuracy_score(test_labels, test_preds)

In [None]:
# Log-loss computed on the first 10 output columns only. Column index 10 is
# used separately as the uncertainty score in the cells below.
log_loss(test_labels, softmax2d(test_probs[:,:10]))

### Second experiment

In [None]:
# Uncertainty score: column index 10 of the softmax2d-normalised outputs.
roc, ac, fpr, tpr, pr, re = correlation_test_error_uncertainty(
    lambda x: softmax2d(x)[:,10],
    test_probs,
    test_labels
)

# Alternative score (disabled): entropy over the first 10 softmax columns.
# roc, ac, fpr, tpr, pr, re = correlation_test_error_uncertainty(
#     lambda x: entropy(softmax2d(x)[:,:10]),
#     test_probs,
#     test_labels
# )

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Store the inhibited-softmax curves next to the other CIFAR results
# (standard/de/mc/bbp all write to ./results/cifar/). The previous
# './results/mnist/is.pickle' path was a copy-paste leftover that overwrote the
# MNIST run's file.
dump_results(fpr, tpr, pr, re, './results/cifar/is.pickle')

### Third experiment

In [None]:
# Rebuild the SVHN loader.
svhn_loader = load_svhn(batch_size)

In [None]:
# Run the inhibited-softmax model on SVHN.
svhn_preds, svhn_labels, svhn_probs = test_eval(is_, svhn_loader, channels=3)

In [None]:
# CIFAR vs SVHN scored by column index 10 of the softmax2d outputs; 83257 is
# the sum of the hard-coded lengths 10000 and 73257.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_probs,
    softmax2d(test_probs)[:,10].reshape(10000,1),
    softmax2d(svhn_probs)[:,10].reshape(73257,1),
    83257,
    10000
)

# Alternative score (disabled): entropy over the first 10 softmax columns.
# roc, ac, fpr, tpr, pr, re = non_distribution(
#     test_probs,
#     entropy(softmax2d(test_probs[:,:10])).reshape(10000,1),
#     entropy(softmax2d(svhn_probs[:,:10])).reshape(73257,1),
#     83257,
#     10000
# )

In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac

In [None]:
# Store the inhibited-softmax SVHN curves next to the other SVHN results
# (standard/de/mc/bbp all write to ./results/svhn/). The previous
# './results/notmnist/is.pickle' path was a copy-paste leftover from a
# different experiment.
dump_results(fpr, tpr, pr, re, './results/svhn/is.pickle')

### LFW-a

In [None]:
# Rebuild the LFW loader.
lfw_loader = load_lfw(batch_size)

In [None]:
# Run the inhibited-softmax model on LFW.
lfw_preds, lfw_labels, lfw_probs = test_eval(is_, lfw_loader, channels=3)

In [None]:
# CIFAR vs LFW scored by column index 10 of the softmax2d outputs; 11054 is
# the sum of 10000 and 1054. The results are displayed below but not passed to
# dump_results.
roc, ac, fpr, tpr, pr, re = non_distribution(
    test_probs,
    softmax2d(test_probs)[:,10].reshape(10000,1),
    softmax2d(lfw_probs)[:,10].reshape(1054,1),
    11054,
    10000
)


In [None]:
# Display the first returned value.
roc

In [None]:
# Display the second returned value.
ac