In [1]:
import os
if 'Conformal-Sparsemax/notebooks' in os.getcwd():
    os.chdir(os.path.dirname(os.getcwd()))

In [6]:
from conformal_sparsemax.classifier import CNN, CNN_CIFAR,get_data, evaluate
from sklearn.metrics import f1_score
import torch
import numpy as np
import pickle
from entmax.losses import SparsemaxLoss, Entmax15Loss

In [9]:
loss = 'softmax' #sparsemax, softmax or entmax15
dataset='MNIST' #CIFAR100 or MNIST

In [10]:
if dataset == 'CIFAR100':
    model = CNN_CIFAR(loss)
elif dataset == 'MNIST':
    model = CNN(loss,n_classes=10,input_size=28,channels=1)
else:
    raise Exception('Wrong dataset name')

_,_, test_dataloader, cal_dataloader = get_data(0.2,16,dataset = dataset)
model.load_state_dict(torch.load(f'models/{dataset}_{loss}.pth'))
if loss == 'sparsemax':
    criterion = SparsemaxLoss()
elif loss == 'entmax15':
    criterion = Entmax15Loss()
elif loss == 'softmax':
    criterion = torch.nn.NLLLoss()

In [11]:
test_proba, test_pred, test_true, test_loss = evaluate(
                                                    model,
                                                    test_dataloader,
                                                    criterion)

test_f1 = f1_score(test_pred, test_true, average='weighted')

print(f'Test loss: {test_loss:.3f}')
print(f'Test f1: {test_f1:.3f}')

Test loss: -0.983
Test f1: 0.984


In [14]:
print('type:', type(test_proba))
print('pred_proba shape:',test_proba.shape)
print('pred_labels shape:',test_pred.shape)
print('true_labels shape:',test_true.shape)

type: <class 'numpy.ndarray'>
pred_proba shape: (10000, 10)
pred_labels shape: (10000,)
true_labels shape: (10000,)


In [15]:
((test_proba==0).sum(axis=1)/100).mean()

9e-06

In [16]:
cal_proba, cal_pred, cal_true, cal_loss = evaluate(
                                                    model,
                                                    cal_dataloader,
                                                    criterion)

cal_f1 = f1_score(cal_pred, cal_true, average='weighted')

print(f'Calibration loss: {cal_loss:.3f}')
print(f'Calibration f1: {cal_f1:.3f}')

Calibration loss: -0.991
Calibration f1: 0.991


In [18]:
#One Hot Encoding
test_true_enc = np.zeros((test_true.size, test_true.max()+1), dtype=int)
test_true_enc[np.arange(test_true.size),test_true] = 1

cal_true_enc = np.zeros((cal_true.size, cal_true.max()+1), dtype=int)
cal_true_enc[np.arange(cal_true.size),cal_true] = 1

In [19]:
predictions = {'test':{'proba':test_proba,'true':test_true_enc},
 'cal':{'proba':cal_proba,'true':cal_true_enc}}

In [20]:
for dataset_type in ['cal','test']:
    for y in ['proba','true']:
        with open(f'predictions/{dataset}_{loss}_{dataset_type}_{y}.pickle', 'wb') as f:
            pickle.dump(predictions[dataset_type][y], f)

In [21]:
with open('predictions/CIFAR100_sparsemax_test_true.pickle', 'rb') as f:
    test_true = pickle.load(f)