# CIFAR3 Calibration Benchmark

In [1]:
%load_ext autoreload

In [2]:
%autoreload 1

In [3]:
import os
import sys
import time
import itertools
sys.path.append('..')

import ternary
from ternary import scatter
import numpy as np
import tensorflow as tf
from scipy.special import softmax
import matplotlib.pyplot as plt

%aimport utils
%aimport utils.ops
%aimport utils.metrics
%aimport utils.visualization
%aimport flows.nice
%aimport calibrators
from utils.data import get_cifar3, load_logits
from utils.ops import onehot_encode
from utils.metrics import neg_log_likelihood, expected_calibration_error
from utils.visualization import plot_pdf_simplex, plot_prob_simplex, reliability_diagram, plot_cal_regions_ternary, plot_nll_curve
from calibrators import NiceCalibrator, PlanarFlowCalibrator, RadialFlowCalibrator, RealNvpCalibrator, TempScalingCalibrator, DummyCalibrator 

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
# Display the TensorFlow version this kernel is running.
tf.__version__

'1.13.1'

In [5]:
# Select GPU to use: expose only CUDA device index 1 to this process.
# NOTE(review): this runs after `import tensorflow`; it presumably still takes
# effect because it precedes the first GPU initialisation -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="1"; 

In [6]:
%matplotlib inline
# Seed NumPy's global RNG so the np.random.permutation-based val/test split is repeatable.
# NOTE(review): no TensorFlow seed is set in the visible cells, so calibrator
# training may still vary between runs -- confirm whether the calibrators seed internally.
np.random.seed(1)

## Load data

In [7]:
# Relative paths: `cifar_dir` is passed to get_cifar3, and `models_dir` is
# joined with the model name and passed to load_logits.
cifar_dir = '../cifar-10'
models_dir = '../pretrained-models'

In [8]:
model = 'cnn_cifar3'

# Load the dataset (requested with test=True) and one-hot encode its test labels.
cifar3, ix2label = get_cifar3(cifar_dir, test=True)
target = onehot_encode(cifar3['test_labels'])

# Stored logits for `model`; the first value returned by load_logits is not used here.
_, logits = load_logits(os.path.join(models_dir, model))
probs = softmax(logits, axis=1)

# `target` and `logits` are indexed with the same permutation below, so they
# must have one row per sample and the same number of rows.
n_samples = logits.shape[0]
assert target.shape[0] == n_samples, (
    'labels/logits size mismatch: {} vs {}'.format(target.shape[0], n_samples))

# val/test split: shuffle the sample indices once, then use the first half for
# validation and the rest for test. Sizes are derived from the data rather
# than hard-coded.
n_val = n_samples // 2
random_split = np.random.permutation(n_samples)
val_idx, test_idx = random_split[:n_val], random_split[n_val:]

val_target = target[val_idx, :]
test_target = target[test_idx, :]

val_logits = logits[val_idx, :]
test_logits = logits[test_idx, :]

### Helper methods:

In [9]:
def add_spoiling_samples(ax, pred_probs, target, probs):
    """Overlay the high-NLL ("spoiling") samples on ``ax``, coloured by true class.

    A sample is selected when its individual negative log-likelihood under
    ``pred_probs`` is at least ``np.log2(3)``.

    Args:
        ax: Axes-like object exposing ``scatter(points, **kwargs)`` and a
            ``_scale`` attribute (presumably a python-ternary axes -- confirm).
        pred_probs: Per-sample predicted probabilities used to score samples,
            indexed as (sample, class).
        target: One-hot labels aligned row-by-row with ``pred_probs``.
        probs: Per-sample points to draw; only rows of selected samples are used.

    Returns:
        The same ``ax`` object that was passed in.
    """
    # Per-sample NLL; the 1e-7 offset keeps log() away from zero.
    sample_nll = -np.sum(target * np.log(pred_probs + 1e-7), axis=1)

    # NOTE(review): the NLL above is in nats (np.log) while the cut-off is a
    # base-2 logarithm -- confirm the mixed units are intended.
    is_spoiling = sample_nll >= np.log2(3)
    points = probs[is_spoiling, :]
    true_class = np.argmax(target[is_spoiling, :], axis=1)

    # One scatter call per class, in class order: 0 -> red, 1 -> green, 2 -> blue.
    for class_idx, colour in enumerate(('red', 'green', 'blue')):
        ax.scatter(
            points[true_class == class_idx, :] * ax._scale,
            s=50, color=colour, edgecolors='k', lw=1, zorder=2,
        )

    return ax

In [10]:
def train_and_eval_cal(Calibrator):
    """Fit a calibrator on the validation split and score it on both splits.

    Reads the notebook-level ``val_logits``, ``val_target``, ``test_logits``
    and ``test_target`` arrays.

    Args:
        Calibrator: Callable invoked as ``Calibrator(val_logits, val_target)``.
            The object it returns must itself be callable on a logits array.

    Returns:
        dict with keys ``'nll_val'``, ``'ece_val'``, ``'nll_test'``,
        ``'ece_test'`` (metric values) and ``'cal'`` (the fitted calibrator).
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during a long fit.
    start = time.perf_counter()
    cal = Calibrator(val_logits, val_target)
    print('Calibrator fitted in {:.3f}s'.format(time.perf_counter() - start))

    # Run the calibrator once per split and reuse the output for both metrics,
    # instead of evaluating it again for every metric.
    val_probs = cal(val_logits)
    test_probs = cal(test_logits)

    results = {
        'nll_val': neg_log_likelihood(val_probs, val_target),
        'ece_val': expected_calibration_error(val_probs, val_target),
        'nll_test': neg_log_likelihood(test_probs, test_target),
        'ece_test': expected_calibration_error(test_probs, test_target),
        'cal': cal,
    }

    return results

## Train Calibrators

In [11]:
# Registry mapping a display name to a factory called as factory(logits, target).
# The baselines take no extra arguments, so the classes are used directly.
# NOTE(review): this name rebinds `calibrators`, which `%aimport calibrators`
# also introduces as a module name; kept because later cells use the dict.
calibrators = {
    'Uncalibrated': DummyCalibrator,
    'Temp-Scaling': TempScalingCalibrator,
}

# Flow depths to sweep.
layers = [2, 6, 10, 20, 50, 100]

# One keyword-argument dict per depth for each flow family. The batch size
# equals the number of validation samples.
nice_args = [
    {
        'layers': l,
        'hidden_size': [3, 3],
        'epochs': 1000,
        'batch_size': val_logits.shape[0],
    } for l in layers
]

nvp_args = [
    {
        'layers': l,
        'hidden_size': [3, 3],
        'epochs': 1000,
        'batch_size': val_logits.shape[0],
    } for l in layers
]

planar_args = [
    {
        'layers': l,
        'epochs': 1000,
        'batch_size': val_logits.shape[0],
    } for l in layers
]

radial_args = [
    {
        'layers': l,
        'epochs': 1000,
        'batch_size': val_logits.shape[0],
    } for l in layers
]


def _bind_calibrator(calibrator_cls, kwargs):
    """Return a ``(logits, target)`` factory with ``kwargs`` bound now.

    A bare ``lambda`` inside the loops below would read the loop variable only
    when called, i.e. after the loop has finished, so every entry of a family
    would be built with the last configuration. Passing the dict through this
    function gives each factory its own binding.
    """
    def factory(logits, target):
        return calibrator_cls(logits, target, **kwargs)
    return factory


for nice in nice_args:
    name = 'NICE_l{}_hs{}'.format(nice['layers'], nice['hidden_size'])
    calibrators[name] = _bind_calibrator(NiceCalibrator, nice)

for nvp in nvp_args:
    name = 'RealNVP_l{}_hs{}'.format(nvp['layers'], nvp['hidden_size'])
    calibrators[name] = _bind_calibrator(RealNvpCalibrator, nvp)

for planar in planar_args:
    name = 'Planar_l{}'.format(planar['layers'])
    calibrators[name] = _bind_calibrator(PlanarFlowCalibrator, planar)

for radial in radial_args:
    name = 'Radial_l{}'.format(radial['layers'])
    calibrators[name] = _bind_calibrator(RadialFlowCalibrator, radial)

In [None]:
# Train all calibrators
# `results` is rebuilt from scratch on every run of this cell; results[name]
# holds the dict returned by train_and_eval_cal for that calibrator.
results = {}
for name, Calibrator in calibrators.items():
    print('Training {:s}...'.format(name))
    results[name] = train_and_eval_cal(Calibrator)

Training Uncalibrated...
Calibrator fitted in 0.000s
Training Temp-Scaling...
Calibrator fitted in 0.009s
Training NICE_l2_hs[3, 3]...
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Calibrator fitted in 117.086s
Training NICE_l6_hs[3, 3]...
Calibrator fitted in 135.422s
Training NICE_l10_hs[3, 3]...
Calibrator fitted in 141.184s
Training NICE_l20_hs[3, 3]...
Calibrator fitted in 150.399s
Training NICE_l50_hs[3, 3]...
Calibrator fitted in 164.229s
Training NICE_l100_hs[3, 3]...
Calibrator fitted in 192.256s
Training RealNVP_l2_hs[3, 3]...
Calibrator fitted in 363.264s
Training RealNVP_l6_hs[3, 3]...
Calibrator fitted in 442.596s
Training RealNVP_l10_hs[3, 3]...
Calibrator fitted in 574.900s
Training RealNVP_l20_hs[3, 3]...


In [None]:
# Plot the recorded 'loss' values of the 20-layer RealNVP calibrator.
# NOTE(review): assumes the calibrator exposes a Keras-style `history.history`
# dict, presumably with one loss value per epoch -- confirm.
plt.plot(results['RealNVP_l20_hs[3, 3]']['cal'].history.history['loss'])

## Comparing flows:

### Overfitting:

In [None]:
# One colour and one results-key template per flow family; `{}` in a template
# is filled with the number of layers.
colors = ['b', 'r', 'g', 'y']
flows = ['NICE_l{}_hs[3, 3]', 'RealNVP_l{}_hs[3, 3]', 'Planar_l{}', 'Radial_l{}']


# Left panel: NLL vs. depth. Right panel: ECE vs. depth.
fig, ax_arr = plt.subplots(1, 2, figsize=(18, 7))

for color, flow in zip(colors, flows):
    # Family name shown in the legend, e.g. 'NICE' or 'Planar'.
    flow_name = flow.split('_')[0]

    # Look up this family's own results at every depth (not always NICE's).
    flow_results = [results[flow.format(l)] for l in layers]
    val_nll = [r['nll_val'] for r in flow_results]
    test_nll = [r['nll_test'] for r in flow_results]
    val_ece = [r['ece_val'] for r in flow_results]
    test_ece = [r['ece_test'] for r in flow_results]

    # Solid line = validation, dashed line = test, same colour per family.
    ax_arr[0].plot(layers, val_nll, color, label=flow_name + ' Validation')
    ax_arr[0].plot(layers, test_nll, color + '--', label=flow_name + ' Test')

    ax_arr[1].plot(layers, val_ece, color, label=flow_name + ' Validation')
    ax_arr[1].plot(layers, test_ece, color + '--', label=flow_name + ' Test')


ax_arr[0].legend()
ax_arr[0].set_ylabel('NLL')
ax_arr[0].set_xlabel('Layers')
ax_arr[0].set_title('NLL Flows')

ax_arr[1].legend()
ax_arr[1].set_ylabel('ECE')
ax_arr[1].set_xlabel('Layers')
ax_arr[1].set_title('ECE Flows')