In [1]:
import os
import sys
import time
sys.path.extend(['..'])

import torch
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

In [2]:
# Render all matplotlib text through LaTeX (this needs a working LaTeX installation)
# and load amsmath/amssymb in the LaTeX preamble used for that rendering.
plt.rc('text', usetex=True)
plt.rc('text.latex', preamble=r'\usepackage{amsmath} \usepackage{amssymb}')

In [9]:
# Directory that holds the per-run metric CSV files, relative to this notebook.
res_path = '../../results/pretrained'

# File-name templates, one per metric; the '{}' placeholder receives the run index.
ece15_file = 'logitsJuan_ECE15_reg_{}.csv'
ece_file = 'logitsJuan_ECE_reg_{}.csv'
nll_file = 'logitsJuan_NLL_reg_{}.csv'
bri_file = 'logitsJuan_BRI_reg_{}.csv'

In [10]:
def _load_runs(file_template, n_runs=10):
    """Read one metric's CSV for every run index and return the frames as a list.

    Each frame has its 'Unnamed: 0' column removed and is indexed by
    ('Dataset', 'Model').
    """
    frames = []
    for run in range(n_runs):
        csv_path = os.path.join(res_path, file_template.format(run))
        raw = pd.read_csv(csv_path)
        frames.append(raw.drop(['Unnamed: 0'], axis=1).set_index(['Dataset', 'Model']))
    return frames


# One list of per-run tables for each metric.
ECE15s = _load_runs(ece15_file)
ECEs = _load_runs(ece_file)
NLLs = _load_runs(nll_file)
Bris = _load_runs(bri_file)

In [11]:
def _mean_of(frames):
    """Average a list of DataFrames element-wise (pandas aligns them on their labels)."""
    return sum(frames) / len(frames)


# One mean table per metric, averaged over all loaded runs.
ECE15 = _mean_of(ECE15s)
ECE = _mean_of(ECEs)
NLL = _mean_of(NLLs)
Bri = _mean_of(Bris)

In [12]:
def highlight_min(s):
    '''
    Bold the smallest entry of a Series.

    Returns one CSS string per element of ``s``: 'font-weight: bold' where the
    element equals ``s.min()`` (ties are all marked) and '' everywhere else.
    Suitable as the function passed to ``DataFrame.style.apply``.
    '''
    smallest = s.min()
    styles = []
    for is_smallest in s == smallest:
        styles.append('font-weight: bold' if is_smallest else '')
    return styles

In [13]:
def highlight_min_br(s):
    '''
    Print the 'ECE' selection of ``s`` and return ``s`` itself.

    Assumes ``s`` can be indexed with 'ECE', 'NLL' and 'Brier' (for example the
    tables built with ``pd.concat(..., keys=['ECE', 'NLL', 'Brier'])``) --
    TODO confirm against the intended caller.

    NOTE(review): the three ``.style.apply(...)`` calls below build Styler
    objects that are never assigned, displayed or returned, so no highlighting
    reaches the caller. ``subset`` is given the result of
    ``s.select_dtypes(float)`` instead of column labels -- presumably the float
    column names were intended. This helper is not called anywhere in the
    visible notebook; confirm the intended behaviour before relying on it.
    '''
    # Prints the 'ECE' selection to stdout.
    print(s['ECE'])
    
    
    # The results of these three calls are discarded (see NOTE in the docstring).
    s['ECE'].style.apply(highlight_min, subset=s.select_dtypes(float))
    s['NLL'].style.apply(highlight_min, subset=s.select_dtypes(float))
    s['Brier'].style.apply(highlight_min, subset=s.select_dtypes(float))
      
    # The input object is handed back as-is.
    return s

### CIFAR 10

In [14]:
# Select the 'cifar10' rows of every averaged metric table; the results are indexed by model.
curr_ece15, curr_ece, curr_nll, curr_bri = (
    table.loc['cifar10'] for table in (ECE15, ECE, NLL, Bri)
)

In [15]:
# CIFAR-10 ECE15: methods as rows, models as columns; highlight_min is applied column-wise.
(
    curr_ece15
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-50,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,2.642584,2.663559,10.686585,4.268243,2.832803,4.56187,1.834526,3.00097
TS,1.014121,0.829599,2.351264,1.198918,0.738372,1.718529,0.516938,0.918689
ETS,1.797012,2.127014,2.328939,1.173737,0.568172,2.881861,1.520533,0.798777
MIR,1.245873,1.15484,0.769365,1.621956,0.96686,1.545232,1.227502,0.884049
BTS,0.803925,0.916503,1.407277,1.277797,0.942596,1.162204,0.821558,1.01457
HTS_torch,1.161412,0.975218,1.265405,1.220092,0.690891,1.424388,0.668687,0.804692
LTS_torch,1.079376,0.906155,1.270541,1.299158,0.819179,1.418915,0.616258,0.924151
HnLTS_torch,1.158244,0.912929,1.476751,1.330338,0.819495,1.485409,0.61933,1.013741
PTS,0.927272,0.607299,1.399774,1.076689,0.727943,1.198729,0.61141,0.784724
PTS_ece,2.641596,1.991046,6.308228,3.82961,3.04682,4.698709,0.687952,3.811178


In [16]:
# Print the CIFAR-10 ECE15 table as LaTeX source (default float formatting).
print(
    curr_ece15
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .to_latex()
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &  Uncalibrated &        TS &       ETS &       MIR &       BTS &  HTS\_torch &  LTS\_torch &  HnLTS\_torch &       PTS &   PTS\_ece \\
Model             &               &           &           &           &           &            &            &              &           &           \\
\midrule
densenet-121      &      2.642584 &  1.014121 &  1.797012 &  1.245873 &  0.803925 &   1.161412 &   1.079376 &     1.158244 &  0.927272 &  2.641596 \\
densenet-169      &      2.663559 &  0.829599 &  2.127014 &  1.154840 &  0.916503 &   0.975218 &   0.906155 &     0.912929 &  0.607299 &  1.991046 \\
resnet-50         &     10.686585 &  2.351264 &  2.328939 &  0.769365 &  1.407277 &   1.265405 &   1.270541 &     1.476751 &  1.399774 &  6.308228 \\
resnet-101        &      4.268243 &  1.198918 &  1.173737 &  1.621956 &  1.277797 &   1.220092 &   1.299158 &     1.330338 &  1.076689 &  3.829610 \\
resnext-29\_8x16   &      2.832803 &  0.738372 &  0.568172 &  0.

In [17]:
# Show the complete CIFAR-10 ECE table, including the method columns
# (LTS, HTS, HnLTS) that the selections in the following cells leave out.
curr_ece

Unnamed: 0_level_0,Uncalibrated,TS,ETS,MIR,BTS,PTS,PTS_ece,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
densenet-121,2.83513,1.386325,2.257274,1.425367,1.091841,1.184547,2.841167,1.438661,1.303224,1.349832,1.415263,1.376855,1.342219
densenet-169,2.791077,1.650473,2.30978,1.355161,1.266717,0.84159,2.477552,1.291028,1.293537,1.212882,1.104452,1.034405,1.019813
resnet-50,10.709518,2.544719,2.837071,1.180272,1.864384,1.760154,6.536506,2.52441,2.438013,1.454847,1.428311,1.583076,2.067411
resnet-101,4.450539,1.373523,1.49105,1.927313,1.718641,1.425883,4.061585,1.336975,1.417078,1.696917,1.597964,1.598841,1.410625
resnext-29_8x16,2.883827,1.026017,0.852116,1.092094,1.185582,1.042904,3.167896,1.079328,1.111489,0.927186,1.043953,1.183239,1.140408
vgg-19,4.605082,2.514875,3.304092,1.630511,1.375733,1.478166,4.825529,2.10229,2.151581,1.651571,1.616039,1.735882,1.648723
wide-resnet-28x10,1.929727,0.697262,1.593208,1.268281,1.071903,0.907849,1.049454,0.791909,0.882593,0.727414,0.886443,0.841236,0.879223
wide-resnet-40x10,3.11525,1.041778,1.137043,1.228119,1.275605,1.021832,3.882253,1.069909,1.20681,1.114712,1.104502,1.076345,1.208887


In [18]:
# CIFAR-10 ECE: methods as rows, models as columns; highlight_min is applied column-wise.
(
    curr_ece
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-50,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,2.83513,2.791077,10.709518,4.450539,2.883827,4.605082,1.929727,3.11525
TS,1.386325,1.650473,2.544719,1.373523,1.026017,2.514875,0.697262,1.041778
ETS,2.257274,2.30978,2.837071,1.49105,0.852116,3.304092,1.593208,1.137043
MIR,1.425367,1.355161,1.180272,1.927313,1.092094,1.630511,1.268281,1.228119
BTS,1.091841,1.266717,1.864384,1.718641,1.185582,1.375733,1.071903,1.275605
HTS_torch,1.376855,1.034405,1.583076,1.598841,1.183239,1.735882,0.841236,1.076345
LTS_torch,1.415263,1.104452,1.428311,1.597964,1.043953,1.616039,0.886443,1.104502
HnLTS_torch,1.342219,1.019813,2.067411,1.410625,1.140408,1.648723,0.879223,1.208887
PTS,1.184547,0.84159,1.760154,1.425883,1.042904,1.478166,0.907849,1.021832
PTS_ece,2.841167,2.477552,6.536506,4.061585,3.167896,4.825529,1.049454,3.882253


In [19]:
# Print the CIFAR-10 ECE table as LaTeX source (default float formatting).
print(
    curr_ece
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .to_latex()
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &  Uncalibrated &        TS &       ETS &       MIR &       BTS &  HTS\_torch &  LTS\_torch &  HnLTS\_torch &       PTS &   PTS\_ece \\
Model             &               &           &           &           &           &            &            &              &           &           \\
\midrule
densenet-121      &      2.835130 &  1.386325 &  2.257274 &  1.425367 &  1.091841 &   1.376855 &   1.415263 &     1.342219 &  1.184547 &  2.841167 \\
densenet-169      &      2.791077 &  1.650473 &  2.309780 &  1.355161 &  1.266717 &   1.034405 &   1.104452 &     1.019813 &  0.841590 &  2.477552 \\
resnet-50         &     10.709518 &  2.544719 &  2.837071 &  1.180272 &  1.864384 &   1.583076 &   1.428311 &     2.067411 &  1.760154 &  6.536506 \\
resnet-101        &      4.450539 &  1.373523 &  1.491050 &  1.927313 &  1.718641 &   1.598841 &   1.597964 &     1.410625 &  1.425883 &  4.061585 \\
resnext-29\_8x16   &      2.883827 &  1.026017 &  0.852116 &  1.

In [20]:
# Print the CIFAR-10 ECE table as LaTeX source, rounded to two decimals.
print(
    curr_ece
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .to_latex(float_format="%.2f")
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &  Uncalibrated &   TS &  ETS &  MIR &  BTS &  HTS\_torch &  LTS\_torch &  HnLTS\_torch &  PTS &  PTS\_ece \\
Model             &               &      &      &      &      &            &            &              &      &          \\
\midrule
densenet-121      &          2.84 & 1.39 & 2.26 & 1.43 & 1.09 &       1.38 &       1.42 &         1.34 & 1.18 &     2.84 \\
densenet-169      &          2.79 & 1.65 & 2.31 & 1.36 & 1.27 &       1.03 &       1.10 &         1.02 & 0.84 &     2.48 \\
resnet-50         &         10.71 & 2.54 & 2.84 & 1.18 & 1.86 &       1.58 &       1.43 &         2.07 & 1.76 &     6.54 \\
resnet-101        &          4.45 & 1.37 & 1.49 & 1.93 & 1.72 &       1.60 &       1.60 &         1.41 & 1.43 &     4.06 \\
resnext-29\_8x16   &          2.88 & 1.03 & 0.85 & 1.09 & 1.19 &       1.18 &       1.04 &         1.14 & 1.04 &     3.17 \\
vgg-19            &          4.61 & 2.51 & 3.30 & 1.63 & 1.38 &       1.74 &       1.62 &      

In [21]:
# CIFAR-10 NLL: methods as rows, models as columns; highlight_min is applied column-wise.
(
    curr_nll
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-50,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.188129,0.187019,0.789676,0.30469,0.199952,0.299754,0.149671,0.206792
TS,0.161812,0.160774,0.447289,0.216332,0.162096,0.235521,0.136182,0.163392
ETS,0.167319,0.168556,0.451503,0.219902,0.161691,0.242422,0.142409,0.164236
MIR,0.168084,0.164048,0.443231,0.218557,0.171089,0.225514,0.149453,0.165278
BTS,0.193055,0.236106,0.449789,0.256801,0.204134,0.290246,0.210356,0.183968
HTS_torch,0.160669,0.15866,0.438665,0.215263,0.163187,0.230331,0.135608,0.162632
LTS_torch,0.161062,0.158992,0.444695,0.217394,0.162189,0.233027,0.136367,0.163369
HnLTS_torch,0.160417,0.158389,0.438345,0.214451,0.163423,0.229903,0.135501,0.163305
PTS,0.158585,0.153747,0.444679,0.216955,0.162256,0.229274,0.134573,0.160989
PTS_ece,0.220024,0.230503,0.716711,0.35526,inf,0.374395,0.136754,0.315521


In [22]:
# Print the CIFAR-10 NLL table as LaTeX source, rounded to four decimals.
print(
    curr_nll
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .to_latex(float_format="%.4f")
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &  Uncalibrated &     TS &    ETS &    MIR &    BTS &  HTS\_torch &  LTS\_torch &  HnLTS\_torch &    PTS &  PTS\_ece \\
Model             &               &        &        &        &        &            &            &              &        &          \\
\midrule
densenet-121      &        0.1881 & 0.1618 & 0.1673 & 0.1681 & 0.1931 &     0.1607 &     0.1611 &       0.1604 & 0.1586 &   0.2200 \\
densenet-169      &        0.1870 & 0.1608 & 0.1686 & 0.1640 & 0.2361 &     0.1587 &     0.1590 &       0.1584 & 0.1537 &   0.2305 \\
resnet-50         &        0.7897 & 0.4473 & 0.4515 & 0.4432 & 0.4498 &     0.4387 &     0.4447 &       0.4383 & 0.4447 &   0.7167 \\
resnet-101        &        0.3047 & 0.2163 & 0.2199 & 0.2186 & 0.2568 &     0.2153 &     0.2174 &       0.2145 & 0.2170 &   0.3553 \\
resnext-29\_8x16   &        0.2000 & 0.1621 & 0.1617 & 0.1711 & 0.2041 &     0.1632 &     0.1622 &       0.1634 & 0.1623 &      inf \\
vgg-19            &      

In [23]:
# CIFAR-10 Brier score: methods as rows, models as columns; highlight_min is applied column-wise.
(
    curr_bri
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-50,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.076359,0.075375,0.239194,0.110169,0.082816,0.110094,0.062887,0.081958
TS,0.072888,0.071723,0.203659,0.101096,0.078307,0.101859,0.060798,0.076777
ETS,0.07244,0.07121,0.202883,0.101027,0.078233,0.100686,0.060941,0.076449
MIR,0.072387,0.071438,0.202246,0.101698,0.078487,0.099821,0.061136,0.076852
BTS,0.072232,0.071064,0.203574,0.101902,0.079086,0.100335,0.061111,0.077036
HTS_torch,0.07244,0.07094,0.201083,0.100291,0.078223,0.099873,0.060974,0.07659
LTS_torch,0.07257,0.071171,0.202631,0.101085,0.078369,0.100529,0.060711,0.076711
HnLTS_torch,0.072449,0.070962,0.201293,0.100341,0.078334,0.100022,0.060561,0.076668
PTS,0.071984,0.070466,0.202632,0.100761,0.077998,0.099758,0.060561,0.07642
PTS_ece,0.077213,0.07529,0.224219,0.110799,0.086058,0.11255,0.060982,0.087129


In [24]:
# Print the CIFAR-10 Brier table as LaTeX source, rounded to four decimals.
print(
    curr_bri
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .to_latex(float_format="%.4f")
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &  Uncalibrated &     TS &    ETS &    MIR &    BTS &  HTS\_torch &  LTS\_torch &  HnLTS\_torch &    PTS &  PTS\_ece \\
Model             &               &        &        &        &        &            &            &              &        &          \\
\midrule
densenet-121      &        0.0764 & 0.0729 & 0.0724 & 0.0724 & 0.0722 &     0.0724 &     0.0726 &       0.0724 & 0.0720 &   0.0772 \\
densenet-169      &        0.0754 & 0.0717 & 0.0712 & 0.0714 & 0.0711 &     0.0709 &     0.0712 &       0.0710 & 0.0705 &   0.0753 \\
resnet-50         &        0.2392 & 0.2037 & 0.2029 & 0.2022 & 0.2036 &     0.2011 &     0.2026 &       0.2013 & 0.2026 &   0.2242 \\
resnet-101        &        0.1102 & 0.1011 & 0.1010 & 0.1017 & 0.1019 &     0.1003 &     0.1011 &       0.1003 & 0.1008 &   0.1108 \\
resnext-29\_8x16   &        0.0828 & 0.0783 & 0.0782 & 0.0785 & 0.0791 &     0.0782 &     0.0784 &       0.0783 & 0.0780 &   0.0861 \\
vgg-19            &      

### CIFAR 100

In [25]:
# Point the working tables at the 'cifar100' rows.
# NOTE: curr_ece15 is not reassigned in this cell.
curr_ece, curr_nll, curr_bri = (
    table.loc['cifar100'] for table in (ECE, NLL, Bri)
)

# Side-by-side overview with one top-level column group per metric.
pd.concat(
    [curr_ece, curr_nll, curr_bri],
    keys=['ECE', 'NLL', 'Brier'],
    names=['Metric', 'Model'],
    axis=1,
)

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Uncalibrated,TS,ETS,MIR,BTS,PTS,PTS_ece,LTS,HTS,HnLTS,...,MIR,BTS,PTS,PTS_ece,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,8.76025,3.93363,3.039655,4.738307,2.768561,3.210199,2.482668,4.198,4.109172,3.313878,...,0.304987,0.305248,0.302494,0.30209,0.303898,0.303952,0.304911,0.304975,0.303184,0.303607
densenet-169,8.932411,3.948774,2.897083,4.996996,3.187847,3.190743,2.525341,4.229603,4.173203,3.314126,...,0.301862,0.301962,0.29791,0.298055,0.299873,0.299932,0.301612,0.301661,0.299293,0.299539
resnet-101,11.446499,2.246798,2.243738,3.895105,2.224123,1.888606,2.278895,2.375905,2.653914,2.268631,...,0.384248,0.38247,0.380529,0.381305,0.382163,0.381698,0.381698,0.38173,0.382068,0.381372
resnext-29_8x16,9.692425,3.139508,2.675645,4.209867,2.061077,1.876482,2.739559,3.251355,3.519886,2.021386,...,0.31162,0.309646,0.307013,0.308122,0.309433,0.309965,0.309001,0.309026,0.308849,0.309656
vgg-19,17.631318,5.133481,5.364392,4.147693,3.885672,3.311389,4.133892,3.728082,3.548877,3.621421,...,0.388874,0.388022,0.385095,0.386259,0.386526,0.38826,0.389501,0.389665,0.386308,0.387827
wide-resnet-28x10,5.187933,4.629842,3.546959,3.919492,3.105283,3.358554,4.575089,4.603869,4.612102,3.57485,...,0.285417,0.287132,0.283104,0.288369,0.285669,0.285106,0.287185,0.287206,0.284323,0.283972
wide-resnet-40x10,14.784526,4.201723,2.739724,5.343658,3.550641,4.049001,2.601632,4.425745,4.170606,3.730715,...,0.330068,0.329314,0.324527,0.322819,0.324544,0.32399,0.32859,0.328738,0.324243,0.323987


In [26]:
# CIFAR-100 ECE: methods as rows, models as columns; highlight_min is applied column-wise.
(
    curr_ece
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,8.76025,8.932411,11.446499,9.692425,17.631318,5.187933,14.784526
TS,3.93363,3.948774,2.246798,3.139508,5.133481,4.629842,4.201723
ETS,3.039655,2.897083,2.243738,2.675645,5.364392,3.546959,2.739724
MIR,4.738307,4.996996,3.895105,4.209867,4.147693,3.919492,5.343658
BTS,2.768561,3.187847,2.224123,2.061077,3.885672,3.105283,3.550641
HTS_torch,3.480141,3.746898,2.252193,2.738368,3.65083,3.867962,4.004588
LTS_torch,3.268709,3.361696,2.286849,2.10388,3.776828,3.542564,3.80173
HnLTS_torch,3.244991,3.694206,2.351044,2.6723,3.588133,3.712199,3.978157
PTS,3.210199,3.190743,1.888606,1.876482,3.311389,3.358554,4.049001
PTS_ece,2.482668,2.525341,2.278895,2.739559,4.133892,4.575089,2.601632


In [27]:
# Print the CIFAR-100 ECE table as LaTeX source, rounded to two decimals.
print(
    curr_ece
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .to_latex(float_format="%.2f")
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &  Uncalibrated &   TS &  ETS &  MIR &  BTS &  HTS\_torch &  LTS\_torch &  HnLTS\_torch &  PTS &  PTS\_ece \\
Model             &               &      &      &      &      &            &            &              &      &          \\
\midrule
densenet-121      &          8.76 & 3.93 & 3.04 & 4.74 & 2.77 &       3.48 &       3.27 &         3.24 & 3.21 &     2.48 \\
densenet-169      &          8.93 & 3.95 & 2.90 & 5.00 & 3.19 &       3.75 &       3.36 &         3.69 & 3.19 &     2.53 \\
resnet-101        &         11.45 & 2.25 & 2.24 & 3.90 & 2.22 &       2.25 &       2.29 &         2.35 & 1.89 &     2.28 \\
resnext-29\_8x16   &          9.69 & 3.14 & 2.68 & 4.21 & 2.06 &       2.74 &       2.10 &         2.67 & 1.88 &     2.74 \\
vgg-19            &         17.63 & 5.13 & 5.36 & 4.15 & 3.89 &       3.65 &       3.78 &         3.59 & 3.31 &     4.13 \\
wide-resnet-28x10 &          5.19 & 4.63 & 3.55 & 3.92 & 3.11 &       3.87 &       3.54 &      

In [28]:
# CIFAR-100 NLL: methods as rows, models as columns; highlight_min is applied column-wise.
(
    curr_nll
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.893868,0.874816,1.13431,0.939844,1.541421,0.817338,1.224773
TS,0.835487,0.81557,1.000677,0.822038,1.199659,0.813459,0.905464
ETS,0.859196,0.832331,1.009053,0.839845,1.206897,0.830835,0.926263
MIR,0.837843,0.816676,1.010458,0.833204,1.18649,0.799622,0.925424
BTS,0.827085,0.808159,1.00396,0.818997,1.192554,0.787617,0.903724
HTS_torch,0.819009,0.801315,1.006663,0.815291,1.175813,0.778474,0.883636
LTS_torch,0.825375,0.806211,1.00022,0.812815,1.194054,0.78708,0.899391
HnLTS_torch,0.820993,0.802342,1.003576,0.818284,1.182075,0.777067,0.883699
PTS,0.815834,0.792558,0.997198,0.806017,1.1747,0.772173,0.883559
PTS_ece,0.829434,0.808984,1.001982,0.8148,1.191896,0.81283,0.892132


In [29]:
# Print the CIFAR-100 NLL table as LaTeX source, rounded to four decimals.
print(
    curr_nll
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .to_latex(float_format="%.4f")
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &  Uncalibrated &     TS &    ETS &    MIR &    BTS &  HTS\_torch &  LTS\_torch &  HnLTS\_torch &    PTS &  PTS\_ece \\
Model             &               &        &        &        &        &            &            &              &        &          \\
\midrule
densenet-121      &        0.8939 & 0.8355 & 0.8592 & 0.8378 & 0.8271 &     0.8190 &     0.8254 &       0.8210 & 0.8158 &   0.8294 \\
densenet-169      &        0.8748 & 0.8156 & 0.8323 & 0.8167 & 0.8082 &     0.8013 &     0.8062 &       0.8023 & 0.7926 &   0.8090 \\
resnet-101        &        1.1343 & 1.0007 & 1.0091 & 1.0105 & 1.0040 &     1.0067 &     1.0002 &       1.0036 & 0.9972 &   1.0020 \\
resnext-29\_8x16   &        0.9398 & 0.8220 & 0.8398 & 0.8332 & 0.8190 &     0.8153 &     0.8128 &       0.8183 & 0.8060 &   0.8148 \\
vgg-19            &        1.5414 & 1.1997 & 1.2069 & 1.1865 & 1.1926 &     1.1758 &     1.1941 &       1.1821 & 1.1747 &   1.1919 \\
wide-resnet-28x10 &      

In [30]:
# CIFAR-100 Brier score: methods as rows, models as columns; highlight_min is applied column-wise.
(
    curr_bri
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.317058,0.314196,0.405327,0.327484,0.443283,0.289225,0.370023
TS,0.304773,0.30167,0.381723,0.309558,0.391811,0.288609,0.327633
ETS,0.304354,0.301037,0.381624,0.309294,0.391021,0.287947,0.326992
MIR,0.304987,0.301862,0.384248,0.31162,0.388874,0.285417,0.330068
BTS,0.305248,0.301962,0.38247,0.309646,0.388022,0.287132,0.329314
HTS_torch,0.303184,0.299293,0.382068,0.308849,0.386308,0.284323,0.324243
LTS_torch,0.304975,0.301661,0.38173,0.309026,0.389665,0.287206,0.328738
HnLTS_torch,0.303607,0.299539,0.381372,0.309656,0.387827,0.283972,0.323987
PTS,0.302494,0.29791,0.380529,0.307013,0.385095,0.283104,0.324527
PTS_ece,0.30209,0.298055,0.381305,0.308122,0.386259,0.288369,0.322819


In [31]:
# Print the CIFAR-100 Brier table as LaTeX source, rounded to four decimals.
print(
    curr_bri
    .loc[:, ['Uncalibrated', 'TS', 'ETS', 'MIR', 'BTS',
             'HTS_torch', 'LTS_torch', 'HnLTS_torch', 'PTS', 'PTS_ece']]
    .to_latex(float_format="%.4f")
)

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &  Uncalibrated &     TS &    ETS &    MIR &    BTS &  HTS\_torch &  LTS\_torch &  HnLTS\_torch &    PTS &  PTS\_ece \\
Model             &               &        &        &        &        &            &            &              &        &          \\
\midrule
densenet-121      &        0.3171 & 0.3048 & 0.3044 & 0.3050 & 0.3052 &     0.3032 &     0.3050 &       0.3036 & 0.3025 &   0.3021 \\
densenet-169      &        0.3142 & 0.3017 & 0.3010 & 0.3019 & 0.3020 &     0.2993 &     0.3017 &       0.2995 & 0.2979 &   0.2981 \\
resnet-101        &        0.4053 & 0.3817 & 0.3816 & 0.3842 & 0.3825 &     0.3821 &     0.3817 &       0.3814 & 0.3805 &   0.3813 \\
resnext-29\_8x16   &        0.3275 & 0.3096 & 0.3093 & 0.3116 & 0.3096 &     0.3088 &     0.3090 &       0.3097 & 0.3070 &   0.3081 \\
vgg-19            &        0.4433 & 0.3918 & 0.3910 & 0.3889 & 0.3880 &     0.3863 &     0.3897 &       0.3878 & 0.3851 &   0.3863 \\
wide-resnet-28x10 &      

# OLD (Cars / Birds / SVHN; the outputs below come from an earlier kernel session)

### Cars

In [90]:
# Pick the 'cars' rows from each averaged table.
# The tables are indexed by (Dataset, Model), so attribute access such as
# ECE.Dataset fails (it only works for columns). 'Dataset' is therefore moved
# back into a column first; this also keeps the 'Dataset' column that the
# following cells drop, and leaves the result indexed by model.
# Each table is filtered with its own mask rather than reusing the ECE mask
# for the NLL and Brier tables.
curr_ece, curr_nll, curr_bri = (
    table.reset_index('Dataset').loc[lambda flat: flat['Dataset'] == 'cars']
    for table in (ECE, NLL, Bri)
)

# Side-by-side overview with one top-level column group per metric.
pd.concat([curr_ece, curr_nll, curr_bri], axis=1, keys=['ECE', 'NLL', 'Brier'], names=['Metric', 'Model'])

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Dataset,Uncalibrated,TS,ETS,MIR,BTS,PTS,PTS_ece,LTS,HTS,...,MIR,BTS,PTS,PTS_ece,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,cars,5.865746,2.342571,1.795338,2.906047,2.161027,1.7222,2.516041,4.381359,3.024916,...,0.166519,0.166408,0.164753,0.16541,0.177144,0.167052,0.164132,0.171022,0.176489,0.167648
densenet-169,cars,5.821673,2.380512,1.952333,3.06458,2.568552,2.625037,3.4139,4.633774,3.037256,...,0.165727,0.167407,0.162831,0.165174,0.17878,0.168812,0.162922,0.170277,0.17912,0.169025
resnet-18,cars,7.038802,1.87421,2.563862,3.184676,2.807826,2.427751,6.532263,4.661463,3.046411,...,0.196493,0.199192,0.196377,0.206226,0.208044,0.197072,0.194221,0.201498,0.208392,0.197337
resnet-50,cars,5.190265,2.481779,1.506835,2.479405,2.61041,2.994893,2.302823,4.79106,2.588476,...,0.1539,0.156919,0.153924,0.15392,0.167468,0.155652,0.153273,0.157945,0.169409,0.156156
resnet-101,cars,5.400409,2.312754,1.797904,2.318862,2.133736,1.875768,7.291529,3.983332,2.61231,...,0.154523,0.157109,0.154183,0.173008,0.16668,0.155528,0.15295,0.158828,0.167641,0.15604


In [91]:
# Cars ECE without the 'Dataset', LTS, HTS and HnLTS columns: methods as rows,
# models as columns; highlight_min is applied column-wise.
(
    curr_ece
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,5.865746,5.821673,7.038802,5.190265,5.400409
TS,2.342571,2.380512,1.87421,2.481779,2.312754
ETS,1.795338,1.952333,2.563862,1.506835,1.797904
MIR,2.906047,3.06458,3.184676,2.479405,2.318862
BTS,2.161027,2.568552,2.807826,2.61041,2.133736
PTS,1.7222,2.625037,2.427751,2.994893,1.875768
PTS_ece,2.516041,3.4139,6.532263,2.302823,7.291529
LTS_torch,3.675098,4.013015,4.479547,3.125023,3.089634
HTS_torch,4.126354,4.673913,4.77063,5.12111,4.588614
HnLTS_torch,2.704132,3.029084,2.932321,2.647035,2.569912


In [92]:
# Cars NLL without the 'Dataset', LTS, HTS and HnLTS columns: methods as rows,
# models as columns; highlight_min is applied column-wise.
(
    curr_nll
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,0.533742,0.534814,0.635785,0.493354,0.49331
TS,0.417259,0.416619,0.492074,0.390955,0.384694
ETS,0.441761,0.434734,0.512542,0.396151,0.395235
MIR,0.425016,0.445568,0.521404,0.42189,0.401355
BTS,0.504977,0.510134,0.664115,0.607957,0.470656
PTS,0.415976,0.412732,0.50511,0.408865,0.384875
PTS_ece,0.418281,0.443467,0.638041,0.388326,0.67818
LTS_torch,0.456298,0.455783,0.541167,0.421261,0.41603
HTS_torch,0.575168,0.850279,0.747025,1.067392,0.645671
HnLTS_torch,0.44697,0.475942,0.53684,0.418875,0.407852


In [93]:
# Cars Brier score without the 'Dataset', LTS, HTS and HnLTS columns: methods as
# rows, models as columns; highlight_min is applied column-wise.
(
    curr_bri
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,0.17345,0.172543,0.207543,0.160202,0.16174
TS,0.164603,0.163421,0.194467,0.154095,0.15365
ETS,0.164872,0.162912,0.194731,0.153287,0.153035
MIR,0.166519,0.165727,0.196493,0.1539,0.154523
BTS,0.166408,0.167407,0.199192,0.156919,0.157109
PTS,0.164753,0.162831,0.196377,0.153924,0.154183
PTS_ece,0.16541,0.165174,0.206226,0.15392,0.173008
LTS_torch,0.171022,0.170277,0.201498,0.157945,0.158828
HTS_torch,0.176489,0.17912,0.208392,0.169409,0.167641
HnLTS_torch,0.167648,0.169025,0.197337,0.156156,0.15604


### Birds

In [94]:
# Pick the 'birds' rows from each averaged table.
# The tables are indexed by (Dataset, Model), so attribute access such as
# ECE.Dataset fails (it only works for columns). 'Dataset' is therefore moved
# back into a column first; this also keeps the 'Dataset' column that the
# following cells drop, and leaves the result indexed by model.
# Each table is filtered with its own mask rather than reusing the ECE mask
# for the NLL and Brier tables.
curr_ece, curr_nll, curr_bri = (
    table.reset_index('Dataset').loc[lambda flat: flat['Dataset'] == 'birds']
    for table in (ECE, NLL, Bri)
)

# Side-by-side overview with one top-level column group per metric.
pd.concat([curr_ece, curr_nll, curr_bri], axis=1, keys=['ECE', 'NLL', 'Brier'], names=['Metric', 'Model'])

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Dataset,Uncalibrated,TS,ETS,MIR,BTS,PTS,PTS_ece,LTS,HTS,...,MIR,BTS,PTS,PTS_ece,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,birds,12.42989,2.940008,3.177977,4.306846,3.561389,2.89036,14.38883,7.703473,4.586972,...,0.327244,0.329046,0.322501,0.365645,0.355864,0.329576,0.323027,0.330846,0.35967,0.331048
densenet-169,birds,12.649694,3.089719,3.098738,4.596288,4.548998,2.559659,6.301814,8.913095,4.38746,...,0.31899,0.324452,0.314553,0.321666,0.350502,0.321184,0.314144,0.322514,0.358431,0.323444
resnet-18,birds,15.638467,3.39792,3.834035,6.047435,3.937566,3.353479,21.639745,9.101882,5.018642,...,0.376872,0.374576,0.370457,0.47265,0.410515,0.377509,0.370251,0.381369,0.414287,0.378883
resnet-50,birds,12.986916,3.416744,3.209112,5.21423,4.291787,2.907789,19.694574,7.382448,4.083497,...,0.325711,0.329486,0.32001,0.422586,0.348203,0.322907,0.32132,0.331713,0.447188,0.323952
resnet-101,birds,12.642524,2.837463,3.192655,5.38526,3.864756,4.764418,14.319949,8.876726,4.184777,...,0.310454,0.311545,0.307426,0.349501,0.339734,0.309445,0.305333,0.31491,0.342638,0.312077


In [95]:
# Birds ECE without the 'Dataset', LTS, HTS and HnLTS columns: methods as rows,
# models as columns; highlight_min is applied column-wise.
(
    curr_ece
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,12.42989,12.649694,15.638467,12.986916,12.642524
TS,2.940008,3.089719,3.39792,3.416744,2.837463
ETS,3.177977,3.098738,3.834035,3.209112,3.192655
MIR,4.306846,4.596288,6.047435,5.21423,5.38526
BTS,3.561389,4.548998,3.937566,4.291787,3.864756
PTS,2.89036,2.559659,3.353479,2.907789,4.764418
PTS_ece,14.38883,6.301814,21.639745,19.694574,14.319949
LTS_torch,6.408002,5.917763,7.439505,6.942747,6.569755
HTS_torch,9.106183,11.193807,10.808414,24.153763,9.985612
HnLTS_torch,4.377864,4.90295,5.234135,3.798688,4.132398


In [96]:
# Birds NLL without the 'Dataset', LTS, HTS and HnLTS columns: methods as rows,
# models as columns; highlight_min is applied column-wise.
(
    curr_nll
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,1.196887,1.184059,1.450385,1.200745,1.169428
TS,0.900072,0.867165,1.035979,0.878792,0.837691
ETS,0.901693,0.867903,1.040046,0.874402,0.839318
MIR,0.978267,0.899091,1.086031,0.94065,0.87099
BTS,1.159346,1.269112,1.141942,1.209747,1.108864
PTS,0.895374,0.868241,1.033661,0.867702,0.85868
PTS_ece,1.378288,0.939876,inf,inf,1.351493
LTS_torch,0.950042,0.909707,1.089677,0.921009,0.890148
HTS_torch,inf,inf,inf,1.41588,inf
HnLTS_torch,1.010287,0.984211,1.135566,0.918255,0.890713


In [97]:
# Birds Brier score without the 'Dataset', LTS, HTS and HnLTS columns: methods as
# rows, models as columns; highlight_min is applied column-wise.
(
    curr_bri
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,0.352798,0.347038,0.417436,0.358351,0.337751
TS,0.323056,0.314235,0.370182,0.321053,0.305229
ETS,0.323283,0.31454,0.371532,0.321442,0.305637
MIR,0.327244,0.31899,0.376872,0.325711,0.310454
BTS,0.329046,0.324452,0.374576,0.329486,0.311545
PTS,0.322501,0.314553,0.370457,0.32001,0.307426
PTS_ece,0.365645,0.321666,0.47265,0.422586,0.349501
LTS_torch,0.330846,0.322514,0.381369,0.331713,0.31491
HTS_torch,0.35967,0.358431,0.414287,0.447188,0.342638
HnLTS_torch,0.331048,0.323444,0.378883,0.323952,0.312077


### SVHN

In [88]:
# Pick the 'svhn' rows from each averaged table.
# The tables are indexed by (Dataset, Model), so attribute access such as
# ECE.Dataset fails (it only works for columns). 'Dataset' is therefore moved
# back into a column first; this also keeps the 'Dataset' column that the
# following cells drop, and leaves the result indexed by model.
# Each table is filtered with its own mask rather than reusing the ECE mask
# for the NLL and Brier tables.
curr_ece, curr_nll, curr_bri = (
    table.reset_index('Dataset').loc[lambda flat: flat['Dataset'] == 'svhn']
    for table in (ECE, NLL, Bri)
)

# Side-by-side overview with one top-level column group per metric.
pd.concat([curr_ece, curr_nll, curr_bri], axis=1, keys=['ECE', 'NLL', 'Brier'], names=['Metric', 'Model'])

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Dataset,Uncalibrated,TS,ETS,MIR,BTS,PTS,PTS_ece,LTS,HTS,...,MIR,BTS,PTS,PTS_ece,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,svhn,2.0588,1.445866,2.573994,0.845361,0.724117,1.095324,2.638281,1.677259,1.396225,...,0.052341,0.052084,0.052191,0.058026,0.052383,0.052558,0.052127,0.052376,0.052118,0.052371
densenet-169,svhn,0.491088,1.037124,1.156383,1.016361,0.933092,0.85207,1.198361,0.954254,1.266279,...,0.05232,0.052088,0.0519,0.052059,0.051998,0.052255,0.051816,0.051553,0.051911,0.051669
resnet-50,svhn,0.956623,1.106313,1.449459,1.04481,1.006148,1.011055,2.622655,1.043941,1.20205,...,0.055794,0.055683,0.055442,0.061196,0.05554,0.0557,0.055496,0.055381,0.055474,0.055433
wide-resnet-28x10,svhn,1.553618,1.083106,1.589714,0.90352,1.095303,1.033192,2.287509,1.062386,0.845815,...,0.052946,0.053085,0.052773,0.057173,0.052627,0.05264,0.052786,0.052928,0.052653,0.052729
wide-resnet-40x10,svhn,1.330812,1.27623,2.535067,0.853099,0.880129,0.969191,1.549845,1.257521,1.467862,...,0.048224,0.048211,0.047818,0.048508,0.048138,0.04809,0.048094,0.048317,0.048017,0.048169


In [89]:
# SVHN ECE without the 'Dataset', LTS, HTS and HnLTS columns: methods as rows,
# models as columns; highlight_min is applied column-wise.
(
    curr_ece
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-50,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,2.0588,0.491088,0.956623,1.553618,1.330812
TS,1.445866,1.037124,1.106313,1.083106,1.27623
ETS,2.573994,1.156383,1.449459,1.589714,2.535067
MIR,0.845361,1.016361,1.04481,0.90352,0.853099
BTS,0.724117,0.933092,1.006148,1.095303,0.880129
PTS,1.095324,0.85207,1.011055,1.033192,0.969191
PTS_ece,2.638281,1.198361,2.622655,2.287509,1.549845
LTS_torch,1.177281,0.798178,0.968126,1.093625,1.237495
HTS_torch,0.979599,0.996584,1.043561,1.039999,1.017069
HnLTS_torch,1.229644,0.790663,1.008837,1.010006,1.074584


In [82]:
# SVHN NLL without the 'Dataset', LTS, HTS and HnLTS columns: methods as rows,
# models as columns; highlight_min is applied column-wise.
(
    curr_nll
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-50,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.159902,0.127533,0.140505,0.141678,0.128077
TS,0.141412,0.12926,0.140116,0.134269,0.126885
ETS,0.151157,0.129777,0.14207,0.13757,0.137088
MIR,0.138444,0.130222,0.143252,0.133155,0.124451
BTS,0.168346,0.152188,0.151398,0.145151,0.136512
PTS,0.139548,0.128507,0.138841,0.133768,0.125052
PTS_ece,0.243524,0.138088,0.269564,0.182695,0.127343
LTS_torch,0.140261,0.127443,0.138837,0.133522,0.126197
HTS_torch,0.139961,0.128683,0.139213,0.133058,0.125757
HnLTS_torch,0.14022,0.127803,0.139209,0.132619,0.125048


In [81]:
# SVHN Brier score without the 'Dataset', LTS, HTS and HnLTS columns: methods as
# rows, models as columns; highlight_min is applied column-wise.
(
    curr_bri
    .drop(columns=['Dataset', 'LTS', 'HTS', 'HnLTS'])
    .T
    .style
    .apply(highlight_min)
)

Model,densenet-121,densenet-169,resnet-50,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.054979,0.051542,0.05565,0.054144,0.049306
TS,0.052406,0.052033,0.055628,0.052815,0.048192
ETS,0.052608,0.052127,0.055899,0.053107,0.048666
MIR,0.052341,0.05232,0.055794,0.052946,0.048224
BTS,0.052084,0.052088,0.055683,0.053085,0.048211
PTS,0.052191,0.0519,0.055442,0.052773,0.047818
PTS_ece,0.058026,0.052059,0.061196,0.057173,0.048508
LTS_torch,0.052376,0.051553,0.055381,0.052928,0.048317
HTS_torch,0.052118,0.051911,0.055474,0.052653,0.048017
HnLTS_torch,0.052371,0.051669,0.055433,0.052729,0.048169
