In [1]:
import os
import sys
import time
sys.path.extend(['..'])

import torch
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

In [2]:
# Render all figure text through LaTeX, with the AMS math packages loaded.
plt.rcParams.update({
    'text.usetex': True,
    'text.latex.preamble': r'\usepackage{amsmath} \usepackage{amssymb}',
})

In [3]:
# Directory (relative to this notebook) that holds the result CSVs.
res_path = '../../results/pretrained'

# One CSV per metric: ECE, NLL and Brier score.
ece_file_reg, nll_file_reg, bri_file_reg = (
    'logitsJuan_ECE_reg.csv',
    'logitsJuan_NLL_reg.csv',
    'logitsJuan_BRI_reg.csv',
)

In [4]:
# Load the three metric tables in the order ECE, NLL, Brier and discard the
# 'Unnamed: 0' column that each CSV carries.
ECE, NLL, Bri = (
    pd.read_csv(os.path.join(res_path, csv_name)).drop(columns=['Unnamed: 0'])
    for csv_name in (ece_file_reg, nll_file_reg, bri_file_reg)
)

In [5]:
# Distinct dataset names present in the ECE table, in order of first appearance.
datasets = ECE.Dataset.unique()
print(datasets)

['cifar10' 'cifar100' 'cars' 'birds' 'svhn']


In [6]:
def highlight_min(s):
    '''
    Build per-element CSS that bolds the minimum of ``s``.

    Returns a list with one string per element of ``s``:
    'font-weight: bold' where the element equals ``s.min()`` (every tie is
    marked) and '' everywhere else. Written to be passed to ``Styler.apply``.
    '''
    at_minimum = s.eq(s.min())
    return [('font-weight: bold' if flag else '') for flag in at_minimum]

In [7]:
def highlight_min_br(s):
    '''
    Print ``s['ECE']``, build ``highlight_min`` stylers for the 'ECE', 'NLL'
    and 'Brier' entries of ``s``, and return ``s`` itself.

    ``s`` is presumably a DataFrame whose top column level holds the keys
    'ECE', 'NLL' and 'Brier' (e.g. a ``pd.concat(..., keys=[...])`` result)
    -- TODO confirm against the caller.

    NOTE(review): none of the three ``.style.apply(...)`` results is assigned
    or returned, so the value handed back is the unstyled ``s``. If styled
    output is wanted, the Styler has to be returned instead.
    '''
    # NOTE(review): looks like leftover debugging output -- confirm before keeping.
    print(s['ECE'])
    
    
    # NOTE(review): `subset` is given the frame produced by select_dtypes, not
    # a list of column labels -- presumably `.columns` was intended; verify.
    s['ECE'].style.apply(highlight_min, subset=s.select_dtypes(float))
    s['NLL'].style.apply(highlight_min, subset=s.select_dtypes(float))
    s['Brier'].style.apply(highlight_min, subset=s.select_dtypes(float))
      
    return s

### CIFAR 10

In [8]:
# Select the CIFAR-10 rows of each metric table, indexed by model name.
# Every table is filtered by its own Dataset column: a mask built from a
# different table is only valid if both share identical row order and index.
curr_ece = ECE.loc[ECE.Dataset == 'cifar10'].set_index('Model')
curr_nll = NLL.loc[NLL.Dataset == 'cifar10'].set_index('Model')
curr_bri = Bri.loc[Bri.Dataset == 'cifar10'].set_index('Model')

In [9]:
# ECE table with one row per calibration method and one column per model;
# the smallest value in each column is shown in bold.
curr_ece.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,2.83513,2.791077,4.450539,2.883827,4.605082,1.929727,3.11525
TS,1.386325,1.650473,1.373523,1.026017,2.514875,0.697262,1.041778
ETS,2.257274,2.30978,1.49105,0.852116,3.304092,1.593208,1.137043
MIR,1.425373,1.355157,1.917205,1.076754,1.63063,1.268224,1.237285
BTS,1.091841,1.266717,1.718641,1.185582,1.375733,1.071903,1.275605
PTS,1.324239,1.051156,1.593318,1.145192,1.787131,1.078594,1.474918
LTS,1.438661,1.291028,1.336975,1.079328,2.10229,0.791909,1.069909
HTS,1.270865,1.297424,1.466046,1.195204,2.102406,0.83993,1.255213
HnLTS,1.349832,1.212882,1.696917,0.927186,1.651571,0.727414,1.114712
LTS_torch,1.485014,1.134628,1.649367,1.069985,1.748888,0.747242,1.133219


In [10]:
# LaTeX source of the transposed ECE table (methods as rows).
print(curr_ece.drop(columns='Dataset').T.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
Model &  densenet-121 &  densenet-169 &  resnet-101 &  resnext-29\_8x16 &    vgg-19 &  wide-resnet-28x10 &  wide-resnet-40x10 \\
\midrule
Uncalibrated &      2.835130 &      2.791077 &    4.450539 &         2.883827 &  4.605082 &           1.929727 &           3.115250 \\
TS           &      1.386325 &      1.650473 &    1.373523 &         1.026017 &  2.514875 &           0.697262 &           1.041778 \\
ETS          &      2.257274 &      2.309780 &    1.491050 &         0.852116 &  3.304092 &           1.593208 &           1.137043 \\
MIR          &      1.425373 &      1.355157 &    1.917205 &         1.076754 &  1.630630 &           1.268224 &           1.237285 \\
BTS          &      1.091841 &      1.266717 &    1.718641 &         1.185582 &  1.375733 &           1.071903 &           1.275605 \\
PTS          &      1.324239 &      1.051156 &    1.593318 &         1.145192 &  1.787131 &           1.078594 &           1.474918 \\
LTS          &   

In [11]:
# NLL table with one row per calibration method and one column per model;
# the smallest value in each column is shown in bold.
curr_nll.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.188129,0.187019,0.30469,0.199952,0.299754,0.149671,0.206792
TS,0.161812,0.160774,0.216332,0.162096,0.235521,0.136182,0.163392
ETS,0.167319,0.168556,0.219902,0.161691,0.242422,0.142409,0.164236
MIR,0.162327,0.160594,0.218557,0.167636,0.225514,0.142545,0.165278
BTS,0.193055,0.236106,0.256801,0.204134,0.290246,0.210356,0.183968
PTS,0.161118,0.159222,0.220177,0.171337,0.233324,0.138533,0.188751
LTS,0.1599,0.159712,0.213228,0.162771,0.231287,0.135062,0.163067
HTS,0.160745,0.159368,0.213354,0.164026,0.231543,0.13508,0.163423
HnLTS,0.161326,0.15917,0.217995,0.162159,0.233461,0.136649,0.163337
LTS_torch,0.160971,0.158987,0.217037,0.162298,0.23292,0.136206,0.163509


In [12]:
# LaTeX source of the transposed NLL table (methods as rows).
print(curr_nll.drop(columns='Dataset').T.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
Model &  densenet-121 &  densenet-169 &  resnet-101 &  resnext-29\_8x16 &    vgg-19 &  wide-resnet-28x10 &  wide-resnet-40x10 \\
\midrule
Uncalibrated &      0.188129 &      0.187019 &    0.304690 &         0.199952 &  0.299754 &           0.149671 &           0.206792 \\
TS           &      0.161812 &      0.160774 &    0.216332 &         0.162096 &  0.235521 &           0.136182 &           0.163392 \\
ETS          &      0.167319 &      0.168556 &    0.219902 &         0.161691 &  0.242422 &           0.142409 &           0.164236 \\
MIR          &      0.162327 &      0.160594 &    0.218557 &         0.167636 &  0.225514 &           0.142545 &           0.165278 \\
BTS          &      0.193055 &      0.236106 &    0.256801 &         0.204134 &  0.290246 &           0.210356 &           0.183968 \\
PTS          &      0.161118 &      0.159222 &    0.220177 &         0.171337 &  0.233324 &           0.138533 &           0.188751 \\
LTS          &   

In [13]:
# Brier-score table with one row per calibration method and one column per
# model; the smallest value in each column is shown in bold.
curr_bri.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.076359,0.075375,0.110169,0.082816,0.110094,0.062887,0.081958
TS,0.072888,0.071723,0.101096,0.078307,0.101859,0.060798,0.076777
ETS,0.07244,0.07121,0.101027,0.078233,0.100686,0.060941,0.076449
MIR,0.072387,0.071437,0.101698,0.078487,0.099821,0.061136,0.076852
BTS,0.072232,0.071064,0.101902,0.079086,0.100335,0.061111,0.077036
PTS,0.072323,0.071051,0.101349,0.079318,0.100705,0.061378,0.07888
LTS,0.072542,0.071387,0.100319,0.078227,0.101059,0.061003,0.076703
HTS,0.072672,0.071392,0.100458,0.078375,0.101158,0.060643,0.076766
HnLTS,0.072547,0.071127,0.101074,0.078303,0.100406,0.060695,0.076618
LTS_torch,0.072614,0.071239,0.101119,0.078432,0.100732,0.060745,0.076813


In [14]:
# LaTeX source of the transposed Brier-score table (methods as rows).
print(curr_bri.drop(columns='Dataset').T.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
Model &  densenet-121 &  densenet-169 &  resnet-101 &  resnext-29\_8x16 &    vgg-19 &  wide-resnet-28x10 &  wide-resnet-40x10 \\
\midrule
Uncalibrated &      0.076359 &      0.075375 &    0.110169 &         0.082816 &  0.110094 &           0.062887 &           0.081958 \\
TS           &      0.072888 &      0.071723 &    0.101096 &         0.078307 &  0.101859 &           0.060798 &           0.076777 \\
ETS          &      0.072440 &      0.071210 &    0.101027 &         0.078233 &  0.100686 &           0.060941 &           0.076449 \\
MIR          &      0.072387 &      0.071437 &    0.101698 &         0.078487 &  0.099821 &           0.061136 &           0.076852 \\
BTS          &      0.072232 &      0.071064 &    0.101902 &         0.079086 &  0.100335 &           0.061111 &           0.077036 \\
PTS          &      0.072323 &      0.071051 &    0.101349 &         0.079318 &  0.100705 &           0.061378 &           0.078880 \\
LTS          &   

In [15]:
# Side-by-side overview of the three metrics, one row per model.
# NOTE(review): the second column level is labelled 'Model' but holds the
# calibration-method names -- confirm whether that label is intended.
pd.concat(
    [curr_ece, curr_nll, curr_bri],
    axis='columns',
    keys=['ECE', 'NLL', 'Brier'],
    names=['Metric', 'Model'],
)

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Dataset,Uncalibrated,TS,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,...,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,cifar10,2.83513,1.386325,2.257274,1.425373,1.091841,1.324239,1.438661,1.270865,1.349832,...,0.07244,0.072387,0.072232,0.072323,0.072542,0.072672,0.072547,0.072614,0.07244,0.072482
densenet-169,cifar10,2.791077,1.650473,2.30978,1.355157,1.266717,1.051156,1.291028,1.297424,1.212882,...,0.07121,0.071437,0.071064,0.071051,0.071387,0.071392,0.071127,0.071239,0.07094,0.071011
resnet-101,cifar10,4.450539,1.373523,1.49105,1.917205,1.718641,1.593318,1.336975,1.466046,1.696917,...,0.101027,0.101698,0.101902,0.101349,0.100319,0.100458,0.101074,0.101119,0.100291,0.100384
resnext-29_8x16,cifar10,2.883827,1.026017,0.852116,1.076754,1.185582,1.145192,1.079328,1.195204,0.927186,...,0.078233,0.078487,0.079086,0.079318,0.078227,0.078375,0.078303,0.078432,0.078223,0.078397
vgg-19,cifar10,4.605082,2.514875,3.304092,1.63063,1.375733,1.787131,2.10229,2.102406,1.651571,...,0.100686,0.099821,0.100335,0.100705,0.101059,0.101158,0.100406,0.100732,0.099873,0.100247
wide-resnet-28x10,cifar10,1.929727,0.697262,1.593208,1.268224,1.071903,1.078594,0.791909,0.83993,0.727414,...,0.060941,0.061136,0.061111,0.061378,0.061003,0.060643,0.060695,0.060745,0.060974,0.060583
wide-resnet-40x10,cifar10,3.11525,1.041778,1.137043,1.237285,1.275605,1.474918,1.069909,1.255213,1.114712,...,0.076449,0.076852,0.077036,0.07888,0.076703,0.076766,0.076618,0.076813,0.07659,0.076733


### CIFAR 100

In [16]:
# Select the CIFAR-100 rows of each metric table, indexed by model name.
# Every table is filtered by its own Dataset column: a mask built from a
# different table is only valid if both share identical row order and index.
curr_ece = ECE.loc[ECE.Dataset == 'cifar100'].set_index('Model')
curr_nll = NLL.loc[NLL.Dataset == 'cifar100'].set_index('Model')
curr_bri = Bri.loc[Bri.Dataset == 'cifar100'].set_index('Model')

# Side-by-side overview of the three metrics, one row per model.
pd.concat([curr_ece, curr_nll, curr_bri], axis=1, keys=['ECE', 'NLL', 'Brier'], names=['Metric', 'Model'])

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Dataset,Uncalibrated,TS,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,...,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,cifar100,8.76025,3.93363,3.039655,4.72858,2.768561,4.380885,4.198,4.135675,3.313878,...,0.304354,0.304987,0.305248,0.304647,0.303898,0.303162,0.304911,0.309006,0.303184,0.304812
densenet-169,cifar100,8.932411,3.948774,2.897083,4.997186,3.187847,3.824487,4.229603,4.178941,3.314126,...,0.301037,0.301863,0.301962,0.29913,0.299873,0.299267,0.301612,0.305121,0.299293,0.301423
resnet-101,cifar100,11.446499,2.246798,2.243738,3.895614,2.224123,2.519535,2.375905,2.312218,2.268631,...,0.381624,0.384249,0.38247,0.380642,0.382163,0.380911,0.381698,0.385131,0.382068,0.38111
resnext-29_8x16,cifar100,9.692425,3.139508,2.675645,4.200096,2.061077,3.475635,3.251355,3.692094,2.021386,...,0.309294,0.31162,0.309646,0.309406,0.309433,0.309545,0.309001,0.311474,0.308849,0.310553
vgg-19,cifar100,17.631318,5.133481,5.364392,4.146808,3.885672,3.562247,3.728082,3.57921,3.621421,...,0.391021,0.388874,0.388022,0.388606,0.386526,0.38909,0.389501,0.398676,0.386308,0.392842
wide-resnet-28x10,cifar100,5.187933,4.629842,3.546959,3.919535,3.105283,4.366616,4.603869,4.342499,3.57485,...,0.287947,0.285417,0.287132,0.286019,0.285669,0.285447,0.287185,0.287528,0.284323,0.283893
wide-resnet-40x10,cifar100,14.784526,4.201723,2.739724,5.344059,3.550641,4.418229,4.425745,4.793971,3.730715,...,0.326992,0.33007,0.329314,0.324064,0.324544,0.32479,0.32859,0.334641,0.324243,0.32637


In [17]:
# ECE by calibration method (rows) and model (columns); bold marks the
# per-column minimum.
curr_ece.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,8.76025,8.932411,11.446499,9.692425,17.631318,5.187933,14.784526
TS,3.93363,3.948774,2.246798,3.139508,5.133481,4.629842,4.201723
ETS,3.039655,2.897083,2.243738,2.675645,5.364392,3.546959,2.739724
MIR,4.72858,4.997186,3.895614,4.200096,4.146808,3.919535,5.344059
BTS,2.768561,3.187847,2.224123,2.061077,3.885672,3.105283,3.550641
PTS,4.380885,3.824487,2.519535,3.475635,3.562247,4.366616,4.418229
LTS,4.198,4.229603,2.375905,3.251355,3.728082,4.603869,4.425745
HTS,4.135675,4.178941,2.312218,3.692094,3.57921,4.342499,4.793971
HnLTS,3.313878,3.314126,2.268631,2.021386,3.621421,3.57485,3.730715
LTS_torch,4.96567,4.999064,4.863569,3.762103,7.214644,3.998251,5.772907


In [18]:
# LaTeX source of the transposed ECE table (methods as rows).
print(curr_ece.drop(columns='Dataset').T.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
Model &  densenet-121 &  densenet-169 &  resnet-101 &  resnext-29\_8x16 &     vgg-19 &  wide-resnet-28x10 &  wide-resnet-40x10 \\
\midrule
Uncalibrated &      8.760250 &      8.932411 &   11.446499 &         9.692425 &  17.631318 &           5.187933 &          14.784526 \\
TS           &      3.933630 &      3.948774 &    2.246798 &         3.139508 &   5.133481 &           4.629842 &           4.201723 \\
ETS          &      3.039655 &      2.897083 &    2.243738 &         2.675645 &   5.364392 &           3.546959 &           2.739724 \\
MIR          &      4.728580 &      4.997186 &    3.895614 &         4.200096 &   4.146808 &           3.919535 &           5.344059 \\
BTS          &      2.768561 &      3.187847 &    2.224123 &         2.061077 &   3.885672 &           3.105283 &           3.550641 \\
PTS          &      4.380885 &      3.824487 &    2.519535 &         3.475635 &   3.562247 &           4.366616 &           4.418229 \\
LTS       

In [19]:
# NLL by calibration method (rows) and model (columns); bold marks the
# per-column minimum.
curr_nll.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.893868,0.874816,1.13431,0.939844,1.541421,0.817338,1.224773
TS,0.835487,0.81557,1.000677,0.822038,1.199659,0.813459,0.905464
ETS,0.859196,0.832331,1.009053,0.839845,1.206897,0.830835,0.926263
MIR,0.837844,0.816677,1.01046,0.833205,1.186491,0.799622,0.925428
BTS,0.827085,0.808159,1.00396,0.818997,1.192554,0.787617,0.903724
PTS,0.838474,0.813414,1.000943,0.824054,1.186666,0.797633,0.899661
LTS,0.835883,0.815818,1.007779,0.825208,1.179373,0.803225,0.897687
HTS,0.829349,0.810638,1.001205,0.825866,1.184594,0.803109,0.900487
HnLTS,0.825378,0.806233,1.000188,0.812846,1.194114,0.787076,0.899259
LTS_torch,0.832834,0.812004,1.015056,0.81998,1.218717,0.787368,0.913559


In [20]:
# LaTeX source of the transposed NLL table (methods as rows).
print(curr_nll.drop(columns='Dataset').T.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
Model &  densenet-121 &  densenet-169 &  resnet-101 &  resnext-29\_8x16 &    vgg-19 &  wide-resnet-28x10 &  wide-resnet-40x10 \\
\midrule
Uncalibrated &      0.893868 &      0.874816 &    1.134310 &         0.939844 &  1.541421 &           0.817338 &           1.224773 \\
TS           &      0.835487 &      0.815570 &    1.000677 &         0.822038 &  1.199659 &           0.813459 &           0.905464 \\
ETS          &      0.859196 &      0.832331 &    1.009053 &         0.839845 &  1.206897 &           0.830835 &           0.926263 \\
MIR          &      0.837844 &      0.816677 &    1.010460 &         0.833205 &  1.186491 &           0.799622 &           0.925428 \\
BTS          &      0.827085 &      0.808159 &    1.003960 &         0.818997 &  1.192554 &           0.787617 &           0.903724 \\
PTS          &      0.838474 &      0.813414 &    1.000943 &         0.824054 &  1.186666 &           0.797633 &           0.899661 \\
LTS          &   

In [21]:
# Brier score by calibration method (rows) and model (columns); bold marks
# the per-column minimum.
curr_bri.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101,resnext-29_8x16,vgg-19,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.317058,0.314196,0.405327,0.327484,0.443283,0.289225,0.370023
TS,0.304773,0.30167,0.381723,0.309558,0.391811,0.288609,0.327633
ETS,0.304354,0.301037,0.381624,0.309294,0.391021,0.287947,0.326992
MIR,0.304987,0.301863,0.384249,0.31162,0.388874,0.285417,0.33007
BTS,0.305248,0.301962,0.38247,0.309646,0.388022,0.287132,0.329314
PTS,0.304647,0.29913,0.380642,0.309406,0.388606,0.286019,0.324064
LTS,0.303898,0.299873,0.382163,0.309433,0.386526,0.285669,0.324544
HTS,0.303162,0.299267,0.380911,0.309545,0.38909,0.285447,0.32479
HnLTS,0.304911,0.301612,0.381698,0.309001,0.389501,0.287185,0.32859
LTS_torch,0.309006,0.305121,0.385131,0.311474,0.398676,0.287528,0.334641


In [22]:
# LaTeX source of the transposed Brier-score table (methods as rows).
print(curr_bri.drop(columns='Dataset').T.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
Model &  densenet-121 &  densenet-169 &  resnet-101 &  resnext-29\_8x16 &    vgg-19 &  wide-resnet-28x10 &  wide-resnet-40x10 \\
\midrule
Uncalibrated &      0.317058 &      0.314196 &    0.405327 &         0.327484 &  0.443283 &           0.289225 &           0.370023 \\
TS           &      0.304773 &      0.301670 &    0.381723 &         0.309558 &  0.391811 &           0.288609 &           0.327633 \\
ETS          &      0.304354 &      0.301037 &    0.381624 &         0.309294 &  0.391021 &           0.287947 &           0.326992 \\
MIR          &      0.304987 &      0.301863 &    0.384249 &         0.311620 &  0.388874 &           0.285417 &           0.330070 \\
BTS          &      0.305248 &      0.301962 &    0.382470 &         0.309646 &  0.388022 &           0.287132 &           0.329314 \\
PTS          &      0.304647 &      0.299130 &    0.380642 &         0.309406 &  0.388606 &           0.286019 &           0.324064 \\
LTS          &   

### Cars

In [23]:
# Select the Cars rows of each metric table, indexed by model name.
# Every table is filtered by its own Dataset column: a mask built from a
# different table is only valid if both share identical row order and index.
curr_ece = ECE.loc[ECE.Dataset == 'cars'].set_index('Model')
curr_nll = NLL.loc[NLL.Dataset == 'cars'].set_index('Model')
curr_bri = Bri.loc[Bri.Dataset == 'cars'].set_index('Model')

# Side-by-side overview of the three metrics, one row per model.
pd.concat([curr_ece, curr_nll, curr_bri], axis=1, keys=['ECE', 'NLL', 'Brier'], names=['Metric', 'Model'])

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Dataset,Uncalibrated,TS,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,...,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,cars,5.865746,2.342571,1.912941,2.703824,2.161027,10.371915,4.381359,3.063674,1.542926,...,0.165007,0.166427,0.166408,0.218779,0.177144,0.167982,0.164132,0.168628,0.176489,0.169262
densenet-169,cars,5.821673,2.380512,1.951687,2.389794,2.568552,7.804429,4.633774,3.470671,1.754317,...,0.162912,0.165602,0.167407,0.197024,0.17878,0.170651,0.162922,0.167869,0.17912,0.171441
resnet-18,cars,7.038802,1.87421,2.357757,2.95656,2.807826,8.553017,4.661463,2.673714,1.784478,...,0.194553,0.196421,0.199192,0.232065,0.208044,0.197424,0.194221,0.200765,0.208392,0.198198
resnet-50,cars,5.190265,2.481779,1.506835,2.076162,2.61041,6.400995,4.79106,2.808393,1.757856,...,0.153287,0.153729,0.156919,0.177963,0.167468,0.156327,0.153273,0.15627,0.169409,0.157146
resnet-101,cars,5.400409,2.312754,1.78631,1.963644,2.133736,8.125364,3.983332,2.319405,1.582546,...,0.15304,0.154453,0.157109,0.191357,0.16668,0.156315,0.15295,0.157172,0.16764,0.156984


In [24]:
# ECE by calibration method (rows) and model (columns); bold marks the
# per-column minimum.
curr_ece.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,5.865746,5.821673,7.038802,5.190265,5.400409
TS,2.342571,2.380512,1.87421,2.481779,2.312754
ETS,1.912941,1.951687,2.357757,1.506835,1.78631
MIR,2.703824,2.389794,2.95656,2.076162,1.963644
BTS,2.161027,2.568552,2.807826,2.61041,2.133736
PTS,10.371915,7.804429,8.553017,6.400995,8.125364
LTS,4.381359,4.633774,4.661463,4.79106,3.983332
HTS,3.063674,3.470671,2.673714,2.808393,2.319405
HnLTS,1.542926,1.754317,1.784478,1.757856,1.582546
LTS_torch,3.318847,3.592184,4.307771,2.620323,2.940201


In [25]:
# NLL by calibration method (rows) and model (columns); bold marks the
# per-column minimum.
curr_nll.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,0.533742,0.534814,0.635785,0.493354,0.49331
TS,0.417259,0.416619,0.492074,0.390955,0.384694
ETS,0.443188,0.434737,0.50993,0.396151,0.395463
MIR,0.426203,0.429888,0.507968,0.402207,0.39259
BTS,0.504127,0.510134,0.664115,0.607957,0.470656
PTS,inf,inf,inf,inf,inf
LTS,0.597424,0.818243,0.728613,0.80679,0.599737
HTS,0.456851,0.512802,0.537502,0.425045,0.41115
HnLTS,0.412039,0.410757,0.490101,0.384607,0.378862
LTS_torch,0.44011,0.440125,0.535677,0.408725,0.404921


In [26]:
# Brier score by calibration method (rows) and model (columns); bold marks
# the per-column minimum.
curr_bri.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-18,resnet-50,resnet-101
Uncalibrated,0.17345,0.172543,0.207543,0.160202,0.16174
TS,0.164603,0.163421,0.194467,0.154095,0.15365
ETS,0.165007,0.162912,0.194553,0.153287,0.15304
MIR,0.166427,0.165602,0.196421,0.153729,0.154453
BTS,0.166408,0.167407,0.199192,0.156919,0.157109
PTS,0.218779,0.197024,0.232065,0.177963,0.191357
LTS,0.177144,0.17878,0.208044,0.167468,0.16668
HTS,0.167982,0.170651,0.197424,0.156327,0.156315
HnLTS,0.164132,0.162922,0.194221,0.153273,0.15295
LTS_torch,0.168628,0.167869,0.200765,0.15627,0.157172


### Birds

In [27]:
# Select the Birds rows of each metric table, indexed by model name.
# Every table is filtered by its own Dataset column: a mask built from a
# different table is only valid if both share identical row order and index.
curr_ece = ECE.loc[ECE.Dataset == 'birds'].set_index('Model')
curr_nll = NLL.loc[NLL.Dataset == 'birds'].set_index('Model')
curr_bri = Bri.loc[Bri.Dataset == 'birds'].set_index('Model')

# Side-by-side overview of the three metrics, one row per model.
pd.concat([curr_ece, curr_nll, curr_bri], axis=1, keys=['ECE', 'NLL', 'Brier'], names=['Metric', 'Model'])

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Dataset,Uncalibrated,TS,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,...,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,birds,12.42989,2.940008,3.145051,4.141056,3.561389,16.208561,7.703473,4.438555,3.100523,...,0.323294,0.327072,0.329046,0.409538,0.355864,0.329922,0.323027,0.334633,0.35967,0.331863
densenet-169,birds,12.649694,3.089719,3.098617,4.461036,4.548998,10.978034,8.913095,4.372993,2.529571,...,0.314541,0.318877,0.324452,0.35678,0.350502,0.320999,0.314144,0.326057,0.358429,0.323257
resnet-101,birds,12.642524,2.837463,3.252857,5.178915,3.864756,8.798844,8.876726,4.24098,2.759475,...,0.305641,0.31026,0.311545,0.339828,0.339734,0.310262,0.305333,0.31824,0.342638,0.312904


In [28]:
# ECE by calibration method (rows) and model (columns); bold marks the
# per-column minimum.
curr_ece.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101
Uncalibrated,12.42989,12.649694,12.642524
TS,2.940008,3.089719,2.837463
ETS,3.145051,3.098617,3.252857
MIR,4.141056,4.461036,5.178915
BTS,3.561389,4.548998,3.864756
PTS,16.208561,10.978034,8.798844
LTS,7.703473,8.913095,8.876726
HTS,4.438555,4.372993,4.24098
HnLTS,3.100523,2.529571,2.759475
LTS_torch,7.171504,6.653252,7.085772


In [29]:
# NLL by calibration method (rows) and model (columns); bold marks the
# per-column minimum.
curr_nll.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101
Uncalibrated,1.196887,1.184059,1.169428
TS,0.900072,0.867165,0.837691
ETS,0.901822,0.867915,0.839361
MIR,0.914021,0.885687,0.854628
BTS,1.159346,1.269112,1.108864
PTS,inf,inf,inf
LTS,1.775099,inf,inf
HTS,0.987829,0.948902,0.883248
HnLTS,0.897376,0.863778,0.832701
LTS_torch,0.975315,0.93053,0.911211


In [30]:
# Brier score by calibration method (rows) and model (columns); bold marks
# the per-column minimum.
curr_bri.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,resnet-101
Uncalibrated,0.352798,0.347038,0.337751
TS,0.323056,0.314235,0.305229
ETS,0.323294,0.314541,0.305641
MIR,0.327072,0.318877,0.31026
BTS,0.329046,0.324452,0.311545
PTS,0.409538,0.35678,0.339828
LTS,0.355864,0.350502,0.339734
HTS,0.329922,0.320999,0.310262
HnLTS,0.323027,0.314144,0.305333
LTS_torch,0.334633,0.326057,0.31824


### SVHN

In [31]:
# Select the SVHN rows of each metric table, indexed by model name.
# Every table is filtered by its own Dataset column: a mask built from a
# different table is only valid if both share identical row order and index.
curr_ece = ECE.loc[ECE.Dataset == 'svhn'].set_index('Model')
curr_nll = NLL.loc[NLL.Dataset == 'svhn'].set_index('Model')
curr_bri = Bri.loc[Bri.Dataset == 'svhn'].set_index('Model')

# Side-by-side overview of the three metrics, one row per model.
pd.concat([curr_ece, curr_nll, curr_bri], axis=1, keys=['ECE', 'NLL', 'Brier'], names=['Metric', 'Model'])

Metric,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,ECE,...,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier,Brier
Model,Dataset,Uncalibrated,TS,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,...,ETS,MIR,BTS,PTS,LTS,HTS,HnLTS,LTS_torch,HTS_torch,HnLTS_torch
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
densenet-121,svhn,2.0588,1.445866,2.573994,0.838403,0.724117,0.983322,1.569962,1.537188,0.944558,...,0.052608,0.052342,0.052084,0.052567,0.052431,0.052481,0.052127,0.052107,0.052118,0.052071
densenet-169,svhn,0.491088,1.037124,1.156383,1.017681,0.933092,0.949873,0.954254,1.074799,0.970604,...,0.052127,0.05232,0.052088,0.052333,0.051998,0.052118,0.051816,0.051687,0.051911,0.051785
wide-resnet-28x10,svhn,1.553618,1.083106,1.589714,0.914009,1.095303,1.099233,0.938792,1.04917,1.090027,...,0.053107,0.052946,0.053085,0.052691,0.052587,0.052628,0.052786,0.052785,0.052653,0.052614
wide-resnet-40x10,svhn,1.330812,1.27623,2.535067,0.844367,0.880129,1.131839,1.272668,1.357064,1.083464,...,0.048666,0.048223,0.048211,0.048277,0.048142,0.048122,0.048094,0.048122,0.048017,0.047991


In [32]:
# ECE by calibration method (rows) and model (columns); bold marks the
# per-column minimum.
curr_ece.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,2.0588,0.491088,1.553618,1.330812
TS,1.445866,1.037124,1.083106,1.27623
ETS,2.573994,1.156383,1.589714,2.535067
MIR,0.838403,1.017681,0.914009,0.844367
BTS,0.724117,0.933092,1.095303,0.880129
PTS,0.983322,0.949873,1.099233,1.131839
LTS,1.569962,0.954254,0.938792,1.272668
HTS,1.537188,1.074799,1.04917,1.357064
HnLTS,0.944558,0.970604,1.090027,1.083464
LTS_torch,1.032282,0.864346,1.061532,1.143867


In [33]:
# NLL by calibration method (rows) and model (columns); bold marks the
# per-column minimum.
curr_nll.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.159902,0.127533,0.141678,0.128077
TS,0.141412,0.12926,0.134269,0.126885
ETS,0.151157,0.129777,0.13757,0.137088
MIR,0.138487,0.129358,0.133161,0.124456
BTS,0.168346,0.152188,0.145151,0.136512
PTS,0.140627,0.130542,0.132951,0.126536
LTS,0.141636,0.128872,0.132491,0.125764
HTS,0.141514,0.129467,0.132928,0.125919
HnLTS,0.140094,0.128562,0.134065,0.127044
LTS_torch,0.139748,0.128041,0.133697,0.126503


In [34]:
# Brier score by calibration method (rows) and model (columns); bold marks
# the per-column minimum.
curr_bri.drop(columns='Dataset').T.style.apply(highlight_min)

Model,densenet-121,densenet-169,wide-resnet-28x10,wide-resnet-40x10
Uncalibrated,0.054979,0.051542,0.054144,0.049306
TS,0.052406,0.052033,0.052815,0.048192
ETS,0.052608,0.052127,0.053107,0.048666
MIR,0.052342,0.05232,0.052946,0.048223
BTS,0.052084,0.052088,0.053085,0.048211
PTS,0.052567,0.052333,0.052691,0.048277
LTS,0.052431,0.051998,0.052587,0.048142
HTS,0.052481,0.052118,0.052628,0.048122
HnLTS,0.052127,0.051816,0.052786,0.048094
LTS_torch,0.052107,0.051687,0.052785,0.048122
