In [1]:
import pandas as pd
import numpy as np
import utils
import torch.nn.functional as F
import torch

In [2]:
# List of available dataset
datasets = ["Caltech256","SUN397","CINIC10","CIFAR10","iWildCam","iWildCamOOD"]
input_prefix = "../Pred/"
target_prefix = "../Pred/"
output_prefix = "csv/uncertainty_"

In [3]:
for ds in datasets:
    df18 = pd.read_csv(f"{input_prefix}{ds}_Resnet18.csv")
    df50 = pd.read_csv(f"{input_prefix}{ds}_Resnet50.csv")
    df101 = pd.read_csv(f"{input_prefix}{ds}_Resnet101.csv")
    label = pd.read_csv(f"{target_prefix}target_{ds}.csv")
    prediction = pd.DataFrame({
    "pred18": df18.idxmax(axis=1),
    "pred50": df50.idxmax(axis=1),
    "pred101": df101.idxmax(axis=1),
    "target": label['target']
    })
    prediction["pred18"] = prediction["pred18"].str.extract('(\d+)').astype(int)
    prediction["pred50"] = prediction["pred50"].str.extract('(\d+)').astype(int)
    prediction["pred101"] = prediction["pred101"].str.extract('(\d+)').astype(int)
    prediction.to_csv(f"csv/prediction_{ds}_Resnet.csv")
    unc_pred = pd.DataFrame()
    unc_pred["ent_18"] = utils.calc_entr_torch(df18)
    unc_pred["ent_50"] = utils.calc_entr_torch(df50)
    unc_pred["ent_101"] = utils.calc_entr_torch(df101)
    unc_pred["ce_18_50"] = utils.calc_cross_entr_torch(df18,df50)
    unc_pred["ce_50_18"] = utils.calc_cross_entr_torch(df50,df18)
    
    unc_pred["ent_18_np"] = utils.calc_entropy_np(df18)
    unc_pred["ent_50_np"] = utils.calc_entropy_np(df50)
    unc_pred["ent_101_np"] = utils.calc_entropy_np(df101)
    unc_pred["ce_18_50_np"] = utils.calc_cross_ent_np(df18,df50)
    unc_pred["ce_50_18_np"] = utils.calc_cross_ent_np(df50,df18)
    
    # unc_pred["ce_18_101"] = utils.calc_cross_entr_torch(df18,df101)
    # unc_pred["ce_101_18"] = utils.calc_cross_entr_torch(df101,df18)
    # unc_pred["kl_18_101"] = utils.calc_kl_torch(df18,df101)
    # unc_pred["kl_101_18"] = utils.calc_kl_torch(df101,df18)
    # unc_pred["kl_18_50"] = utils.calc_kl_torch(df18,df50)
    # unc_pred["kl_50_18"] = utils.calc_kl_torch(df50,df18)
    
    # Calculate NLL and Brier Scores for each sample and add as columns
    # ==============
    flat_label = label['target'].values.flatten()
    df18_array = F.softmax(torch.tensor(df18.values),dim=1).numpy()
    df50_array = F.softmax(torch.tensor(df50.values),dim=1).numpy()
    df101_array = F.softmax(torch.tensor(df101.values),dim=1).numpy()
    # Extracting the predicted probabilities for the true class labels
    predicted_probabilities18 = df18_array[np.arange(df18_array.shape[0]), flat_label]
    predicted_probabilities50 = df50_array[np.arange(df50_array.shape[0]), flat_label]
    predicted_probabilities101 = df101_array[np.arange(df101_array.shape[0]), flat_label]
    # NLL
    unc_pred["nll_18"] = (-np.log(predicted_probabilities18)).clip(0)
    unc_pred["nll_50"] = (-np.log(predicted_probabilities50)).clip(0)
    unc_pred["nll_101"] = (-np.log(predicted_probabilities101)).clip(0)
    # Brier Scores:
    unc_pred["brier_18"] = (predicted_probabilities18-1)**2 + np.sum(np.square(df18),axis=1) - predicted_probabilities18**2
    unc_pred["brier_50"] = (predicted_probabilities50-1)**2 + np.sum(np.square(df50),axis=1) - predicted_probabilities50**2
    unc_pred["brier_101"] = (predicted_probabilities101-1)**2 + np.sum(np.square(df101),axis=1) - predicted_probabilities101**2
    
    unc_pred["softmax_response18"] = np.max(df18_array,axis=1)
    unc_pred["softmax_response50"] = np.max(df50_array,axis=1)
    unc_pred["softmax_response101"] = np.max(df101_array,axis=1)
    
    unc_pred.to_csv(f"{output_prefix}{ds}_Resnet.csv")

In [4]:
unc_pred.describe()

Unnamed: 0,ent_18,ent_50,ent_101,ce_18_50,ce_50_18,ent_18_np,ent_50_np,ent_101_np,ce_18_50_np,ce_50_18_np,nll_18,nll_50,nll_101,brier_18,brier_50,brier_101,softmax_response18,softmax_response50,softmax_response101
count,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0
mean,0.889204,0.614015,0.6041855,1.904223,1.258586,0.908081,0.63427,0.624498,1.859655,1.274438,1.40297,1.246521,1.205563,15809.016222,51085.848852,51293.506864,0.735688,0.808094,0.811942
std,0.841927,0.715217,0.6885433,2.084453,1.373666,0.839355,0.713424,0.686733,1.96265,1.368781,2.093564,2.527356,2.453801,5867.312412,37197.453033,42290.403784,0.268657,0.247314,0.238007
min,1.3e-05,2e-06,6.648495e-07,0.000115,7.3e-05,0.022618,0.02261,0.022609,0.022685,0.022654,7.529751e-07,8.741377e-08,3.69816e-08,3273.658247,4350.102204,4042.685223,0.118557,0.129125,0.108329
25%,0.036333,0.029135,0.02015596,0.048672,0.055884,0.057744,0.050791,0.042135,0.068042,0.075636,0.004128977,0.003397866,0.002300647,11329.549917,25523.874881,23626.483949,0.486693,0.620672,0.635831
50%,0.699956,0.206073,0.2523807,1.321672,0.658207,0.719415,0.226523,0.273073,1.30872,0.67477,0.2900073,0.03996835,0.05343149,16599.133756,42656.692101,38958.37541,0.825028,0.964913,0.954128
75%,1.676837,1.186758,1.150174,3.246978,2.324253,1.693381,1.205482,1.169036,3.20994,2.338785,2.293432,1.3131,1.196242,20021.998761,63813.437549,64436.355518,0.995898,0.996651,0.997726
max,3.135219,3.124727,3.151564,17.263042,10.188409,3.146506,3.136331,3.161757,11.353037,9.824684,22.56981,26.12422,31.62048,39465.65425,338336.15718,437540.781308,0.999999,1.0,1.0


In [5]:
for ds in datasets:
    df18 = pd.read_csv(f"{input_prefix}{ds}_EffNet_S.csv")
    df50 = pd.read_csv(f"{input_prefix}{ds}_EffNet_M.csv")
    df101 = pd.read_csv(f"{input_prefix}{ds}_EffNet_L.csv")
    label = pd.read_csv(f"{target_prefix}target_{ds}.csv")
    prediction = pd.DataFrame({
    "pred18": df18.idxmax(axis=1),
    "pred50": df50.idxmax(axis=1),
    "pred101": df101.idxmax(axis=1),
    "target": label['target']
    })
    prediction["pred18"] = prediction["pred18"].str.extract('(\d+)').astype(int)
    prediction["pred50"] = prediction["pred50"].str.extract('(\d+)').astype(int)
    prediction["pred101"] = prediction["pred101"].str.extract('(\d+)').astype(int)
    prediction.to_csv(f"csv/prediction_{ds}_Effnet.csv")
    unc_pred = pd.DataFrame()
    unc_pred["ent_18"] = utils.calc_entr_torch(df18)
    unc_pred["ent_50"] = utils.calc_entr_torch(df50)
    unc_pred["ent_101"] = utils.calc_entr_torch(df101)
    unc_pred["kl_18_101"] = utils.calc_kl_torch(df18,df101)
    unc_pred["kl_101_18"] = utils.calc_kl_torch(df101,df18)
    unc_pred["kl_18_50"] = utils.calc_kl_torch(df18,df50)
    unc_pred["kl_50_18"] = utils.calc_kl_torch(df50,df18)
    unc_pred["ce_18_101"] = utils.calc_cross_entr_torch(df18,df101)
    unc_pred["ce_101_18"] = utils.calc_cross_entr_torch(df101,df18)
    unc_pred["ce_18_50"] = utils.calc_cross_entr_torch(df18,df50)
    unc_pred["ce_50_18"] = utils.calc_cross_entr_torch(df50,df18)
    # Calculate NLL and Brier Scores for each sample and add as columns
    # ==============
    flat_label = label['target'].values.flatten()
    df18_array = F.softmax(torch.tensor(df18.values),dim=1).numpy()
    df50_array = F.softmax(torch.tensor(df50.values),dim=1).numpy()
    df101_array = F.softmax(torch.tensor(df101.values),dim=1).numpy()
    # Extracting the predicted probabilities for the true class labels
    predicted_probabilities18 = df18_array[np.arange(df18_array.shape[0]), flat_label]
    predicted_probabilities50 = df50_array[np.arange(df50_array.shape[0]), flat_label]
    predicted_probabilities101 = df101_array[np.arange(df101_array.shape[0]), flat_label]
    # NLL
    unc_pred["nll_18"] = (-np.log(predicted_probabilities18)).clip(0)
    unc_pred["nll_50"] = (-np.log(predicted_probabilities50)).clip(0)
    unc_pred["nll_101"] = (-np.log(predicted_probabilities101)).clip(0)
    # Brier Scores:
    unc_pred["brier_18"] = (predicted_probabilities18-1)**2 + np.sum(np.square(df18),axis=1) - predicted_probabilities18**2
    unc_pred["brier_50"] = (predicted_probabilities50-1)**2 + np.sum(np.square(df50),axis=1) - predicted_probabilities50**2
    unc_pred["brier_101"] = (predicted_probabilities101-1)**2 + np.sum(np.square(df101),axis=1) - predicted_probabilities101**2
    
    unc_pred["softmax_response18"] = np.max(df18_array,axis=1)
    unc_pred["softmax_response50"] = np.max(df50_array,axis=1)
    unc_pred["softmax_response101"] = np.max(df101_array,axis=1)
    
    unc_pred.to_csv(f"{output_prefix}{ds}_Effnet.csv")

In [6]:
unc_pred.describe()

Unnamed: 0,ent_18,ent_50,ent_101,kl_18_101,kl_101_18,kl_18_50,kl_50_18,ce_18_101,ce_101_18,ce_18_50,ce_50_18,nll_18,nll_50,nll_101,brier_18,brier_50,brier_101,softmax_response18,softmax_response50,softmax_response101
count,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0,42791.0
mean,0.847943,0.7331715,0.6647671,0.593326,0.499082,0.537546,0.466557,1.441269,1.163849,1.385488,1.199728,1.198337,1.305732,1.022361,5047.923408,7188.314262,25456.735228,0.75359,0.776771,0.804105
std,0.947356,0.8761408,0.8016632,0.906902,0.761314,0.851016,0.756175,1.711959,1.342142,1.580087,1.437407,1.998626,2.206212,2.057509,3537.499997,4699.958335,14647.962317,0.29102,0.278324,0.254182
min,5.1e-05,5.915678e-07,5.464411e-07,2e-05,1.4e-05,8e-06,5e-06,7.8e-05,3.3e-05,6.2e-05,1.6e-05,3e-06,3.049463e-08,2.960561e-08,439.791988,803.259014,3351.505976,0.102278,0.085067,0.109315
25%,0.033944,0.00710419,0.01249933,0.012331,0.006337,0.00816,0.003834,0.054429,0.0236,0.04504,0.016354,0.003806,0.0006953539,0.00132615,2568.206771,3561.454603,15068.032929,0.477282,0.514777,0.634873
50%,0.305084,0.1520712,0.2835208,0.157553,0.129576,0.149117,0.082469,0.524348,0.494535,0.531936,0.285409,0.062973,0.03004306,0.05848378,3946.424092,6148.76459,21444.894751,0.945124,0.975203,0.946978
75%,1.691683,1.551648,1.168732,0.801505,0.722386,0.796781,0.682479,2.542042,2.236074,2.653186,2.412531,1.871165,2.100984,1.014336,6382.898398,9481.519049,32019.70354,0.996234,0.999309,0.998683
max,3.449413,3.974632,3.323578,9.154055,8.48232,15.302627,9.684196,10.893841,10.007609,16.054497,13.376656,15.955302,23.54293,23.42685,28363.706849,51582.364686,145817.138231,0.999997,1.0,1.0
