In [20]:
import pandas as pd
import numpy as np
from scipy.special import rel_entr, entr, xlogy

In [21]:
# Names of the datasets to process; each name is interpolated into the
# prediction/target CSV paths read further down.
datasets = [
    "DTD",
    "EuroSAT",
    "GTSRB",
    "MNIST",
    "SVHN",
    "Caltech256",
]

In [22]:
def calc_kl(P, Q):
    """Row-wise Kullback-Leibler divergence KL(P || Q).

    Args:
        P: 2-D array-like; each row is expected to be a probability distribution.
        Q: 2-D array-like broadcastable against ``P``.

    Returns:
        One value per row: the sum over axis 1 of ``rel_entr(P, Q)``.
    """
    elementwise_divergence = rel_entr(P, Q)
    return np.sum(elementwise_divergence, axis=1)

def calc_entropy(P):
    """Row-wise Shannon entropy of ``P``.

    Args:
        P: 2-D array-like; each row is expected to be a probability distribution.

    Returns:
        One value per row: the sum over axis 1 of ``entr(P)`` (i.e. ``-p * log(p)``).
    """
    elementwise_entropy = entr(P)
    return np.sum(elementwise_entropy, axis=1)

def calc_cross_ent(P, Q):
    """Row-wise cross-entropy H(P, Q) = -sum(P * log(Q)).

    ``xlogy`` is used so that terms with ``P == 0`` contribute 0 instead of
    producing NaN when the matching entry of ``Q`` is also 0.

    Args:
        P: 2-D array-like; each row is expected to be a probability distribution.
        Q: 2-D array-like broadcastable against ``P``.

    Returns:
        One value per row: the negated sum over axis 1 of ``xlogy(P, Q)``.
    """
    weighted_log_q = xlogy(P, Q)
    row_totals = np.sum(weighted_log_q, axis=1)
    return -row_totals

def calc_nll(p):
    """Negative log-likelihood of the given probability (or probabilities).

    Args:
        p: Scalar or array-like of probabilities.

    Returns:
        ``-log(p)``, computed element-wise with the natural logarithm.
    """
    log_likelihood = np.log(p)
    return -log_likelihood


In [24]:
# For every dataset: load the per-model probability tables and the targets,
# write the hard predictions to ``prediction_{ds}.csv`` and the per-sample
# uncertainty / calibration measures to ``uncertainty_{ds}.csv``.
for ds in datasets:
    # Probability tables as read from disk; the logic below treats rows as
    # samples and columns as classes (idxmax / indexing by target label).
    df18 = pd.read_csv(f"../Pred/{ds}_Resnet18.csv")
    df50 = pd.read_csv(f"../Pred/{ds}_Resnet50.csv")
    df101 = pd.read_csv(f"../Pred/{ds}_Resnet101.csv")
    label = pd.read_csv(f"../Pred/target_{ds}.csv")

    # Fail early with a readable message: mismatched row counts would otherwise
    # surface later as NaNs from index alignment or an opaque IndexError.
    row_counts = {
        "Resnet18": len(df18),
        "Resnet50": len(df50),
        "Resnet101": len(df101),
        "target": len(label),
    }
    if len(set(row_counts.values())) != 1:
        raise ValueError(f"Row count mismatch for dataset {ds!r}: {row_counts}")

    # Hard predictions: the column label holding the maximum value in each row.
    prediction = pd.DataFrame({
        "pred18": df18.idxmax(axis=1),
        "pred50": df50.idxmax(axis=1),
        "pred101": df101.idxmax(axis=1),
        "target": label["target"],
    })
    # Column labels are strings; keep only their first run of digits as the
    # integer class id. Raw string avoids the invalid "\d" escape warning, and
    # expand=False makes str.extract return a Series rather than a DataFrame.
    for pred_col in ("pred18", "pred50", "pred101"):
        prediction[pred_col] = (
            prediction[pred_col].str.extract(r"(\d+)", expand=False).astype(int)
        )
    prediction.to_csv(f"prediction_{ds}.csv")

    # Per-sample divergence / entropy measures between Resnet18 and Resnet101.
    unc_pred = pd.DataFrame()
    unc_pred["kl_18_101"] = calc_kl(df18, df101)
    unc_pred["kl_101_18"] = calc_kl(df101, df18)
    unc_pred["ent_18"] = calc_entropy(df18)
    unc_pred["ent_101"] = calc_entropy(df101)
    unc_pred["ce_18_101"] = calc_cross_ent(df18, df101)
    unc_pred["ce_101_18"] = calc_cross_ent(df101, df18)

    # Calculate NLL and Brier scores for each sample and add them as columns.
    flat_label = label["target"].to_numpy()
    df18_array = df18.to_numpy()
    df101_array = df101.to_numpy()
    print("df18_array shape:", df18_array.shape)
    print("df101_array shape:", df101_array.shape)
    print("flat_label shape:", flat_label.shape)

    # Probability each model assigned to the true class of every sample
    # (the target values are used directly as column positions).
    sample_idx = np.arange(df18_array.shape[0])
    predicted_probabilities18 = df18_array[sample_idx, flat_label]
    predicted_probabilities101 = df101_array[sample_idx, flat_label]
    unc_pred["nll_18"] = calc_nll(predicted_probabilities18)
    unc_pred["nll_101"] = calc_nll(predicted_probabilities101)

    # Brier score sum_k (p_k - y_k)^2 with one-hot y, expanded as
    # (p_true - 1)^2 + sum_k p_k^2 - p_true^2 so no one-hot matrix is needed.
    unc_pred["brier_18"] = (
        (predicted_probabilities18 - 1) ** 2
        + np.sum(np.square(df18), axis=1)
        - predicted_probabilities18 ** 2
    )
    unc_pred["brier_101"] = (
        (predicted_probabilities101 - 1) ** 2
        + np.sum(np.square(df101), axis=1)
        - predicted_probabilities101 ** 2
    )

    unc_pred.to_csv(f"uncertainty_{ds}.csv")

df18_array shape: (1880, 47)
df101_array shape: (1880, 47)
flat_label shape: (1880,)
df18_array shape: (5400, 10)
df101_array shape: (5400, 10)
flat_label shape: (5400,)
df18_array shape: (6315, 43)
df101_array shape: (6315, 43)
flat_label shape: (6315,)
df18_array shape: (5000, 10)
df101_array shape: (5000, 10)
flat_label shape: (5000,)
df18_array shape: (13016, 10)
df101_array shape: (13016, 10)
flat_label shape: (13016,)
df18_array shape: (3061, 257)
df101_array shape: (3061, 257)
flat_label shape: (3061,)
