In [None]:
import os
import pathlib
import warnings
from collections.abc import Callable
from functools import partial

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from tqdm.notebook import tqdm

import utils

In [None]:
# Name of the dataset analysed by this notebook; it selects the experiment
# folder, the dataset folder and the results file used below.
DATASET = "iNaturalist19"
PATH_EXPERIMENTS = pathlib.Path("..") / "experiments" / DATASET
PATH_RESULTS = pathlib.Path(".") / "results"

# Folder holding one sub-folder per dataset. Set the DATASETS_ROOT environment
# variable to point somewhere else instead of editing this cell; the fallback
# is the location that used to be hard-coded here.
# TODO ln datasets2 at ".."
PATH_DATASETS_ROOT = pathlib.Path(
    os.environ.get("DATASETS_ROOT", "/Users/simo/Developer/datasets2/datasets")
)
PATH_DATASET = PATH_DATASETS_ROOT / DATASET
PATH_ENCODINGS = PATH_DATASET / "encodings"

# Hierarchy array of the dataset; indexed by level further down (HIERARCHY[level]).
HIERARCHY = np.load(PATH_DATASET / "hierarchy" / "hierarchy.npy")

# Experiments
- **xe-onehot**: the baseline, i.e. one-hot encoding + cross entropy loss
- **xe-b3p-beta\***: a type of hierarchical encoding proposed in "Beyond One-Hot Encoding: Injecting Semantics to Drive Image Classifiers" by Perotti, Bertolotto, Pastor and Panisson, hence "b3p". This approach requires choosing $\beta \in [0, 1]$, a hyperparameter that represents the "amount of one-hot encoding".
- **xe-mbm-beta\***: a type of hierarchical encoding proposed in "Making Better Mistakes: Leveraging Class Hierarchies with Deep Networks" by Bertinetto et al. $\beta \rightarrow +\infty \Rightarrow \textrm{mbm} \rightarrow \textrm{onehot}$
- **cd-barz-denzler**: a type of hierarchical encoding proposed in "Hierarchy-based Image Embeddings for Semantic Image Retrieval" by Barz and Denzler
- **cd-desc-pca-n_components\***: descriptions encodings, gpt + ada + pca to n_components.

In [None]:
# Experiment name -> list of run IDs (folder names under PATH_EXPERIMENTS).
# The insertion order of this dict drives the row order of the tables below.
experiments = {
    # xe-onehot
    "xe-onehot": ["0810_1157_d654214a_xe-onehot"],
    # cd-barz-denzler
    "cd-barz-denzler": ["0810_2203_538b8970_cd-barz-denzler"],
    # xe-b3p
    "xe-b3p-beta0.1": ["0814_1130_4c1708e6_xe-b3p-beta0.1"],
    "xe-b3p-beta0.2": ["0816_0709_636ef7c0_xe-b3p-beta0.2"],
    "xe-b3p-beta0.3": ["0817_1729_6c02220c_xe-b3p-beta0.3"],
    "xe-b3p-beta0.4": ["0818_1843_cff54970_xe-b3p-beta0.4"],
    "xe-b3p-beta0.5": ["0819_1329_2432f7ac_xe-b3p-beta0.5"],
    "xe-b3p-beta0.6": ["0820_1117_6e2b0835_xe-b3p-beta0.6"],
    "xe-b3p-beta0.7": ["0821_0908_6f9d1756_xe-b3p-beta0.7"],
    "xe-b3p-beta0.8": ["0822_1913_b681b04f_xe-b3p-beta0.8"],
    "xe-b3p-beta0.9": ["0824_0253_79cffac7_xe-b3p-beta0.9"],
    # xe-mbm
    "xe-mbm-beta1.0": ["0825_0507_68906d90_xe-mbm-beta1.0"],
    "xe-mbm-beta2.0": ["0825_1813_619ded60_xe-mbm-beta2.0"],
    "xe-mbm-beta3.0": ["0826_0104_70d32692_xe-mbm-beta3.0"],
    "xe-mbm-beta4.0": ["0826_1131_7c443b7d_xe-mbm-beta4.0"],
    "xe-mbm-beta5.0": ["0826_2321_e40a7d5c_xe-mbm-beta5.0"],
    "xe-mbm-beta10.0": ["0827_2105_696c59cb_xe-mbm-beta10.0"],
    "xe-mbm-beta15.0": ["0829_1305_3cea2b42_xe-mbm-beta15.0"],
    "xe-mbm-beta20.0": ["0831_0606_329b467a_xe-mbm-beta20.0"],
    "xe-mbm-beta30.0": ["0901_2319_b85f5f2a_xe-mbm-beta30.0"],
}

# Reverse lookup: run ID -> name of the experiment it belongs to.
experiments_ = dict(
    (run_id, name)
    for name, run_ids in experiments.items()
    for run_id in run_ids
)

# Experiment name -> encoding matrix used by that experiment. `preds_labels`
# normalises these matrices row-wise and projects outputs onto their rows.
encodings = {
    # xe-onehot: identity matrix sized by the last axis of HIERARCHY
    "xe-onehot": np.eye(HIERARCHY.shape[-1]),
    # xe-b3p: one file per beta under encodings/b3p
    **{
        f"xe-b3p-beta{beta}": np.load(PATH_ENCODINGS / "b3p" / f"beta{beta}.npy")
        for beta in ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9")
    },
    # xe-mbm: one file per beta under encodings/mbm
    **{
        f"xe-mbm-beta{beta}": np.load(PATH_ENCODINGS / "mbm" / f"beta{beta}.npy")
        for beta in ("1.0", "2.0", "3.0", "4.0", "5.0", "10.0", "15.0", "20.0", "30.0")
    },
    # cd-barz-denzler
    "cd-barz-denzler": np.load(PATH_ENCODINGS / "barz-denzler.npy"),
}

# Metrics

Metrics are scalar values that each capture one aspect of model performance.
They are computed on the test split of the dataset (or on the validation split if a test split does not exist).

They can be computed by considering the top-k (k) predictions of the models at different hierarchy levels (l).

- **error_rate***: the number of misclassified samples over the total number of samples. It accounts for the number of errors (quantity).
- **hier_dist***: the hierarchical distance, i.e. the predictions weighted by the LCA matrix, over the total number of samples. It accounts for both the quantity and the quality of errors.
- **hier_dist_mistake**: the hierarchical distance of mistakes, i.e. the predictions weighted by the LCA matrix, over the number of misclassified samples. It represents the severity of errors (quality).

In [None]:
def preds_labels(exp):
    """Return class scores and ground-truth labels for one experiment run.

    Loads ``outputs_targets.npz`` from the run folder ``PATH_EXPERIMENTS / exp``,
    L2-normalises the rows of the network outputs, of the targets and of the
    encoding matrix registered for the run, and compares them by dot product,
    i.e. by cosine similarity.

    Args:
        exp: Run identifier; a folder name under ``PATH_EXPERIMENTS`` and a key
            of ``experiments_``.

    Returns:
        A ``(preds, labels)`` tuple. ``preds`` holds the cosine similarity
        between every output row and every encoding row; ``labels`` holds, for
        every target row, the index of the most similar encoding row.
    """
    # Load test results; the context manager closes the archive afterwards.
    with np.load(PATH_EXPERIMENTS / exp / "outputs_targets.npz") as data:
        outputs, targets = data["outputs"], data["targets"]

    # For one-hot encoding targets are already class indices (aka labels).
    # Convert back to one-hot encoding to be consistent with other encodings.
    if targets.shape[-1] == 1:
        # reshape(-1) keeps a 1-D vector even for a single sample, whereas
        # squeeze() would collapse it to a 0-d scalar.
        labels = targets.reshape(-1).astype(int)
        targets = np.eye(HIERARCHY.shape[-1])[labels]

    # Select the encoding matrix of the experiment this run belongs to
    encs = encodings[experiments_[exp]]

    # Normalise into new arrays: an in-place `/=` would overwrite the matrix
    # stored in the shared `encodings` dict and would fail on integer arrays.
    outputs = outputs / np.linalg.norm(outputs, axis=1, keepdims=True)
    targets = targets / np.linalg.norm(targets, axis=1, keepdims=True)
    encs = encs / np.linalg.norm(encs, axis=1, keepdims=True)

    # Calculate predictions and labels from outputs and targets
    preds = outputs @ encs.T
    labels = (targets @ encs.T).argmax(axis=-1)

    return preds, labels

In [None]:
def error_rate(exp: str, level: int = 0, k: int = 1):
    """Compute the ``error_rate`` metric of run ``exp``.

    Thin wrapper: forwards the predictions and labels from ``preds_labels(exp)``
    together with ``HIERARCHY``, ``level`` and ``k`` to ``utils.error_rate``.

    Args:
        exp: Run identifier understood by ``preds_labels``.
        level: Hierarchy level passed through to ``utils.error_rate``.
        k: Number of top predictions passed through to ``utils.error_rate``.

    Returns:
        Whatever ``utils.error_rate`` returns (a scalar, per the Metrics notes).
    """
    preds, labels = preds_labels(exp)
    return utils.error_rate(preds, labels, HIERARCHY, level, k)

def hier_dist_mistake(exp: str, level: int = 0, k: int = 1):
    """Compute the ``hier_dist_mistake`` metric of run ``exp``.

    Thin wrapper: forwards the predictions and labels from ``preds_labels(exp)``
    together with ``HIERARCHY``, ``level`` and ``k`` to
    ``utils.hier_dist_mistake``.

    Args:
        exp: Run identifier understood by ``preds_labels``.
        level: Hierarchy level passed through to ``utils.hier_dist_mistake``.
        k: Number of top predictions passed through to ``utils.hier_dist_mistake``.

    Returns:
        Whatever ``utils.hier_dist_mistake`` returns (a scalar, per the Metrics
        notes).
    """
    preds, labels = preds_labels(exp)
    return utils.hier_dist_mistake(preds, labels, HIERARCHY, level, k)

def hier_dist(exp: str, level: int = 0, k: int = 1):
    """Compute the ``hier_dist`` metric of run ``exp``.

    Thin wrapper: forwards the predictions and labels from ``preds_labels(exp)``
    together with ``HIERARCHY``, ``level`` and ``k`` to ``utils.hier_dist``.

    Args:
        exp: Run identifier understood by ``preds_labels``.
        level: Hierarchy level passed through to ``utils.hier_dist``.
        k: Number of top predictions passed through to ``utils.hier_dist``.

    Returns:
        Whatever ``utils.hier_dist`` returns (a scalar, per the Metrics notes).
    """
    preds, labels = preds_labels(exp)
    return utils.hier_dist(preds, labels, HIERARCHY, level, k)

# Row labels: every run ID, in the order the experiments are declared.
index = pd.Index(
    data=[exp for exps in experiments.values() for exp in exps],
    dtype=str,
    name="experiments",
)

# Column labels: one (level, metric) pair per metric and hierarchy level.
columns = pd.MultiIndex.from_product(
    iterables=[
        range(len(HIERARCHY) - 1),
        ["error_rate", "hier_dist_mistake", "hier_dist"],
    ],
    names=["level", "metric"],
)

# The metrics are cached on disk: they are recomputed only when the cache file
# is missing. Delete the file to force a recomputation.
METRICS_CACHE = PATH_RESULTS / f"{DATASET}.pkl"

if METRICS_CACHE.exists():
    # NOTE: unpickling can execute arbitrary code; only load files that were
    # produced by this notebook.
    df = pd.read_pickle(METRICS_CACHE)
else:
    # Explicit dispatch table instead of a `globals()` lookup by name.
    metric_funcs = {
        "error_rate": error_rate,
        "hier_dist_mistake": hier_dist_mistake,
        "hier_dist": hier_dist,
    }
    df = pd.DataFrame(index=index, columns=columns, dtype=float)
    for lvl, metric in tqdm(columns, total=len(columns)):
        func = partial(metric_funcs[metric], k=1, level=lvl)
        df[(lvl, metric)] = df.index.map(func)
    PATH_RESULTS.mkdir(parents=True, exist_ok=True)
    df.to_pickle(METRICS_CACHE)

In [None]:
# Display the metrics table loaded above (presumably one row per run and one
# column per (level, metric) pair, as laid out by the previous cell).
df

In [None]:
def mean(exp: str):
    """Print ``exp`` and return the mean and std of its runs' metrics.

    Args:
        exp: Experiment name, a key of ``experiments``.

    Returns:
        A ``(mean, std)`` tuple computed column-wise over the rows of ``df``
        that belong to the runs of ``exp``.
    """
    print(exp)
    runs = df.loc[experiments[exp], :]
    return runs.mean(), runs.std()

def std(exp: str, level: int = 0, k: int = 1):
    """Forward ``preds_labels(exp)`` to ``utils.hier_dist_mistake``.

    NOTE(review): despite its name this function computes no standard
    deviation; its body is the same call that ``hier_dist_mistake`` makes.
    It looks like an unfinished copy -- confirm before relying on it.

    Args:
        exp: Run identifier understood by ``preds_labels``.
        level: Hierarchy level passed through to ``utils.hier_dist_mistake``.
        k: Number of top predictions passed through to ``utils.hier_dist_mistake``.

    Returns:
        Whatever ``utils.hier_dist_mistake`` returns.
    """
    preds, labels = preds_labels(exp)
    return utils.hier_dist_mistake(preds, labels, HIERARCHY, level, k)


# Row labels: one row per experiment name.
index = pd.Index(
    data=list(experiments),
    dtype=str,
    name="experiments",
)

# Column labels: (level, metric, statistic), statistic being "mean" or "std".
columns = pd.MultiIndex.from_product(
    iterables=[
        range(len(HIERARCHY) - 1),
        ["error_rate", "hier_dist_mistake", "hier_dist"],
        ["mean", "std"],
    ],
    names=["level", "metric", None],
)

# Aggregate the per-run metrics of `df` over the runs of each experiment.
# The table is assembled from whole frames because rows yielded by
# `DataFrame.iterrows()` may be copies, so writing into them is not
# guaranteed to update the frame.
runs_by_experiment = {exp: df.loc[runs, :] for exp, runs in experiments.items()}
stat_frames = {
    "mean": pd.DataFrame(
        {exp: runs.mean() for exp, runs in runs_by_experiment.items()}
    ).T,
    "std": pd.DataFrame(
        {exp: runs.std() for exp, runs in runs_by_experiment.items()}
    ).T,
}

DF = (
    pd.concat(stat_frames, axis=1)          # columns: (statistic, level, metric)
    .reorder_levels([1, 2, 0], axis=1)      # columns: (level, metric, statistic)
    .reindex(index=index, columns=columns)  # align on labels; fixes the order
    .astype(float)
)

# Show only the means; darker green marks lower values within each column.
DF.loc[:, pd.IndexSlice[:, :, "mean"]].style.background_gradient(axis=0, cmap="Greens_r")

# Quantity vs Quality: various models

In [None]:
# Experiments compared in the next figure: experiment name -> legend label.
EXPERIMENTS = dict(
    [
        ("xe-onehot", "One-hot + Cross Entropy"),
        ("xe-b3p-beta0.3", "Hier. b3p + Cross Entropy"),
    ]
)

In [None]:
# Grid of subplots, one per hierarchy level: mean error rate (x) against mean
# hierarchical distance of mistakes (y) for every entry of EXPERIMENTS.

# Set those according to number of level in hierarchy
NROWS = 2
NCOLS = 3

plt.style.use('default')
figsize = (3 * NCOLS, 3 * NROWS)
# squeeze=False keeps `axs` a 2-D array even for a 1x1 grid, so `.ravel()`
# below always works.
fig, axs = plt.subplots(
    nrows=NROWS,
    ncols=NCOLS,
    figsize=figsize,
    squeeze=False,
)

# Levels that actually have columns in DF; grid cells beyond them stay empty
# instead of raising a KeyError.
available_levels = set(DF.columns.get_level_values("level"))

for lvl, ax in enumerate(axs.ravel()):
    if lvl not in available_levels:
        ax.set_axis_off()
        continue
    for exp, label in EXPERIMENTS.items():
        ax.errorbar(
            DF.loc[exp, (lvl, "error_rate", "mean")],
            DF.loc[exp, (lvl, "hier_dist_mistake", "mean")],
            #xerr=DF.loc[exp, (lvl, "error_rate", "std")],
            #yerr=DF.loc[exp, (lvl, "hier_dist_mistake", "std")],
            fmt='o',
            label=label,
        )
    ax.set_title(f"Level {lvl}")
    ax.set_xlabel("Error rate")
    ax.set_ylabel("Hier. dist. mistake")

fig.tight_layout()

# Every populated axis holds the same series; read the legend entries from the
# first one, because the last axis of the grid may have been left empty.
handles, labels = axs.ravel()[0].get_legend_handles_labels()
fig.legend(
    loc='lower center',
    handles=handles,
    labels=labels,
    bbox_to_anchor=(0.5, -0.2),
    ncol=NCOLS,
)

plt.show()

# Quantity vs Quality: Perotti et al. (b3p)

In [None]:
# Trade-off between error rate and hierarchical distance of mistakes (level 0)
# for the b3p runs, coloured by the value of beta.
EXPERIMENTS = df[df.index.str.contains('xe-b3p', regex=False)]

# Read beta from each run ID (the text after the last "beta"), so that every
# colour is tied to its own row rather than to an assumed row order.
HYPERPARAM = np.array(
    [float(run.rsplit("beta", 1)[-1]) for run in EXPERIMENTS.index]
)

cmap = plt.get_cmap('viridis')
fig, ax = plt.subplots(figsize=(6, 4))

scatter = ax.scatter(
    EXPERIMENTS[(0, 'error_rate')],
    EXPERIMENTS[(0, 'hier_dist_mistake')],
    c=HYPERPARAM,
    cmap=cmap,
    marker='o',
)

# Add a colorbar to the plot; the hyper-parameter of b3p is beta
cbar = plt.colorbar(scatter, ax=ax, label=r'$\beta$')

# Add labels and a title
ax.set_xlabel('Error Rate')
ax.set_ylabel('Hier Dist Mistake')
ax.set_title('b3p: hyper-param tradeoff')

# Show the plot
plt.show()

In [None]:
# Slide figure: mean error rate against mean hierarchical distance of mistakes
# at one hierarchy level, with the std over runs as error bars.
# NOTE(review): the axis limits, tick values and output folder below refer to
# CIFAR100 although DATASET is set elsewhere in this notebook -- confirm that
# this cell is meant to run for the current dataset.
plt.style.use("figures/slides/metropolis.mplstyle")

lvl = 0
fig, ax = plt.subplots(nrows=1, ncols=1)

# (experiment name, legend label) of every series to draw
series = [
    ("xe-onehot", 'One-hot enc.\nCross Entropy'),
    ("xe-b3p-beta0.4", 'Hierarchical enc.\nCross Entropy'),
    ("cd-desc-pca-n_components100", 'Description enc.\nCosine Distance'),
]

# Legend entries are rebuilt by hand to get plain markers without error bars.
new_handles, labels = [], []
for exp, label in series:
    if exp not in experiments:
        # Skip series that have no runs registered instead of failing with a
        # KeyError and losing the whole figure.
        warnings.warn(f"Experiment {exp!r} is not listed in `experiments`; skipping it.")
        continue
    runs = experiments[exp]
    errors = df.loc[runs, pd.IndexSlice[lvl, ["error_rate"]]]
    mistakes = df.loc[runs, pd.IndexSlice[lvl, ["hier_dist_mistake"]]]
    container = ax.errorbar(
        errors.mean(),
        mistakes.mean(),
        xerr=errors.std(),
        yerr=mistakes.std(),
        fmt='o',
        label=label,
    )
    new_handles.append(
        plt.Line2D(
            [], [], marker='o', markersize=7, linestyle='',
            color=container.lines[0].get_color(),
        )
    )
    labels.append(label)

ax.legend(
    loc='center left',
    handles=new_handles,
    labels=labels,
    bbox_to_anchor=(1, 0.5),
    labelspacing=1.5,
    fontsize=8,
)

# The last tick of each axis carries the axis name instead of a value.
ax.set_xlim(0.268, 0.2858)
ax.set_xticks([0.270, 0.273, 0.276, 0.279, 0.282, 0.286])
ax.set_xticklabels(
    # raw string: the backslash escapes the percent sign for the pgf backend
    ["27.0", "27.3", "27.6", "27.9", "28.2", r"Error \%"],
    va='top',
)

ax.set_ylim(2.24, 2.50)
ax.set_yticks([2.26, 2.30, 2.34, 2.38, 2.42, 2.46, 2.50])
ax.set_yticklabels(["2.26", "2.30", "2.34", "2.38", "2.42", "2.46", "Hierarchical\ndist. mistake"])

# NOTE(review): tight_layout() recomputes the subplot parameters, so it likely
# overrides the `right=0.68` set just before it -- confirm the intended order.
plt.subplots_adjust(right=0.68)
fig.tight_layout()

fig.savefig("figures/slides/CIFAR100/scatter.pgf")
plt.show()



In [None]:
def plot_2d_descriptions_encodings(level: int):
    """Scatter the 2-D encodings coloured by their class at ``level``.

    Reads the points from ``n_components2.npy`` in the current working
    directory, colours each point with the palette entry selected by
    ``HIERARCHY[level]``, adds a legend with the class names of that level,
    saves the figure as a ``.pgf`` file and shows it.

    NOTE(review): the class names and the output folder refer to CIFAR100;
    confirm they match the dataset loaded by this notebook.

    Args:
        level: Hierarchy level to colour by; must be 2, 3 or 4.

    Raises:
        AssertionError: If ``level`` is outside 2..4.
    """
    assert 1 < level < 5

    # One colour per class of each supported level
    COLOR_MAP = {
        2: [
            "#332288", "#6699cc", "#88ccee", "#117733",
            "#999933", "#ddcc77", "#cc6677", "#882255",
        ],
        3: [
            "#332288", "#6699cc", "#ddcc77", "#cc6677",
        ],
        4: [
            "#6699cc", "#cc6677",
        ],
    }

    # Legend names, in the same order as the colours above
    CLASSES = {
        2: [
            # raw string: the backslash escapes the ampersand for the pgf backend
            "Flora", r"Fishes \& Aq. Mammals", "Terrestrial Mammals",
            "Household Items", "Non-Mammals", "Vehicles",
            "Buildings", "Natural Landscapes",
        ],
        3: [
            "Flora", "Fauna", "Man-Made Objects", "Natural Landscapes",
        ],
        4: [
            "Living Beings", "Non-Living Subjects",
        ],
    }

    # Class of every point at this level, used as an index into the palette
    labels = HIERARCHY[level]
    point_colors = [COLOR_MAP[level][label] for label in labels]

    # presumably an (n_points, 2) array -- TODO confirm
    x, y = np.load("n_components2.npy").T

    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.scatter(x, y, c=point_colors, s=20)

    handles = [
        plt.Line2D(
            [], [], marker='o', markersize=7, linestyle='',
            color=class_color, label=class_name,
        )
        for class_color, class_name in zip(COLOR_MAP[level], CLASSES[level])
    ]
    ax.legend(
        loc='center left',
        handles=handles,
        bbox_to_anchor=(1, 0.5),
        labelspacing=1.5,
        fontsize=8,
    )

    # Remove ticks; an empty tick list leaves no tick labels either
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()

    # Applied after tight_layout so the room left for the legend is kept
    plt.subplots_adjust(right=0.57)

    # Save the figure
    fig.savefig(f"figures/slides/CIFAR100/encodings-level{level}.pgf")

    plt.show()


# Draw and save the figure for every supported hierarchy level.
for level in (2, 3, 4):
    plot_2d_descriptions_encodings(level=level)

# Quantity vs Quality: Bertinetto et al. (mbm)

In [None]:
# Trade-off between error rate and hierarchical distance of mistakes (level 0)
# for the mbm runs, coloured by the value of beta.
EXPERIMENTS = df[df.index.str.contains('xe-mbm', regex=False)]

# Read beta from each run ID (the text after the last "beta"), so that every
# colour is tied to its own row rather than to an assumed row order.
HYPERPARAM = np.array(
    [float(run.rsplit("beta", 1)[-1]) for run in EXPERIMENTS.index]
)

cmap = plt.get_cmap('viridis')
fig, ax = plt.subplots(figsize=(6, 4))

scatter = ax.scatter(
    EXPERIMENTS[(0, 'error_rate')],
    EXPERIMENTS[(0, 'hier_dist_mistake')],
    c=HYPERPARAM,
    cmap=cmap,
    marker='o',
)

# Add a colorbar to the plot; the hyper-parameter of mbm is beta
cbar = plt.colorbar(scatter, ax=ax, label=r'$\beta$')

# Add labels and a title
ax.set_xlabel('Error Rate')
ax.set_ylabel('Hier Dist Mistake')
ax.set_title('mbm: hyper-param tradeoff')

# Show the plot
plt.show()