In [None]:
from pathlib import Path
import itertools

# Data Analysis
import pandas as pd
import numpy as np

# Plotting
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pgf import FigureCanvasPgf
import scienceplots

# Reference width used to size every figure in this notebook (figsize = fraction * TEXTWIDTH).
# NOTE(review): presumably the LaTeX \textwidth in inches -- confirm against the target document.
TEXTWIDTH = 5.95114

# Apply the scienceplots scatter style, render PDFs through the PGF canvas and
# let pdflatex typeset the text with serif fonts.
plt.style.use(['science', 'scatter'])
matplotlib.backend_bases.register_backend('pdf', FigureCanvasPgf)
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
})

colors = ['#0C5DA5', '#00B945', '#FF9500', '#FF2C00', '#845B97', '#474747', '#9e9e9e']
markers = ['o', '^', 'v', '<', '>', 's', 'd']
# A blank linestyle draws no connecting line, so only the markers are visible.
linestyles = [' '] * len(colors)

# Property cycle with len(colors) * len(markers) entries: the colour list is repeated
# once per marker, and on every repetition the marker list is rotated by one more
# position, so each (colour, marker) pair appears exactly once per cycle.
shifted_cycle = plt.cycler(
    color=colors * len(markers),
    ls=linestyles * len(markers),
    marker=[
        marker
        for shift in range(len(markers))
        for marker in markers[shift:] + markers[:shift]
    ],
)


# All data and figure paths are resolved relative to the parent of the working directory.
PATH_ROOT = Path("..")
PATH_DATASETS = PATH_ROOT / "datasets"

## CIFAR100

In [None]:

# Everything in this cell is read from <PATH_DATASETS>/<DATASET>/.
DATASET = "CIFAR100"
PATH_DATASET = PATH_DATASETS / DATASET

# Hierarchy array; the plotting cells index it by level (HIERARCHY[LEVEL]).
PATH_HIERARCHY = PATH_DATASET.joinpath("hierarchy", "hierarchy.npy")
HIERARCHY = np.load(PATH_HIERARCHY)

# UMAP projections, one .npy file per encoding.
# The trailing comments keep the LaTeX label associated with each entry.
PATH_UMAP = PATH_DATASET.joinpath("projections", "umap")
PATH_PROJECTIONS = {
    "mbm": PATH_UMAP.joinpath("mbm", "beta5.0.npy"),  # r"MBM $\alpha \, 5.0$"
    "b3p": PATH_UMAP.joinpath("b3p", "beta0.4.npy"),  # r"B3P $\beta \, 0.4$"
    "bd": PATH_UMAP.joinpath("barz-denzler.npy"),  # r"BD"
    "desc": PATH_UMAP.joinpath("desc-pca", "ada", "austen", "d100.npy"),  # r"Desc. $d \, 100$"
}
PROJECTIONS = {key: np.load(npy_file) for key, npy_file in PATH_PROJECTIONS.items()}

# Encodings: the same four entries as above plus the one-hot encoding.
PATH_ENCODINGS = {
    "onehot": PATH_DATASET.joinpath("encodings", "onehot.npy"),
    "mbm": PATH_DATASET.joinpath("encodings", "mbm", "beta5.0.npy"),
    "b3p": PATH_DATASET.joinpath("encodings", "b3p", "beta0.4.npy"),
    "bd": PATH_DATASET.joinpath("encodings", "barz-denzler.npy"),
    "desc": PATH_DATASET.joinpath("encodings", "desc-pca", "ada", "austen", "d100.npy"),
}
ENCODINGS = {key: np.load(npy_file) for key, npy_file in PATH_ENCODINGS.items()}

# Class names, parsed by np.loadtxt as strings.
PATH_CLASSES = PATH_DATASET.joinpath("classes", "classes.txt")
CLASSES = np.loadtxt(PATH_CLASSES, dtype=str)

### UMAP Projections

- We have to decide which hierarchy level the colors are based on.

In [None]:
# Hierarchy level whose labels decide the colour/marker of each point.
LEVEL = 2
LABELS = HIERARCHY[LEVEL]

# savefig does not create missing directories, so make sure the target exists
# before the loop instead of failing on the first figure.
PATH_PROJECTION_FIGURES = PATH_ROOT / "notebooks" / "projections"
PATH_PROJECTION_FIGURES.mkdir(parents=True, exist_ok=True)

for name, projection in PROJECTIONS.items():
    fig, ax = plt.subplots(figsize=(TEXTWIDTH * 0.23, TEXTWIDTH * 0.23))
    ax.set_prop_cycle(shifted_cycle)
    # One plot call per label value, so every group takes the next
    # (colour, marker) entry of the property cycle.
    for cls in np.unique(LABELS):
        # Rows of the projection are selected with a mask over LABELS and then
        # split into their two coordinates.
        x, y = projection[LABELS == cls].T
        ax.plot(x, y, markersize=1)
    # The axes carry no meaningful units: hide all ticks.
    ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False)
    ax.set_xticks([])
    ax.set_yticks([])

    fig.tight_layout()
    # NOTE(review): the prefix is hard-coded in lower case here, whereas the other
    # figure cells build it from DATASET; kept unchanged so existing references
    # to these files keep working.
    fig.savefig(PATH_PROJECTION_FIGURES / f'cifar100-{name}.pdf')
    print(name)
    plt.show()
    # Release the figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)

### Similarity Matrices

In [None]:
# Ordering of the rows/columns of every similarity matrix.
# np.lexsort treats the LAST row of HIERARCHY as the primary sort key and the
# earlier rows as successive tie-breakers.
idx = np.lexsort(HIERARCHY)
cmap = "Blues"  # alternative: 'cividis'

# savefig does not create missing directories, so make sure the target exists
# before the loop instead of failing on the first figure.
PATH_SIMILARITY_FIGURES = PATH_ROOT / "notebooks" / "similarities"
PATH_SIMILARITY_FIGURES.mkdir(parents=True, exist_ok=True)

for name, encoding in ENCODINGS.items():
    fig, ax = plt.subplots(figsize=(TEXTWIDTH * 0.49, TEXTWIDTH * 0.49))
    # Cosine similarity: scale every row to unit L2 norm, then take all pairwise dot products.
    # NOTE(review): a row with zero norm would turn into NaNs here; this assumes
    # every encoding row is non-zero -- confirm for new encodings.
    norm_encoding = encoding / np.linalg.norm(encoding, axis=1, keepdims=True)
    similarity = norm_encoding @ norm_encoding.T
    # Apply the same permutation to rows and columns.
    sorted_similarity = similarity[idx, :][:, idx]

    ax.imshow(sorted_similarity, cmap=cmap)
    ax.set_xticks([])
    ax.set_yticks([])

    fig.tight_layout()
    fig.savefig(PATH_SIMILARITY_FIGURES / f'{DATASET}-{name}.pdf')
    print(name)
    plt.show()
    # Release the figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)
    
    

## iNaturalist19

In [None]:
# Everything in this cell is read from <PATH_DATASETS>/<DATASET>/.
DATASET = "iNaturalist19"
PATH_DATASET = PATH_DATASETS / DATASET

# Hierarchy array; the plotting cells index it by level (HIERARCHY[LEVEL]).
PATH_HIERARCHY = PATH_DATASET.joinpath("hierarchy", "hierarchy.npy")
HIERARCHY = np.load(PATH_HIERARCHY)

# UMAP projections, one .npy file per encoding.
# The trailing comments keep the LaTeX label associated with each entry.
PATH_UMAP = PATH_DATASET.joinpath("projections", "umap")
PATH_PROJECTIONS = {
    "mbm": PATH_UMAP.joinpath("mbm", "beta5.0.npy"),  # Check for BUG # r"MBM $\alpha \, 5.0$"
    "b3p": PATH_UMAP.joinpath("b3p", "beta0.5.npy"),  # r"B3P $\beta \, 0.5$"
    "bd": PATH_UMAP.joinpath("barz-denzler.npy"),  # r"BD"
    "desc": PATH_UMAP.joinpath("desc-pca", "ada", "austen", "d300.npy"),  # r"Desc. $d \, 300$"
}
PROJECTIONS = {key: np.load(npy_file) for key, npy_file in PATH_PROJECTIONS.items()}

# Encodings: the same four entries as above plus the one-hot encoding.
PATH_ENCODINGS = {
    "onehot": PATH_DATASET.joinpath("encodings", "onehot.npy"),
    "mbm": PATH_DATASET.joinpath("encodings", "mbm", "beta5.0.npy"),
    "b3p": PATH_DATASET.joinpath("encodings", "b3p", "beta0.5.npy"),
    "bd": PATH_DATASET.joinpath("encodings", "barz-denzler.npy"),
    "desc": PATH_DATASET.joinpath("encodings", "desc-pca", "ada", "austen", "d300.npy"),
}
ENCODINGS = {key: np.load(npy_file) for key, npy_file in PATH_ENCODINGS.items()}

PATH_CLASSES = PATH_DATASET.joinpath("classes", "classes.txt")

# One class name per line; each whole line (stripped of surrounding whitespace)
# becomes one entry of the array.
with open(PATH_CLASSES, 'r') as file:
    CLASSES = np.array([raw_line.strip() for raw_line in file])


### UMAP Projections

In [None]:
# Hierarchy level whose labels decide the colour/marker of each point.
LEVEL = 4
LABELS = HIERARCHY[LEVEL]

# savefig does not create missing directories, so make sure the target exists
# before the loop instead of failing on the first figure.
PATH_PROJECTION_FIGURES = PATH_ROOT / "notebooks" / "projections"
PATH_PROJECTION_FIGURES.mkdir(parents=True, exist_ok=True)

for name, projection in PROJECTIONS.items():
    fig, ax = plt.subplots(figsize=(TEXTWIDTH * 0.49, TEXTWIDTH * 0.49))
    ax.set_prop_cycle(shifted_cycle)
    # One plot call per label value, so every group takes the next
    # (colour, marker) entry of the property cycle.
    for cls in np.unique(LABELS):
        # Rows of the projection are selected with a mask over LABELS and then
        # split into their two coordinates.
        x, y = projection[LABELS == cls].T
        ax.plot(x, y, markersize=1)
    # The axes carry no meaningful units: hide all ticks.
    ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False)
    ax.set_xticks([])
    ax.set_yticks([])

    fig.tight_layout()
    fig.savefig(PATH_PROJECTION_FIGURES / f'{DATASET}-{name}.pdf')
    print(name)
    plt.show()
    # Release the figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)

In [None]:
# Ordering of the rows/columns of every similarity matrix.
# np.lexsort treats the LAST row of HIERARCHY as the primary sort key and the
# earlier rows as successive tie-breakers.
idx = np.lexsort(HIERARCHY)
cmap = "Blues"  # alternative: 'cividis'

# savefig does not create missing directories, so make sure the target exists
# before the loop instead of failing on the first figure.
PATH_SIMILARITY_FIGURES = PATH_ROOT / "notebooks" / "similarities"
PATH_SIMILARITY_FIGURES.mkdir(parents=True, exist_ok=True)

for name, encoding in ENCODINGS.items():
    fig, ax = plt.subplots(figsize=(TEXTWIDTH * 0.49, TEXTWIDTH * 0.49))
    # Cosine similarity: scale every row to unit L2 norm, then take all pairwise dot products.
    # NOTE(review): a row with zero norm would turn into NaNs here; this assumes
    # every encoding row is non-zero -- confirm for new encodings.
    norm_encoding = encoding / np.linalg.norm(encoding, axis=1, keepdims=True)
    similarity = norm_encoding @ norm_encoding.T
    # Apply the same permutation to rows and columns.
    sorted_similarity = similarity[idx, :][:, idx]

    ax.imshow(sorted_similarity, cmap=cmap)
    ax.set_xticks([])
    ax.set_yticks([])

    fig.tight_layout()
    fig.savefig(PATH_SIMILARITY_FIGURES / f'{DATASET}-{name}.pdf')
    if name == "b3p":
        # The printed remark is in Italian. In English: "b3p looks like one-hot,
        # but the values are actually != 0 and != 1. Despite the similarity to
        # one-hot at first glance, training produces different results."
        print("b3p, sembra onehot ma in realtà i valori sono != 0 e != 1.")
        print("nonstante la similarità con onehot a prima vista, il training produce risultati diversi.")
    else:
        print(name)

    plt.show()
    # Release the figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)
    
    