# Datasets Projection

- This notebook is used to produce the 2D UMAP plots of various encodings.
- These plots will be included in a LaTeX document.
- The library `matplotlib` is used as the plotting library.
- The hyperparameter of the `MBM` encoding is referred to as $\beta$ in the codebase, but it is called $\alpha$ in the LaTeX document.

In [None]:
from pathlib import Path
import itertools

# Data Analysis
import pandas as pd
import numpy as np

# Plotting
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pgf import FigureCanvasPgf
import scienceplots

# Reference width used to size every figure. Presumably the LaTeX text width
# in inches (matplotlib figure sizes are given in inches) -- confirm against
# the target document.
TEXTWIDTH = 5.95114

# Scatter-oriented style sheets, then PDF output routed through the PGF canvas.
plt.style.use(['science', 'scatter'])
matplotlib.backend_bases.register_backend('pdf', FigureCanvasPgf)
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
})

# Seven colours and seven marker shapes to combine into series styles.
colors = [
    '#0C5DA5', '#00B945', '#FF9500', '#FF2C00',
    '#845B97', '#474747', '#9e9e9e',
]
markers = ['o', '^', 'v', '<', '>', 's', 'd']
# A blank line style for every colour, so only the markers are drawn.
linestyles = [' '] * len(colors)

# Property cycle that repeats the colour list once per marker rotation. Each
# pass starts the marker list one position later, so a given colour is paired
# with a different marker on every pass.
shifted_cycle = plt.cycler(
    color=colors * len(markers),
    ls=linestyles * len(markers),
    marker=itertools.chain.from_iterable(
        markers[shift:] + markers[:shift] for shift in range(len(markers))
    ),
)


# Locations are relative to the directory the notebook runs from.
PATH_ROOT = Path("..")
PATH_DATASETS = PATH_ROOT / "datasets"

## CIFAR100

In [None]:
DATASET = "CIFAR100"

# Directory layout of the dataset: hierarchy array and precomputed UMAP projections.
PATH_DATASET = PATH_DATASETS / DATASET
PATH_HIERARCHY = PATH_DATASET / "hierarchy" / "hierarchy.npy"
PATH_PROJECTIONS = PATH_DATASET / "projections" / "umap"

# Short encoding name -> projection file. The trailing comment on each entry
# records the LaTeX label of that encoding.
PATH_ENCODINGS = {
    "mbm": PATH_PROJECTIONS / "mbm" / "beta5.0.npy",  # r"MBM $\alpha \, 5.0$"
    "b3p": PATH_PROJECTIONS / "b3p" / "beta0.4.npy",  # r"B3P $\beta \, 0.4$"
    "bd": PATH_PROJECTIONS / "barz-denzler.npy",  # r"BD"
    "desc": PATH_PROJECTIONS / "desc-pca" / "ada" / "austen" / "d100.npy",  # r"Desc. $d \, 100$"
}

# Load the hierarchy first, then every projection, keyed by encoding name.
HIERARCHY = np.load(PATH_HIERARCHY)
ENCODINGS = {key: np.load(npy_path) for key, npy_path in PATH_ENCODINGS.items()}

# Entry of the hierarchy array whose values are used to group the plotted points.
LEVEL = 2
LABELS = HIERARCHY[LEVEL]

In [None]:
# Draw one small square scatter plot per encoding and save each one as a PDF.
for name, encoding in ENCODINGS.items():
    side = TEXTWIDTH * 0.23
    fig, ax = plt.subplots(figsize=(side, side))
    ax.set_prop_cycle(shifted_cycle)

    # One plot call per label value, so each group takes the next style of the cycle.
    for cls in np.unique(LABELS):
        in_class = LABELS == cls
        # The selected rows are unpacked into exactly two coordinate arrays.
        x, y = encoding[in_class].T
        ax.plot(x, y, markersize=1)

    # Strip tick marks and tick locations from both axes.
    ax.tick_params(
        axis='both',
        which='both',
        bottom=False,
        top=False,
        left=False,
        right=False,
    )
    ax.set(xticks=[], yticks=[])

    fig.tight_layout()
    fig.savefig(f'cifar100-encodings-{name}.pdf')
    print(name)
    plt.show()

## iNaturalist19

In [None]:
DATASET = "iNaturalist19"

# Directory layout of the dataset: hierarchy array and precomputed UMAP projections.
PATH_DATASET = PATH_DATASETS / DATASET
PATH_HIERARCHY = PATH_DATASET / "hierarchy" / "hierarchy.npy"
PATH_PROJECTIONS = PATH_DATASET / "projections" / "umap"

# Short encoding name -> projection file. The trailing comment on each entry
# records the LaTeX label of that encoding.
PATH_ENCODINGS = {
    # TODO: "Check for BUG" (author's note on the MBM entry; not yet resolved).
    "mbm": PATH_PROJECTIONS / "mbm" / "beta5.0.npy",  # r"MBM $\alpha \, 5.0$"
    "b3p": PATH_PROJECTIONS / "b3p" / "beta0.5.npy",  # r"B3P $\beta \, 0.5$"
    "bd": PATH_PROJECTIONS / "barz-denzler.npy",  # r"BD"
    "desc": PATH_PROJECTIONS / "desc-pca" / "ada" / "austen" / "d100.npy",  # r"Desc. $d \, 100$"
}

# Load the hierarchy first, then every projection, keyed by encoding name.
HIERARCHY = np.load(PATH_HIERARCHY)
ENCODINGS = {key: np.load(npy_path) for key, npy_path in PATH_ENCODINGS.items()}

# Entry of the hierarchy array whose values are used to group the plotted points.
LEVEL = 3
LABELS = HIERARCHY[LEVEL]

In [None]:
# Draw one half-text-width square scatter plot per encoding and save each as a PDF.
for name, encoding in ENCODINGS.items():
    fig, ax = plt.subplots(figsize=(TEXTWIDTH / 2, TEXTWIDTH / 2))
    ax.set_prop_cycle(shifted_cycle)

    # One plot call per label value, so each group takes the next style of the cycle.
    for cls in np.unique(LABELS):
        # The selected rows are unpacked into exactly two coordinate arrays.
        x, y = encoding[LABELS == cls].T
        ax.plot(x, y, markersize=1)

    # Strip tick marks and tick locations from both axes.
    ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False)
    ax.set_xticks([])
    ax.set_yticks([])

    fig.tight_layout()
    fig.savefig(f'inaturalist19-encodings-{name}.pdf')
    print(name)
    plt.show()