In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib

from glob import glob
import functools
import json
import re

sns.set_theme(style="darkgrid", context="notebook", palette=sns.color_palette("rocket", 4))
matplotlib.rcParams['figure.figsize'] = (20, 10)

%matplotlib inline

In [None]:
# Load every ``results/<model>.json`` file into one DataFrame with one row per
# (model, task) pair. Each file is a JSON object holding a "layers" entry plus
# one entry per task; the per-task metric lists are stored as numpy arrays.
results_files = sorted(glob("results/*.json"))
all_results = []
layers = {}  # model name -> the file's "layers" entry (used when plotting)
for file in results_files:
    with open(file, "r") as f:
        results = json.load(f)
    # Model name = file name without directory and ".json" suffix. The dot is
    # escaped and the match is anchored at the end so that names containing
    # "json" (e.g. "my-json-model") are not truncated; both "/" and "\" are
    # accepted as path separators.
    model = re.search(r"([^/\\]+)\.json$", file).group(1)
    layers[model] = results.pop("layers")
    for task_name, problem in results.items():
        record = {
            "task": task_name,
            "model": model,
            "model/task": f"{model}/{task_name}",
        }
        # Metric columns are added last, so on a key clash they override the
        # identifying keys above.
        record.update({
            problem_name: np.array(values)
            for problem_name, values in problem.items()
        })
        all_results.append(record)
df = pd.DataFrame(all_results)
df.head()

In [None]:
# Express the per-layer "outliers" values on a 0-100 scale
# (assumes they are stored as fractions — TODO confirm against the producer).
df["percent"] = df["outliers"].mul(100)

In [None]:
# Split the results by model family. Later cells add columns to these frames,
# so each one is an independent copy rather than a filtered view of ``df``.

# Plain ResNets ("resnet<depth>"), ordered by numeric depth rather than by the
# lexical file order ("resnet101" sorts before "resnet18"). The stable sort
# keeps the original row order within one model. ``sort_values`` already
# returns a new frame, so no extra ``.copy()`` is needed here.
resnet_df = df[df["model"].str.match(r"resnet\d+$")].sort_values(
    "model",
    key=lambda names: names.str.extract(r"(\d+)$", expand=False).astype(int),
    kind="stable",
)
base_df = df[df["model"].str.match(r"[a-z0-9]+-base")].copy()
large_df = df[df["model"].str.match(r"[a-z0-9]+-large")].copy()
rotated_df = df[df["model"].str.match(r"([a-z0-9]+-rotated)|(gptr2-.*)")].copy()

In [None]:
def compose(*F):
    """Compose single-argument callables right to left.

    ``compose(f, g, h)(x)`` evaluates ``f(g(h(x)))``.

    Args:
        *F: The callables to chain; the right-most one is applied first.

    Returns:
        The composed callable. ``compose(f)`` returns ``f`` itself and
        ``compose()`` returns the identity function.
    """
    if not F:
        # reduce() without an initial value raises TypeError on an empty
        # sequence; the natural empty composition is the identity.
        return lambda x: x
    return functools.reduce(lambda f, g: lambda x: f(g(x)), F)

In [None]:
def plot_layer_chart(results: pd.DataFrame, column: str, title: str = None, split="task", transform=None):
    """Draw one bar chart per model showing a per-layer metric.

    One subplot row is created for each distinct value of ``results["model"]``.
    Within a subplot the x axis is the layer index (position in the metric
    sequence) and the bars are coloured by the ``split`` column.

    Args:
        results: Frame with a "model" column, the ``split`` column and
            ``column``; each cell of ``column`` must be an iterable holding one
            value per layer.
        column: Name of the per-layer metric column to plot.
        title: Optional figure-level title.
        split: Column used as the bar hue (default "task").
        transform: Optional callable invoked as ``transform(ax)`` after the
            bars of each subplot are drawn (e.g. to add labels or guide lines).

    Raises:
        ValueError: If ``results`` contains no models.

    Notes:
        Reads the module-level ``layers`` mapping (model name -> sequence of
        per-layer entries). A dashed vertical line is drawn before every layer
        whose entry has ``entry[1] == 0``, unless that holds for all layers of
        the model. Presumably this marks the start of a new block — TODO
        confirm against the code that writes the results files.
    """
    splits = results[split].unique()
    models = results["model"].unique()
    if len(models) == 0:
        raise ValueError("results contains no models to plot")

    # squeeze=False always yields a 2-D array of axes, so a single model needs
    # no special case.
    fig, axes = plt.subplots(len(models), squeeze=False)

    # Flatten the per-row value sequences into long form: one record per
    # (split value, model, layer).
    records = []
    for pivot in splits:
        for model in models:
            sub_df = results[(results[split] == pivot) & (results["model"] == model)]
            for values in sub_df[column]:
                for layer, value in enumerate(values):
                    records.append({
                        split: pivot,
                        "model": model,
                        "layer": layer,
                        column: value
                    })
    data = pd.DataFrame(records)

    previous_tasks = None
    for ax, model in zip(axes[:, 0], models):
        model_data = data[data["model"] == model]
        # Keyword ``data=`` works across seaborn versions; a positional first
        # argument is interpreted as ``x`` by older releases.
        sns.barplot(data=model_data, x="layer", y=column, hue=split, ax=ax)

        # Show the legend only when the hue categories differ from those of
        # the subplot directly above.
        tasks = set(model_data[split].unique())
        if tasks == previous_tasks:
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()
        else:
            previous_tasks = tasks

        if transform is not None:
            transform(ax)

        model_layers = layers[model]
        if not all(layer[1] == 0 for layer in model_layers):
            for position, layer in enumerate(model_layers):
                if layer[1] == 0:
                    # Bars are centred on integer positions; -0.5 puts the
                    # line between this layer and the previous one.
                    ax.axvline(position - 0.5, ls="--", color=sns.color_palette()[-2])
        ax.set_title(model)
        ax.set_xlabel("")

    if title is not None:
        fig.suptitle(title)
    # Lay out only after everything is drawn so titles and tick labels are
    # taken into account; ``rect`` reserves the top strip for the suptitle.
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])


In [None]:
def add_values(ax: plt.Axes) -> plt.Axes:
    """Write each bar's height as a text label above the bar.

    Bars (``ax.patches``) are paired positionally with ``ax.lines``, which is
    expected to hold one error-bar line per bar in the same order. The label is
    placed above the top of the paired line, or above the bar itself when the
    line has no finite y data. Heights below 10 are printed with one decimal,
    larger ones with none.

    Args:
        ax: Axes containing the bars to annotate.

    Returns:
        The same axes, to allow chaining. Axes without any finite bar height
        are returned unchanged.
    """
    bars = list(ax.patches)
    heights = np.array([bar.get_height() for bar in bars], dtype=float)
    finite_heights = heights[np.isfinite(heights)]
    if finite_heights.size == 0:
        # No bars (or only NaN bars): nothing to label.
        return ax

    # The vertical gap between bar and label scales with the tallest bar.
    label_offset = finite_heights.max() / 50
    # Shrink the font as bars get more numerous, capped at 12pt.
    font_size = min(int(500 / len(bars)), 12)

    for bar, line in zip(bars, list(ax.lines)):
        height = bar.get_height()
        if not np.isfinite(height):
            # A NaN bar is not drawn, so it gets no label either.
            continue
        line_y = np.asarray(line.get_ydata(), dtype=float)
        line_y = line_y[np.isfinite(line_y)]
        # Fall back to the bar top when the paired line carries no usable data
        # (e.g. an error bar computed from a single observation).
        y = line_y.max() if line_y.size else height
        ax.text(
            bar.get_x() + bar.get_width() / 2.,
            y + label_offset,
            f"{height:.{int(height < 10)}f}",
            ha="center",
            va="bottom",
            size=font_size,
        )
    return ax

In [None]:
def draw_horizontal_line(ax: plt.Axes, y: float) -> plt.Axes:
    """Add a thin dashed horizontal guide line at height ``y``.

    The line uses the second-to-last colour of the current seaborn palette.

    Args:
        ax: Axes to draw on.
        y: Data-space y coordinate of the line.

    Returns:
        The same axes, to allow chaining.
    """
    guide_color = sns.color_palette()[-2]
    ax.axhline(y=y, color=guide_color, linestyle="--", linewidth=1)
    return ax

In [None]:
# Per-layer "percent" chart for each model family.
for family_df, family_title in (
    (base_df, "Small LLMs"),
    (large_df, "Large LLMs"),
    (resnet_df, "ResNets"),
    (rotated_df, "Rotationally Invariant Models"),
):
    plot_layer_chart(family_df, title=family_title, column="percent")

In [None]:
# Dashed guide line at kurtosis = 3, drawn on every subplot.
kurtosis_line = functools.partial(draw_horizontal_line, y=3)

# Per-layer kurtosis chart for each model family.
for family_df, family_title in (
    (base_df, "Small LLMs"),
    (large_df, "Large LLMs"),
    (resnet_df, "ResNets"),
    (rotated_df, "Rotationally Invariant Models"),
):
    plot_layer_chart(family_df, title=family_title, column="kurtosis", transform=kurtosis_line)

In [None]:
def plot_kurtosis(results: pd.DataFrame, title: str = None):
    """Plot "kurtosis" and "rotated-kurtosis" side by side for every layer.

    The two columns are melted into long form (a "type" column naming the
    source column) and drawn with ``plot_layer_chart`` using "type" as the bar
    hue. Each subplot gets a dashed guide line at y = 3 and a value label on
    every bar.

    Args:
        results: Frame with "model", "kurtosis" and "rotated-kurtosis" columns,
            the latter two holding one value per layer.
        title: Optional figure-level title.
    """
    kurtosis_columns = ["kurtosis", "rotated-kurtosis"]
    melted_results = pd.melt(
        results,
        # Derive the identifier columns from the frame that was passed in, not
        # from the notebook-global ``df``.
        id_vars=list(results.columns.difference(kurtosis_columns)),
        value_vars=kurtosis_columns,
        var_name="type",
        value_name="value",
    )
    # Rows whose source column is missing (NaN) have no per-layer values and
    # cannot be plotted.
    melted_results = melted_results.dropna(subset=["value"])
    # melt() does not accept a value_name that matches an existing column, so
    # the values are melted as "value" and renamed afterwards; the chart's y
    # axis is then labelled "kurtosis".
    melted_results = melted_results.rename(columns={"value": "kurtosis"})
    plot_layer_chart(
        melted_results,
        title=title,
        column="kurtosis",
        split="type",
        # compose applies right to left: guide line first, then value labels.
        transform=compose(add_values, functools.partial(draw_horizontal_line, y=3)),
    )

In [None]:
# Kurtosis vs. rotated-kurtosis comparison for each model family.
for family_df, family_title in (
    (base_df, "Small LLMs"),
    (large_df, "Large LLMs"),
    (resnet_df, "ResNets"),
    (rotated_df, "Rotationally Invariant Models"),
):
    plot_kurtosis(family_df, title=family_title)

In [None]:
# Per-layer "rms" chart for each model family.
for family_df, family_title in (
    (base_df, "Small LLMs"),
    (large_df, "Large LLMs"),
    (resnet_df, "ResNets"),
    (rotated_df, "Rotationally Invariant Models"),
):
    plot_layer_chart(family_df, title=family_title, column="rms")

In [None]:
def plot_accuracy(results: pd.DataFrame, title: str = None, num_classes: int = 10):
    """Plot "accuracy" and "reversed-accuracy" per layer, in percent.

    The two columns are melted into long form (a "direction" column naming the
    source column), rows without scores are dropped, scores are multiplied by
    100, and the result is drawn with ``plot_layer_chart`` using "direction" as
    the bar hue. Each subplot gets value labels and a dashed guide line at
    ``100 / num_classes``.

    Args:
        results: Frame with "model", "accuracy" and "reversed-accuracy"
            columns; each non-null score cell is an iterable of per-layer
            scores (assumed to be fractions — TODO confirm).
        title: Optional figure-level title.
        num_classes: Divisor for the guide line height ``100 / num_classes``.
    """
    accuracy_columns = ["accuracy", "reversed-accuracy"]
    melted_results = pd.melt(
        results,
        # Derive the identifier columns from the frame that was passed in, not
        # from the notebook-global ``df``.
        id_vars=list(results.columns.difference(accuracy_columns)),
        value_vars=accuracy_columns,
        var_name="direction",
        value_name="score",
    )
    melted_results = melted_results.dropna(subset=["score"])
    # assign() builds a new frame instead of writing into the dropna() result.
    melted_results = melted_results.assign(
        accuracy=melted_results["score"].apply(lambda scores: [score * 100 for score in scores])
    )
    plot_layer_chart(
        melted_results,
        title=title,
        column="accuracy",
        split="direction",
        # compose applies right to left: value labels first, then guide line.
        transform=compose(functools.partial(draw_horizontal_line, y=100 / num_classes), add_values),
    )

In [None]:
def reverse(series: pd.Series) -> pd.Series:
    """Return a series whose non-null elements are reversed into lists.

    Args:
        series: Series whose non-null elements are reversible sequences.

    Returns:
        A new series in which every non-null element is a list holding the
        original items in reverse order; null entries are passed through.
    """
    to_reversed_list = compose(list, reversed)
    return series.map(to_reversed_list, na_action="ignore")

In [None]:
# Accuracy charts per model family.
#
# Every chart is drawn from a derived frame built with ``DataFrame.assign``, so
# the family frames themselves are never modified and the cell can be re-run
# safely (writing the reversed column back in place would flip it again on
# every re-run).

# LLMs: "accuracy" is used as stored; "reversed-accuracy" has its per-layer
# order flipped before plotting.
plot_accuracy(
    base_df.assign(**{"reversed-accuracy": reverse(base_df["reversed-accuracy"])}),
    title="Small LLMs",
    num_classes=3,
)

plot_accuracy(
    large_df.assign(**{"reversed-accuracy": reverse(large_df["reversed-accuracy"])}),
    title="Large LLMs",
    num_classes=3,
)

# ResNets, top-1 columns.
plot_accuracy(
    resnet_df.assign(**{
        "accuracy": resnet_df["top1"],
        "reversed-accuracy": resnet_df["reversed-top1"],
    }),
    title="ResNets",
    num_classes=1000,
)

# ResNets, top-5 columns.
plot_accuracy(
    resnet_df.assign(**{
        "accuracy": resnet_df["top5"],
        "reversed-accuracy": resnet_df["reversed-top5"],
    }),
    title="ResNets",
    num_classes=200,
)

# Rotated ResNets, top-5 columns.
rotated_resnet_df = rotated_df[rotated_df["model"].str.match(r"resnet\d+")]
rotated_resnet_df = rotated_resnet_df.assign(**{
    "accuracy": rotated_resnet_df["top5"],
    "reversed-accuracy": rotated_resnet_df["reversed-top5"],
})
plot_accuracy(rotated_resnet_df, title="Rotationally Invariant ResNets", num_classes=200)