In [None]:
from matplotlib.image import NonUniformImage
import matplotlib.pyplot as plt
from glob import glob
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib
import json
import re

# Apply seaborn's "darkgrid" theme to every figure drawn in this notebook.
sns.set_theme(style="darkgrid")
# Default figure size as (width, height) in inches; the per-model subplot
# stacks drawn below rely on this large canvas.
matplotlib.rcParams['figure.figsize'] = (20, 10)

# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
def model_name_from_path(path: str) -> str:
    """Return the model name encoded in a ``results/<model>.json`` path.

    The pattern is anchored at the end and escapes the dot of the extension,
    so names that themselves end in "json" (e.g. ``results/xjson.json``) are
    extracted in full. Both ``/`` and ``\\`` are accepted as the separator
    because ``glob`` returns paths using the platform's separator.

    Raises:
        ValueError: if ``path`` does not look like ``results/<model>.json``.
    """
    match = re.search(r"results[/\\](.+)\.json$", path)
    if match is None:
        raise ValueError(f"Not a results file path: {path!r}")
    return match.group(1)


# Load every results file into one long-form frame: one row per (model, task).
# Each file is a JSON object holding a "layers" entry plus one entry per task;
# a task entry maps a measurement name to a list of values.
results_files = sorted(glob("results/*.json"))
all_results = []
layers = {}  # model name -> the file's "layers" entry
for file in results_files:
    with open(file, "r") as f:
        results = json.load(f)
    model = model_name_from_path(file)
    # "layers" is metadata, not a task: remove it before iterating the tasks.
    layers[model] = results.pop("layers")
    for task_name, problem in results.items():
        all_results.append(
            {
                "task": task_name,
                "model": model,
                "model/task": f"{model}/{task_name}",
            }
            # Every measurement becomes its own column holding a numpy array.
            | {
                problem_name: np.array(values)
                for problem_name, values in problem.items()
            }
        )
df = pd.DataFrame(all_results)
df.head()

In [None]:
# Store the "outliers" values scaled by 100 in a new "percent" column.
df["percent"] = df["outliers"].mul(100)

In [None]:
# Split the results by model family. `str.match` anchors at the start of the
# model name only, so any suffix after the matched prefix is accepted.
resnet_df = df.loc[df["model"].str.match(r"resnet\d+")]
base_df = df.loc[df["model"].str.match(r"[a-z0-9]+-base")]
large_df = df.loc[df["model"].str.match(r"[a-z0-9]+-large")]

In [None]:
def plot_layer_chart(results: pd.DataFrame, column: str, title: str = None, split="task", transform=None):
    """Draw one bar chart per model showing a per-layer value, grouped by ``split``.

    Args:
        results: frame with a "model" column, a ``split`` column and a
            ``column`` whose cells are iterables holding one value per layer
            (the position in the iterable is used as the layer index).
        column: name of the per-layer column to plot on the y axis.
        title: optional figure-level title.
        split: column whose distinct values become the bar hue groups.
        transform: optional callable invoked with each model's ``Axes`` after
            its bars are drawn (e.g. to annotate them).

    Raises:
        ValueError: if ``results`` has no rows, since no subplot can be made.
    """
    splits = results[split].unique()
    models = results["model"].unique()
    if len(models) == 0:
        raise ValueError("plot_layer_chart: `results` contains no rows to plot")

    # Unroll each per-layer array into long-form records, one per layer value.
    # Iterating split-major keeps the hue order identical across subplots.
    records = []
    for pivot in splits:
        for model in models:
            sub_df = results[(results[split] == pivot) & (results["model"] == model)]
            for values in sub_df[column]:
                for layer, value in enumerate(values):
                    records.append({
                        split: pivot,
                        "model": model,
                        "layer": layer,
                        column: value
                    })
    data = pd.DataFrame(records)

    # squeeze=False always yields a 2-D array of axes, so a single model needs
    # no special casing.
    fig, axes = plt.subplots(len(models), squeeze=False)
    for ax, model in zip(axes[:, 0], models):
        # Pass `data` by keyword: the first positional slot of barplot is not
        # `data` in every seaborn release.
        sns.barplot(data=data[data["model"] == model], x="layer", y=column, hue=split, ax=ax)
        if transform is not None:
            transform(ax)
        ax.set_title(model)
        ax.set_xlabel("")
    if title is not None:
        fig.suptitle(title)


In [None]:
def add_values(ax: "plt.Axes", offset: float = 10):
    """Write each bar's height as a text label above the bar's error bar.

    Bars (``ax.patches``) and lines (``ax.lines``) are paired positionally.
    NOTE(review): this assumes the i-th line is the error bar of the i-th
    bar, as drawn by the bar plot this is used with; confirm for the seaborn
    version in use.

    Args:
        ax: axes holding the bars to annotate.
        offset: vertical gap, in data units, between the top of the error bar
            and the label.
    """
    for bar, line in zip(ax.patches, ax.lines):
        height = bar.get_height()
        if not np.isfinite(height):
            # Nothing meaningful to print for a missing bar.
            continue

        # An error bar with no finite extent would place the label at a NaN
        # position; fall back to the bar's own top in that case.
        ydata = np.asarray(line.get_ydata(), dtype=float)
        finite = ydata[np.isfinite(ydata)]
        top = finite.max() if finite.size else height

        center = bar.get_x() + bar.get_width() / 2.
        # One decimal for values below 10, none otherwise.
        ax.text(center, top + offset, f"{height:.{int(height < 10)}f}", ha="center", va="bottom", size=10)

In [None]:
# Per-layer "percent" values for the base and large model families.
plot_layer_chart(results=base_df, column="percent", title="base")
plot_layer_chart(results=large_df, column="percent", title="large")

In [None]:
# Per-layer "kurtosis" values for the base and large model families.
plot_layer_chart(results=base_df, column="kurtosis", title="base")
plot_layer_chart(results=large_df, column="kurtosis", title="large")

In [None]:
def plot_kurtosis(results: pd.DataFrame, title: str = None):
    """Plot "kurtosis" and "rotated-kurtosis" side by side for every model.

    The two columns are melted into long form: a "type" column names the
    source column and a "value" column holds its per-layer values. The result
    is drawn with ``plot_layer_chart`` using "type" as the hue, and every bar
    is annotated through ``add_values``.

    Args:
        results: frame containing "kurtosis" and "rotated-kurtosis" columns
            plus the columns ``plot_layer_chart`` needs (e.g. "model").
        title: optional figure-level title.
    """
    value_vars = ["kurtosis", "rotated-kurtosis"]
    melted_results = pd.melt(
        results,
        # Take the identifier columns from the frame being plotted rather
        # than from the notebook-global `df`, so the function only depends
        # on its argument.
        id_vars=list(results.columns.difference(value_vars)),
        value_vars=value_vars,
        var_name="type",
        value_name="value",
    )
    plot_layer_chart(melted_results, title=title, column="value", split="type", transform=add_values)

In [None]:
# Kurtosis vs. rotated kurtosis for the base and large model families.
plot_kurtosis(results=base_df, title="base")
plot_kurtosis(results=large_df, title="large")

In [None]:
# Per-layer "rms" values for the base and large model families.
plot_layer_chart(results=base_df, column="rms", title="base")
plot_layer_chart(results=large_df, column="rms", title="large")