In [None]:
import json
from typing import Any, Dict
from matplotlib import pyplot
from matplotlib.axes._axes import Axes  # For typing purposes
import numpy as np
import pyperf
from scipy import stats

In [None]:
def get_data(result: Dict[str, Any]) -> Dict[str, np.ndarray]:
    """Collect the measured values of every benchmark in a parsed result file.

    Args:
        result: Parsed JSON document. It must provide a ``"benchmarks"`` list
            whose entries each have a ``"runs"`` list. A run contributes the
            contents of its ``"values"`` list; runs without that key
            contribute nothing.

    Returns:
        Mapping of benchmark name to a ``float64`` array holding the values of
        all runs of that benchmark, sorted in ascending order. If two
        benchmarks resolve to the same name, the later one replaces the
        earlier one.

    Raises:
        KeyError: If ``"benchmarks"`` or a benchmark's ``"runs"`` is missing,
            or if neither the benchmark's own metadata nor the top-level
            metadata provides a ``"name"``.
    """
    results = {}

    for benchmark in result["benchmarks"]:
        # A benchmark may carry its own metadata without a "name" entry, so
        # the fallback to the top-level name is keyed on the name itself
        # rather than on the mere presence of a metadata block.
        name = benchmark.get("metadata", {}).get("name")
        if name is None:
            name = result["metadata"]["name"]

        values = [
            value
            for run in benchmark["runs"]
            for value in run.get("values", [])
        ]
        results[name] = np.sort(np.array(values, dtype=np.float64))

    return results

In [None]:
# Load data
# Both result files are read from results/<run>/bm-<file>.json.
run = "run4"

file1 = "python3-11-3-low-01"
file2 = "python3-11-3-low-02"

file1_path = f"results/{run}/bm-{file1}.json"
file2_path = f"results/{run}/bm-{file2}.json"

# JSON text is UTF-8; pass the encoding explicitly so that parsing does not
# depend on the platform's locale default.
with open(file1_path, encoding="utf-8") as first_file, \
        open(file2_path, encoding="utf-8") as second_file:
    run1 = get_data(json.load(first_file))
    run2 = get_data(json.load(second_file))

In [None]:
# Make sure that both runs contain exactly the same set of benchmark names.
# The symmetric difference holds every name that appears in only one run.
mismatched = run1.keys() ^ run2.keys()
if mismatched:
    raise ValueError(
        "The benchmarking suites do not contain the same benchmarks; "
        f"present in only one run: {sorted(mismatched)}"
    )

In [None]:
# Clean data
# Maximum number of values kept per benchmark and run.
MAX_VALUES_PER_RUN = 45

# Remove outliers: get_data() returns every array sorted in ascending order,
# so a leading slice keeps the smallest values and drops the largest ones.
# Slicing is a no-op for arrays that are already short enough, which keeps
# this cell safe to re-run.
for name in run1.keys():
    run1[name] = run1[name][:MAX_VALUES_PER_RUN]
    run2[name] = run2[name][:MAX_VALUES_PER_RUN]

In [None]:
def generate_dist_plot(ax: Axes, base: np.ndarray, head: np.ndarray) -> None:
    """Draw both samples as overlaid histograms with normal curves on ``ax``.

    Args:
        ax: Axes that receives the histograms, the curves and the legend.
        base: Values of the first run.
        head: Values of the second run.

    For each sample a normal density with the sample's mean and standard
    deviation is plotted over the range mean +/- 3 standard deviations.
    """
    # (values, histogram label, curve label, curve colour) for each run.
    series = (
        (base, 'Run 1', 'Run 1 (Normal Dist.)', "blue"),
        (head, 'Run 2', 'Run 2 (Normal Dist.)', "orange"),
    )

    # density=True scales each histogram so that its integral is 1, which
    # puts it on the same scale as the probability density curves below.
    for values, hist_label, _, _ in series:
        ax.hist(values, alpha=0.5, label=hist_label, density=True)

    def _format_tick(val, _):
        # Tick labels: four decimals followed by an "s" suffix.
        return f"{val:.04f}s"

    ax.xaxis.set_major_formatter(_format_tick)

    for values, _, curve_label, colour in series:
        mu = values.mean()
        sigma = values.std()
        grid = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
        density = stats.norm.pdf(grid, mu, sigma)
        ax.plot(grid, density, label=curve_label, color=colour)

    ax.legend()


In [None]:
def generate_table(ax: Axes, base: np.ndarray, head: np.ndarray) -> None:
    """Render a table of summary statistics for both samples on ``ax``.

    Args:
        ax: Axes that receives the table; its axis decorations are switched
            off afterwards.
        base: Values of the first run (column "Run 1").
        head: Values of the second run (column "Run 2").

    The table has one row per statistic plus a final row holding the p-value
    returned by ``stats.ttest_ind(base, head)``.
    """
    # Row label paired with the function that computes the statistic.
    statistics = [
        ("Mean", lambda sample: sample.mean()),
        ("Variance", lambda sample: sample.var()),
        ("Skewness", stats.skew),
        ("Variation", stats.variation),  # Coefficient of variation
        ("Minimum", lambda sample: sample.min()),
        ("Maximum", lambda sample: sample.max()),
    ]
    row_labels = [label for label, _ in statistics]

    # "4.1g" prints one significant digit, padded to at least 4 characters.
    cell_text = [
        [f"{statistic(base):4.1g}", f"{statistic(head):4.1g}"]
        for _, statistic in statistics
    ]

    table = ax.table(
        cellText=cell_text,
        loc='center',
        cellLoc='center',
        rowLabels=row_labels,
        colLabels=["Run 1", "Run 2"],
    )
    table.set_fontsize(11)

    _statistic, p_value = stats.ttest_ind(base, head)
    p_text = f"{p_value:4.1g}"

    # The p-value row is appended below the last statistic row (row index 7
    # for the six statistics above). The single value should span both data
    # columns, so two cells are added and the border between them is left
    # out to make them look like one.
    p_row = len(row_labels) + 1
    # Hard-coded cell geometry; the height is presumably chosen to match the
    # automatically created cells -- re-check it when the layout changes.
    geometry = {"width": 0.5, "height": 0.0981241}

    value_cell = table.add_cell(p_row, 0, text=p_text, loc='center', **geometry)
    value_cell.visible_edges = 'BTL'
    filler_cell = table.add_cell(p_row, 1, text='', **geometry)
    filler_cell.visible_edges = 'BRT'
    table.add_cell(p_row, -1, text='P-value', loc='left', **geometry)

    ax.axis("off")

In [None]:
# When debug is enabled only the first few benchmarks are drawn, which keeps
# the figure small and quick to render.
debug = True
debug_rows = 3

# Sort by p-value (ascending), so the benchmarks with the smallest p-value
# come first.
pairs = []
for name in run1.keys():
    _, p_val = stats.ttest_ind(run1[name], run2[name])
    pairs.append((name, p_val))
pairs = sorted(pairs, key=lambda x: x[1])

# Draw exactly as many rows as there are benchmarks to show; this avoids
# empty axes when debug is on and fewer than debug_rows benchmarks exist.
shown = pairs[:debug_rows] if debug else pairs
if not shown:
    raise ValueError("There are no benchmarks to plot")

n_rows = len(shown)
# squeeze=False keeps axs two-dimensional even for a single row, so that
# each row can always be unpacked into (plot axes, table axes).
fig, axs = pyplot.subplots(
    n_rows,
    2,
    figsize=(10, 2.5 * n_rows),
    layout="constrained",
    width_ratios=[.7, .3],
    squeeze=False,
)

fig.suptitle(f"Comparison {file1} / {file2}", fontsize=16)

for (plot_ax, table_ax), (name, _) in zip(axs, shown):
    plot_ax.set_title(name)
    generate_dist_plot(plot_ax, run1[name], run2[name])
    generate_table(table_ax, run1[name], run2[name])

pyplot.show()
