In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

from IPython.display import display

In [30]:
def read_data(path="output/diff.pkl"):
    """Load the pickled diff table and discard rows that are zero in every column.

    Args:
        path: Location of the pickled DataFrame. Defaults to the
            ``output/diff.pkl`` file the notebook reads, so a bare
            ``read_data()`` call keeps working unchanged.

    Returns:
        The DataFrame restricted to rows holding at least one non-zero value.
    """
    # NOTE: unpickling can execute code stored in the file; only load pickles
    # produced by a trusted pipeline.
    df = pd.read_pickle(path)
    has_nonzero = (df != 0).any(axis=1)
    return df.loc[has_nonzero]


# read_data().loc["func"].T

In [None]:
from depsurf import BuildVersion

# Load the diff table; read_data() drops rows that are zero in every column.
df = read_data()


def plot_legend(fig: plt.Figure):
    """Draw one figure-level legend that merges the legend entries of every axes.

    A label used on several axes is listed once: it keeps the position of its
    first occurrence and the handle of its last occurrence. The legend is
    anchored at the top centre of the figure with one column per label.

    Raises:
        ValueError: if none of the axes has a labelled artist (the
            label/handle pairs cannot be unpacked).
    """
    handle_by_label = {}
    for axes in fig.axes:
        axes_handles, axes_labels = axes.get_legend_handles_labels()
        for handle, label in zip(axes_handles, axes_labels):
            handle_by_label[label] = handle

    labels, handles = zip(*handle_by_label.items())

    # Every spacing knob of the legend uses the same compact value.
    compact = 0.75
    spacing = {
        "handlelength": compact,
        "handletextpad": compact,
        "columnspacing": compact,
        "labelspacing": compact,
    }
    fig.legend(
        handles,
        labels,
        loc="upper center",
        ncol=len(labels),
        bbox_to_anchor=(0.5, 1),
        **spacing,
    )


# Which slice of the diff table to plot; "tp" combines the two tracepoint slices.
kind = "tp"

if kind != "tp":
    df = df.loc[kind]
    df_old = df.loc["Old"]
else:
    df1 = df.loc["tp_func"]
    # keep one column for added and removed: only the tp_func rows supply them
    df2 = df.loc["tp_event"].drop(index=["Added", "Removed"])
    df = pd.concat([df1, df2], axis=0)
    df_old = df1.loc["Old"]

# Drop the summary rows, then transpose so each remaining row label becomes a column.
df = df.drop(index=["Old", "New", "Common", "Changed"]).T

import matplotlib as mpl

# Three side-by-side panels; the middle one is the widest.
fig, (ax1, ax2, ax3) = plt.subplots(
    nrows=1,
    ncols=3,
    figsize=(12, 5),
    dpi=200,
    width_ratios=[1, 4, 1.25],
    sharey=False,
)

# Two colour ramps sampled on [0, 1]: greens and purples.
cmap = mpl.cm.ScalarMappable(
    norm=mpl.colors.Normalize(vmin=0, vmax=1), cmap=mpl.cm.Greens
)
cmap2 = mpl.cm.ScalarMappable(
    norm=mpl.colors.Normalize(vmin=0, vmax=1), cmap=mpl.cm.Purples
)

# "Param ..." labels take shades of green, "Field ..." labels shades of purple.
# No entries for "Added" / "Removed" (candidates "tab:green" / "tab:red" are disabled).
color_map = {
    **{
        label: cmap.to_rgba(shade)
        for label, shade in [
            ("Param added", 0.3),
            ("Param removed", 0.5),
            ("Param type changed", 0.7),
            ("Param reordered", 0.9),
        ]
    },
    **{
        label: cmap2.to_rgba(shade)
        for label, shade in [
            ("Field added", 0.3),
            ("Field removed", 0.5),
            ("Field type changed", 0.7),
        ]
    },
}


def plot_xticks(ax: plt.Axes, raw_labels, label_formatter):
    """Label the x axis with one tick per version boundary.

    Each entry of ``raw_labels`` is split on ``"_"`` and every part is parsed
    with ``BuildVersion.from_str``. The resulting (old, new) pairs must chain,
    i.e. the new version of one pair is the old version of the next.

    Args:
        ax: Axes whose x ticks are set.
        raw_labels: Object offering ``.map(...).to_list()`` over the label strings.
        label_formatter: Callable turning a parsed version into its tick label.
    """

    def split_versions(label):
        return [BuildVersion.from_str(part) for part in label.split("_")]

    def chain_versions(version_pairs):
        olds, news = list(zip(*version_pairs))
        # consecutive pairs have to share their boundary version
        assert olds[1:] == news[:-1]
        return olds + (news[-1],)

    versions = chain_versions(raw_labels.map(split_versions).to_list())
    tick_labels = map(label_formatter, versions)
    # N pairs give N + 1 versions; ticks sit at -0.5, 0.5, ..., half a unit
    # left of the integer positions.
    tick_positions = np.arange(len(versions)) - 0.5
    ax.set_xticks(tick_positions, tick_labels, rotation=0)


def plot_subfig(ax: plt.Axes, df: pd.DataFrame, xlabel, label_formatter=None):
    """Draw one stacked-bar panel with a value label inside each segment.

    Args:
        ax: Axes to draw on.
        df: One row per bar and one column per stacked segment. The index
            holds the raw labels that are handed to ``plot_xticks``.
        xlabel: Text for the x-axis label.
        label_formatter: Callable turning a parsed version into its tick
            label. ``None`` falls back to ``str``.

    Reads the module-level ``kind`` and ``color_map``: segments get their
    colour from ``color_map`` only when ``kind == "tp"``.
    """
    if label_formatter is None:
        # The declared default was previously forwarded unchanged and then
        # called as a function while the tick labels were built.
        label_formatter = str

    n_bars = len(df.index)
    xs = np.arange(n_bars)

    # Pass 1: the stacked bars themselves.
    bottom = np.zeros(n_bars)
    for col in df.columns:
        # Always bind `color`: the fixed palette applies to tracepoints only,
        # every other kind passes None so matplotlib picks the colour.
        color = color_map.get(col) if kind == "tp" else None
        ax.bar(xs, df[col], label=col, bottom=bottom, color=color)
        bottom += df[col].to_numpy()

    # Update the view limits before bar heights are measured in pixels below.
    ax.autoscale_view()

    # Pass 2: centre each value inside its segment, dropping labels that are
    # taller than the segment they belong to.
    trans = ax.transData
    bottom = np.zeros(n_bars)
    for col in df.columns:
        for i, v in enumerate(df[col]):
            text = ax.text(
                i, v / 2 + bottom[i], v, ha="center", va="center", fontsize=8
            )
            bar_height = (trans.transform((0, v)) - trans.transform((0, 0)))[1]
            text_height = text.get_window_extent().height
            if text_height > bar_height:
                text.remove()
        bottom += df[col].to_numpy()

    # explicitly add 0 labels where the whole stack is empty
    for i, b in enumerate(bottom):
        if b == 0:
            ax.text(i, 0, "0", ha="center", va="bottom", fontsize=8)

    plot_xticks(ax, df.index, label_formatter)
    ax.set_xlabel(xlabel)


def bold(text):
    """Wrap ``text`` in a mathtext ``\\mathbf{...}`` expression delimited by ``$``."""
    return "$\\mathbf{{{}}}$".format(text)


def label_formatter(v):
    """Return the short version string of ``v``, rendered bold when ``v.is_lts`` is truthy."""
    if not v.is_lts:
        return v.short_version
    return bold(v.short_version)


# One entry per panel, left to right: (axes, column group, x-axis label, tick formatter).
panels = [
    (ax1, "lts", "Versions w/ LTS (2 yr)", label_formatter),
    (ax2, "all", "Versions w/ Regular Releases (6 mo)", label_formatter),
    (ax3, "rev", f"Revisions in Kernel {bold(5.4)}", lambda v: v.revision),
]
for panel_ax, group, panel_xlabel, tick_formatter in panels:
    plot_subfig(panel_ax, df.loc[group], panel_xlabel, tick_formatter)

# Shared legend above the panels, then the figure-wide title and y label.
plot_legend(fig)

title = f"{kind.capitalize()} Changes by Kernel Version"
fig.suptitle(title, y=1.03)
fig.supylabel("Number of Changes")

plt.tight_layout()

# display(df)

In [None]:
def dump_latex(df, path):
    df = pd.read_pickle("output/diff.pkl")
    df = df.astype(object)
    kinds = ["struct", "func", "tp_func", "lsm"]
    df = df.loc[kinds]

    for kind in kinds:
        for row in df.loc[kind].index:
            if row == "Old":
                continue
            for col in df.columns:
                val = df.loc[(kind, row), col]
                percentage = val / df.loc[(kind, "Old"), col] * 100
                df.loc[(kind, row), col] = f"\\val{{{val}}}{{{percentage:4.1f}}}"

    df = df.drop(index=["Old", "New", "Common"], level=1)

    df.to_latex("output/diff.tex")

    return df


# Export the LaTeX table. NOTE(review): this call passes no arguments, so it
# only works if dump_latex can be called without any.
dump_latex()