In [2]:
%reload_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

from IPython.display import display

In [79]:
from depsurf import BuildVersion


def read_data():
    """Load the pickled diff table and discard rows that are zero everywhere.

    Returns:
        The frame stored in ``output/diff.pkl`` restricted to rows that have
        at least one non-zero entry.
    """
    table = pd.read_pickle("output/diff.pkl")
    # True for every row holding at least one value different from 0.
    has_nonzero = table.ne(0).any(axis=1)
    return table.loc[has_nonzero]


# read_data().loc["func"].T

In [None]:
import matplotlib as mpl

df = read_data()

# Which top-level group of the diff table to plot; "tp" merges the two
# tracepoint groups ("tp_func" and "tp_event") into a single table.
kind = "tp"

if kind != "tp":
    df = df.loc[kind]
else:
    tp_func_rows = df.loc["tp_func"]
    # "Added"/"Removed" are taken from the tp_func group only.
    tp_event_rows = df.loc["tp_event"].drop(index=["Added", "Removed"])
    df = pd.concat([tp_func_rows, tp_event_rows], axis=0)

# Drop the summary rows, strip the "- " prefix from the remaining row labels,
# then transpose so the former columns become the rows.
df = df.drop(index=["Old", "New", "Common", "Changed"])
df = df.set_axis(df.index.str.removeprefix("- "), axis="index").T

fig, (ax1, ax2, ax3) = plt.subplots(
    1,
    3,
    figsize=(12, 5),
    dpi=200,
    width_ratios=[1, 4, 1.25],
    sharey=False,
)

# Two colour ramps sampled in [0, 1]: greens and purples.
cmap = mpl.cm.ScalarMappable(
    cmap=mpl.cm.Greens,
    norm=mpl.colors.Normalize(vmin=0, vmax=1),
)
cmap2 = mpl.cm.ScalarMappable(
    cmap=mpl.cm.Purples,
    norm=mpl.colors.Normalize(vmin=0, vmax=1),
)

def parse_version(col):
    """Turn a ``"<first>|<second>"`` label into a pair of ``BuildVersion`` objects.

    Args:
        col: String containing exactly one ``"|"`` separator; any other count
            makes the tuple unpacking raise ``ValueError``.

    Returns:
        ``(BuildVersion, BuildVersion)`` parsed from the two halves, in order.
    """
    first_str, second_str = col.split("|")
    return (BuildVersion.from_str(first_str), BuildVersion.from_str(second_str))

def plot_subfig(ax, df):
    """Draw one stacked-bar panel on ``ax``.

    Every row of ``df`` becomes one bar and every column one stacked segment.
    There is one more tick than bars: ticks sit on the bar edges, tick ``i`` is
    labelled with the first version of row ``i`` and the last tick with the
    second version of the final row.

    Args:
        ax: Matplotlib axes to draw on.
        df: Frame whose index holds ``"<first>|<second>"`` strings accepted by
            ``parse_version`` and whose columns are the categories to stack.
            An empty frame leaves the axes without ticks or bars of any height.

    Reads the notebook-level names ``kind``, ``cmap`` and ``cmap2``.
    """
    # Fixed palette for the tracepoint categories, built once instead of once
    # per column. Categories missing from it (and every category when
    # kind != "tp") get color=None, i.e. matplotlib's default colour cycle.
    if kind == "tp":
        palette = {
            "Param added": cmap.to_rgba(0.3),
            "Param removed": cmap.to_rgba(0.5),
            "Param type changed": cmap.to_rgba(0.7),
            "Param reordered": cmap.to_rgba(0.9),
            "Field added": cmap2.to_rgba(0.3),
            "Field removed": cmap2.to_rgba(0.5),
            "Field type changed": cmap2.to_rgba(0.7),
        }
    else:
        palette = {}

    bottom = np.zeros(len(df.index))
    xs = np.arange(len(df.index))
    for col in df.columns:
        # Always bound: the previous code left `color` undefined unless
        # kind == "tp", which raised UnboundLocalError for any other kind.
        color = palette.get(col)
        df_col = df[col]
        ax.bar(xs, df_col, label=col, bottom=bottom, color=color)
        # The next segment starts where this one ended.
        bottom += df_col.to_numpy()

    pairs = [parse_version(label) for label in df.index]
    if not pairs:
        # No rows means no versions to label (and pairs[-1] would fail).
        return

    versions = [fst for fst, _ in pairs] + [pairs[-1][-1]]
    same_version = all(versions[0].version == v.version for v in versions)
    if same_version:
        # Only the revision differs between builds, so show that.
        label_names = [v.revision for v in versions]
    else:
        label_names = [v.short_version for v in versions]

    # Shift by half a bar so the ticks land on the bar edges.
    xs = np.arange(len(df.index) + 1) - 0.5
    ax.set_xticks(xs, label_names, rotation=0)
    if not same_version:
        # Emphasise the versions flagged as LTS.
        for label, version in zip(ax.get_xticklabels(), versions):
            if version.is_lts:
                label.set_fontweight("bold")


# (axes, row group of df, x-axis caption) for each panel, left to right.
panels = [
    (ax1, "lts", "Versions w/ LTS (2 yr)"),
    (ax2, "all", "Versions w/ Regular Releases (6 mo)"),
    (ax3, "revision", "Revisions in Kernel 5.4"),
]
for panel_ax, group, caption in panels:
    plot_subfig(panel_ax, df.loc[group])
    panel_ax.set_xlabel(caption)

# One figure-level legend, built from the first panel's entries.
handles, labels = ax1.get_legend_handles_labels()
# Tighter-than-default spacing so all entries fit on a single row.
legend_spacing = {
    "handlelength": 0.75,
    "handletextpad": 0.75,
    "columnspacing": 0.75,
    "labelspacing": 0.75,
}
fig.legend(
    handles,
    labels,
    loc="upper center",
    ncol=len(labels),
    bbox_to_anchor=(0.5, 1),
    **legend_spacing,
)

# y > 1 places the title above the legend anchored at the top edge.
fig.suptitle(f"{kind.capitalize()} Changes by Kernel Version", y=1.03)
fig.supylabel("Number of Changes")

plt.tight_layout()

# display(df)

In [None]:
# Object dtype so numeric cells can be overwritten with LaTeX strings below.
df = pd.read_pickle("output/diff.pkl").astype(object)

kinds = ["struct", "func", "tp_func", "lsm"]
df = df.loc[kinds]

# Rewrite every cell except the "Old" rows as \val{<count>}{<percent>}, where
# the percentage is relative to the "Old" row of the same kind and column.
# The "Old" rows stay numeric while the loop runs because they are the
# denominators.
for kind in kinds:
    baseline_key = (kind, "Old")
    for row in df.loc[kind].index:
        if row != "Old":
            cell_key = (kind, row)
            for col in df.columns:
                val = df.loc[cell_key, col]
                percentage = val / df.loc[baseline_key, col] * 100
                df.loc[cell_key, col] = f"\\val{{{val}}}{{{percentage:4.1f}}}"

# The summary rows are not part of the exported table.
df = df.drop(index=["Old", "New", "Common"], level=1)

df.to_latex("output/diff.tex")

df