In [1]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
from utils import load_df

# Load the "src/summary" table and transpose it; later cells index the
# transposed frame first by dependency kind (column) and then by version group.
df = load_df("src/summary").T

[        utils.py:160] INFO: NumExpr defaulting to 8 threads.
[           pd.py:53 ] INFO: Loaded df from /Users/szhong/Downloads/bpf-study/output/src/summary.pkl


In [9]:
from dataclasses import dataclass

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from depsurf import DepKind, Versions, IssueEnum
from utils import (
    bold,
    get_legend_handles_labels,
    get_text_height,
    plot_yticks,
    save_fig,
    label_multiline_text,
    plot_bar,
)


# X-axis caption for each version group.  The key order also determines the
# left-to-right order of the subplot columns, because the plotting loop zips
# the axes of a row with `group_labels.keys()`.
group_labels = {
    Versions.LTS: "Kernel Versions w/ LTS (2 yr)",
    Versions.REGULAR: "Kernel Versions w/ Regular Releases (6 mo)",
    Versions.REV: f"Revisions for Kernel {bold(5.4)}",
}


@dataclass
class SubfigPlotter:
    """Draw one stacked-bar subplot for a single version group.

    Each row of ``df`` becomes one bar.  The columns returned by ``columns``
    are stacked inside the bar, while the summary columns (``"Old"`` and the
    ``IssueEnum.CHANGE`` / ``ADD`` / ``REMOVE`` totals) are rendered as a small
    multi-line text label at the top of the axes.
    """

    ax: plt.Axes  # axes that receives the bars, ticks and labels
    df: pd.DataFrame  # one row per bar
    group: Versions  # version group; supplies tick labels and the x caption
    kind: DepKind  # dependency kind of this subplot (not read by the plotter)
    show_xlabels: bool  # draw tick labels and the x caption when True
    fontsize: int = 8  # font size of the labels drawn above the bars

    # Colour of each top-label line: total, changed, removed, added.
    _TOP_LABEL_COLORS = ("black", "darkgreen", "xkcd:dark orange", "blue")

    @property
    def columns(self):
        """Columns drawn as stacked segments, i.e. all but the summary columns.

        Raises:
            KeyError: if any of the summary columns is missing from ``df``.
        """
        return self.df.columns.drop(
            [IssueEnum.CHANGE, IssueEnum.ADD, IssueEnum.REMOVE, "Old", "New"]
        )

    @property
    def num_bars(self):
        """Number of bars in the subplot (one per row of ``df``)."""
        return len(self.df.index)

    def plot(self):
        """Render the bars, axis ticks and the labels above the bars."""
        self.plot_bar()
        self.plot_xticks()
        plot_yticks(self.ax)
        self.ax.tick_params(axis="both", labelsize=9)
        # The labels rely on fixed-width formatting, so draw them with a
        # condensed monospace font.
        with plt.rc_context(
            {
                "font.family": "monospace",
                "font.size": self.fontsize,
                "font.stretch": "condensed",
            }
        ):
            # Per-segment value labels (plot_val_labels) are currently disabled.
            self.plot_top_labels()

    def plot_bar(self, x_pad=0.5):
        """Draw the stacked bars and set the axis limits.

        Args:
            x_pad: horizontal margin, in bar units, left of the first bar and
                right of the last bar.
        """
        plot_bar(self.ax, self.df, self.columns)
        totals = self.df[self.columns].sum(axis=1)
        self.ax.set_xlim(-x_pad, self.num_bars - 1 + x_pad + 0.01)
        # Leave 25% headroom above the tallest bar for the top labels.
        self.ax.set_ylim(0, totals.max() * 1.25)

    def plot_xticks(self):
        """Place version ticks between bars and optionally label the x axis."""
        # Bars sit at integer positions, so ticks at -0.5, 0.5, ... fall on the
        # boundaries between bars: one more tick than there are bars.
        xs = np.arange(self.num_bars + 1) - 0.5
        self.ax.set_xticks(xs, self.group.version_labels)

        if self.group == Versions.REGULAR:
            # Mark the LTS releases among the regular releases.
            for x, version in zip(xs, self.group):
                if version.lts:
                    self.ax.axvline(x, color="gray", linestyle="--", linewidth=1)

        if self.show_xlabels:
            self.ax.set_xlabel(group_labels[self.group])
        else:
            self.ax.set_xticklabels([])

    def plot_val_labels(self):
        """Write each segment's value at its centre if the segment is tall enough."""
        # NOTE(review): assumes get_text_height returns a height in display
        # units, comparable to the transformed bar height below -- confirm.
        text_height = get_text_height(self.ax)

        def format_val(val):
            # ">=" so that exactly 1000 is abbreviated as "1k" as well.
            if val >= 1000:
                return f"{val / 1000:.2g}k"
            return str(val)

        trans = self.ax.transData
        bottom = np.zeros(self.num_bars)
        for col in self.columns:
            for i, v in enumerate(self.df[col]):
                # Segment height in display coordinates.
                bar_height = (trans.transform((0, v)) - trans.transform((0, 0)))[1]
                if bar_height < text_height * 0.9:
                    continue  # the text would not fit inside the segment
                h = v / 2 + bottom[i]
                self.ax.text(i, h, format_val(v), ha="center", va="center")
            bottom += self.df[col].to_numpy()

    @staticmethod
    def _format_k(val, num_digits=3):
        """Format a count right-aligned in ``num_digits`` characters.

        Values of 1000 and above are shown in whole thousands with a ``k``
        suffix (which takes one of the ``num_digits`` characters).
        """
        # Rows produced by ``iterrows`` may be upcast to float, which the
        # ``d`` format code rejects.
        val = int(val)
        if val >= 1000:
            return f"{val // 1000:{num_digits - 1}d}k"
        return f"{val:{num_digits}d}"

    def plot_top_labels(self):
        """Draw the total / changed / removed / added summary above each bar.

        The first line is the ``"Old"`` total.  The remaining lines show the
        changed, removed and added counts: as absolute numbers for the
        revision group and as a percentage of ``"Old"`` otherwise.  A zero
        count produces an empty line so the other lines keep their position.
        """
        ymax = self.ax.get_ylim()[1]
        absolute = self.group == Versions.REV
        issues = (
            (r"\Delta", IssueEnum.CHANGE),
            (r"\minus", IssueEnum.REMOVE),
            (r"\plus", IssueEnum.ADD),
        )

        for i, (_, row) in enumerate(self.df.iterrows()):
            old = row["Old"]
            lines = [f"$\\Sigma${self._format_k(old)}"]
            for symbol, issue in issues:
                val = row[issue]
                if val == 0:
                    lines.append("")
                elif absolute:
                    lines.append(f"${symbol}${val:3.0f}")
                else:
                    lines.append(f"${symbol}${val / old:3.0%}")

            label_multiline_text(
                self.ax,
                i,
                ymax,
                lines,
                list(self._TOP_LABEL_COLORS),
                fontsize=self.fontsize,
            )



# Dependency kinds to plot, one subplot row each.  The second assignment
# overrides the first, so only the LSM row is drawn for this figure.
kinds = [DepKind.FUNC, DepKind.STRUCT, DepKind.TRACEPOINT]
kinds = [DepKind.LSM]

# One row per kind and one column per version group.  squeeze=False keeps
# `axs` two-dimensional even when there is a single row.
fig, axs = plt.subplots(
    len(kinds),
    3,
    figsize=(12, len(kinds) * 3.3),
    width_ratios=[4, 16, 5],
    gridspec_kw={"wspace": 0.15, "hspace": 0.075},
    squeeze=False,
)

for row_idx, (ax_rows, kind) in enumerate(zip(axs, kinds)):
    for col_idx, (ax, group) in enumerate(zip(ax_rows, group_labels.keys())):
        df_group = df[kind.value].loc[group]
        SubfigPlotter(
            ax=ax,
            df=df_group,
            kind=kind,
            group=group,
            # Only the bottom row carries tick labels and the x caption.
            show_xlabels=row_idx == len(axs) - 1,
        ).plot()

        # Only the leftmost subplot of each row gets a y-axis label.
        if col_idx == 0:
            ax.set_ylabel(f"Number of {bold(kind.capitalize())} Changes")

# Build the shared legend from the first subplot.  `ax` is a single Axes left
# over from the loop and cannot be indexed, so index the 2-D `axs` array.
handles, labels = axs[0, 0].get_legend_handles_labels()
fig.legend(
    handles,
    labels,
    loc="upper center",
    ncol=5,
    bbox_to_anchor=(0.5, 1),
    frameon=False,
)
save_fig(fig, "lsm")

[          mpl.py:79 ] INFO: Saved figure to /Users/szhong/Downloads/bpf-study/paper/figs/lsm.pdf
