In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up on the next cell execution without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [2]:
from typing import List, Tuple
from enum import StrEnum

from pathlib import Path
from depsurf import OUTPUT_PATH, Version, FuncEntry


class Functions:
    """Function entries grouped by function name.

    ``data`` is a list of ``(name, entries)`` pairs, where ``entries`` is the
    list of entries recorded under that name.
    """

    def __init__(self, data=None):
        self.data: List[Tuple[str, List[FuncEntry]]] = data if data is not None else []

    @property
    def num_funcs(self):
        """Total number of entries, summed over all groups."""
        return sum(len(group) for _, group in self.data)

    def add_group(self, name, group):
        """Append one ``(name, group)`` pair to ``data``."""
        self.data.append((name, group))

    def print_groups(self, file=None):
        """Print a summary line followed by every group, largest group first.

        Each entry is listed by its ``file`` attribute; entries whose
        ``external`` attribute is truthy are tagged with ``(external)``.
        ``file`` is forwarded to ``print`` (``None`` means standard output).
        """
        print(f"{len(self.data)} groups, {self.num_funcs} functions", file=file)
        for name, group in sorted(self.data, key=lambda x: len(x[1]), reverse=True):
            print(f"{name} ({len(group)})", file=file)
            for func in group:
                if func.external:
                    print(f"  {func.file} (external)", file=file)
                else:
                    print(f"  {func.file}", file=file)

    def save_result(self, path: Path):
        """Write the ``print_groups`` report to ``path``, creating parent directories."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w") as f:
            self.print_groups(f)

    @classmethod
    def from_jsonl(cls, path):
        """Build an instance from a JSON-lines file, one entry per line.

        Every non-blank line is parsed with ``FuncEntry.from_json`` and the
        entries are grouped by their ``name`` attribute, in order of first
        appearance.
        """
        result = {}
        with open(path, "r") as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                if not line.strip():
                    continue
                func = FuncEntry.from_json(line)
                result.setdefault(func.name, []).append(func)

        # Materialise the pairs: a dict_items view is not a list, so add_group
        # (which appends) would fail on an instance built from the raw view.
        return cls(list(result.items()))


class CollisionType(StrEnum):
    """Category that ``count_dup`` assigns to each group of same-named functions.

    Member order matters: ``count_dup`` builds its result dict by iterating
    this enum, so the order here is the key order of that dict.
    """

    # The name has exactly one entry
    UNIQUE = "Unique"
    # Several entries that all share a single decl_loc
    HEADER = "Header Duplication"
    # Several entries with more than one decl_loc, none flagged external
    STATIC = "Static-Static Collision"
    # Several entries with more than one decl_loc, at least one flagged external
    MIXED = "Static-Global Collision"


def count_dup(v: Version):
    """Sort every function name of version ``v`` into a collision category.

    Loads the entries from ``v.funcs_path``, assigns each group of same-named
    entries to one ``CollisionType``, writes a text report for the HEADER,
    STATIC and MIXED categories under ``OUTPUT_PATH / "dup" / v.name``, and
    returns a dict mapping each ``CollisionType`` to the number of entries
    that ended up in it.
    """
    print(f"Processing {v.name}")

    def classify(entries):
        """Return the collision category for one group of same-named entries."""
        if len(entries) == 1:
            return CollisionType.UNIQUE
        # A single declaration location: the function is declared in one
        # header but defined in multiple files
        if len({entry.decl_loc for entry in entries}) == 1:
            return CollisionType.HEADER
        # An external function shares its name with static functions
        if any(entry.external for entry in entries):
            return CollisionType.MIXED
        # Only static functions share the name
        return CollisionType.STATIC

    loaded = Functions.from_jsonl(v.funcs_path)
    buckets = {kind: Functions() for kind in CollisionType}
    for func_name, entries in loaded.data:
        buckets[classify(entries)].add_group(func_name, entries)

    # UNIQUE is counted but gets no report file
    for kind in (CollisionType.HEADER, CollisionType.STATIC, CollisionType.MIXED):
        report_path = OUTPUT_PATH / "dup" / v.name / f"{kind.name.lower()}.txt"
        buckets[kind].save_result(report_path)

    return {kind: bucket.num_funcs for kind, bucket in buckets.items()}


# count_dup(VERSION_54)

In [3]:
from enum import StrEnum


class Group(StrEnum):
    REG = "reg"
    ARCH = "arch"
    FLAVOR = "flavor"

    @property
    def versions(self):
        from depsurf import VERSIONS_REGULAR, VERSIONS_ARCH, VERSIONS_FLAVOR

        return {
            Group.REG: VERSIONS_REGULAR,
            Group.ARCH: VERSIONS_ARCH,
            Group.FLAVOR: VERSIONS_FLAVOR,
        }[self]

    @property
    def version_str(self):
        from depsurf.output import bold

        return {
            Group.REG: lambda v: bold(v.short_version) if v.lts else v.short_version,
            Group.ARCH: lambda v: v.arch_name,
            Group.FLAVOR: lambda v: v.flavor_name,
        }[self]

    @property
    def caption(self):
        return {
            Group.REG: "Kernel Version",
            Group.ARCH: "Arch for 5.4",
            Group.FLAVOR: "Flavor for 5.4",
        }[self]

In [4]:
# Run the duplicate analysis once per version of every group. Keys are
# (group, version) pairs; values are the per-category counts from count_dup.
results = {
    (group, version): count_dup(version)
    for group in Group
    for version in group.versions
}

Processing 4.4.0-21-generic-amd64
Processing 4.8.0-22-generic-amd64
Processing 4.10.0-19-generic-amd64
Processing 4.13.0-16-generic-amd64
Processing 4.15.0-20-generic-amd64
Processing 4.18.0-10-generic-amd64
Processing 5.0.0-13-generic-amd64
Processing 5.3.0-18-generic-amd64
Processing 5.4.0-26-generic-amd64
Processing 5.8.0-25-generic-amd64
Processing 5.11.0-16-generic-amd64
Processing 5.13.0-19-generic-amd64
Processing 5.15.0-25-generic-amd64
Processing 5.19.0-21-generic-amd64
Processing 6.2.0-20-generic-amd64
Processing 6.5.0-9-generic-amd64
Processing 6.8.0-22-generic-amd64
Processing 5.4.0-26-generic-arm64
Processing 5.4.0-26-generic-armhf
Processing 5.4.0-26-generic-ppc64el
Processing 5.4.0-26-lowlatency-amd64
Processing 5.4.0-1009-aws-amd64
Processing 5.4.0-1009-gcp-amd64
Processing 5.4.0-1010-azure-amd64


In [5]:
import pandas as pd

# `results` is keyed by (group, version) and each value maps a collision type
# to a count, so after the transpose there is one row per (group, version) and
# one column per collision type.
df = pd.DataFrame(results).T

df

Unnamed: 0,Unnamed: 1,Unique,Header Duplication,Static-Static Collision,Static-Global Collision
reg,4.4.0-21-generic-amd64,53339,53294,961,20
reg,4.8.0-22-generic-amd64,56472,58314,971,52
reg,4.10.0-19-generic-amd64,58060,61008,963,48
reg,4.13.0-16-generic-amd64,62501,63567,972,50
reg,4.15.0-20-generic-amd64,64740,65765,975,52
reg,4.18.0-10-generic-amd64,68195,71380,1012,52
reg,5.0.0-13-generic-amd64,69860,74093,1010,52
reg,5.3.0-18-generic-amd64,72549,79741,1019,52
reg,5.4.0-26-generic-amd64,74265,81585,1033,54
reg,5.8.0-25-generic-amd64,74670,87832,1061,52


In [9]:
from depsurf.output import (
    plot_yticks,
    get_legend_handles_labels,
    label_multiline_text,
    save_fig,
)

import matplotlib.pyplot as plt

import numpy as np

# One panel per group, side by side on a shared y-axis. Panel widths follow the
# number of versions (bars) in each group, so the layout keeps matching the
# data when the version lists change instead of relying on hard-coded counts.
panel_widths = [len(df.loc[group]) for group in Group]

fig, axs = plt.subplots(
    1,
    len(panel_widths),
    figsize=(12, 3),
    sharey=True,
    width_ratios=panel_widths,
    gridspec_kw={"wspace": 0.05, "hspace": 0.075},
)

# Common y-limit: the tallest stacked bar plus 15% headroom. plot_subfig uses
# it both as the axis limit and as the position argument for the text labels.
ylim = df.sum(axis=1).max() * 1.15


def format_val(val):
    """Format an integer count as a short, right-aligned label.

    Values of 1000 or more are shown in whole thousands (rounded down) with a
    ``k`` suffix, e.g. ``53339 -> "53k"``; smaller values are printed as-is,
    padded to three characters, e.g. ``20 -> " 20"``. The result is three
    characters wide for values from 0 up to 99999.
    """
    # ">=" so that exactly 1000 becomes " 1k"; with ">" it fell through to the
    # plain branch and came out as the four-character "1000".
    if val >= 1000:
        return f"{val // 1000:2d}k"
    return f"{val:3d}"


def plot_subfig(ax, df, group: Group):
    """Draw one panel: a stacked bar per row of ``df``, plus text labels.

    Each column of ``df`` becomes one segment of the stack, in column order.
    Tick labels come from ``group.version_str`` applied to the index values
    and the x-axis caption from ``group.caption``. The module-level ``ylim``
    is used as the upper y-limit and is passed to ``label_multiline_text``
    together with the MIXED and STATIC values of each row.
    """
    n_bars = len(df)
    positions = np.arange(n_bars)

    # Running top of each stack; every column is drawn on top of the previous
    stack_top = np.zeros(n_bars)
    for column in df.columns:
        heights = df[column]
        ax.bar(positions, heights, bottom=stack_top, label=column)
        stack_top += heights

    ax.tick_params(axis="both", labelsize=9)
    tick_labels = [group.version_str(version) for version in df.index]
    ax.set_xticks(positions, tick_labels, rotation=0)
    ax.set_xlabel(group.caption)
    ax.set_xlim(-0.5, n_bars - 0.5)
    ax.set_ylim(0, ylim)

    # Categories written out as text for every bar, first line first
    labelled = (CollisionType.MIXED, CollisionType.STATIC)
    with plt.rc_context({"font.family": "monospace"}):
        for pos, (_, row) in enumerate(df.iterrows()):
            label_multiline_text(
                ax,
                pos,
                ylim,
                [f"{row[kind]}" for kind in labelled],
                colors=["C3", "darkgreen"],
                fontsize=9,
            )


# Fill the panels left to right, one per group; df.loc[group] selects the rows
# whose first index level is that group.
for group, ax in zip(Group, axs):
    plot_subfig(ax, df.loc[group], group)

# The y-axis is shared, so only the leftmost panel gets y ticks and a y label.
left_ax = axs[0]
plot_yticks(left_ax)
left_ax.set_ylabel("Number of Functions Compiled")

# A single figure-level legend above the panels.
handles, labels = get_legend_handles_labels(fig)
fig.legend(handles, labels, ncol=4, loc="upper center")

save_fig(fig, "dup")

[      mpl.py:76 ] INFO: Saved figure to /Users/szhong/Downloads/bpf-study/paper/figs/dup.pdf
