In [1]:
%reload_ext autoreload
%autoreload 2

In [34]:
from collections import defaultdict
from typing import Dict

import pandas as pd

from depsurf import DepKind, IssueEnum, ReportDict
from utils import GRAY_DASH, load_pkl, rotate, save_latex

# Tally key under which the number of dependencies of a kind is stored.
COUNT = "Count"

# Tally key -> LaTeX header of the matching table column.
COLUMNS = {
    COUNT: "#",
    IssueEnum.ABSENT: r"$\varnothing$",
    IssueEnum.CHANGE: r"$\Delta$",
    IssueEnum.PARTIAL_INLINE: "P",
    IssueEnum.FULL_INLINE: "F",
    IssueEnum.RENAME: "R",
    IssueEnum.DUPLICATE: "D",
}

# When true, the summary row labels are wrapped in \textbf{...}.
BOLD_SUMMARY = True


def _summary_label(symbol, noun):
    """Build a summary row label, bolded when BOLD_SUMMARY is set."""
    label = r"\texttt{" + symbol + r"} " + noun
    return r"\textbf{" + label + r"}" if BOLD_SUMMARY else label


NUM_TOOLS = _summary_label("#", "tool")
PCT_TOOLS = _summary_label("%", "tool")
NUM_DEPS = _summary_label("#", "dep vec")
PCT_DEPS = _summary_label("%", "dep vec")

# Summary rows placed at the bottom of the table, in this order.
# PCT_DEPS / PCT_TOOLS may be added here to enable the percentage rows.
SUMMARY = [
    NUM_DEPS,
    # PCT_DEPS,
    NUM_TOOLS,
    # PCT_TOOLS,
]

# Dependency kind -> top-level column group name; the insertion order here
# is the order the groups are laid out in further down.
KINDS = {
    DepKind.FUNC: "Function",
    DepKind.STRUCT: "Struct",
    DepKind.FIELD: "Field",
    DepKind.TRACEPOINT: "Tracept",
    DepKind.SYSCALL: "Syscall",
}

# Reports keyed by program name, then by dependency.
data: Dict[str, ReportDict] = load_pkl("bcc")
# Pseudo-program holding the union of every program's dependencies; it is
# tallied by the same loop below and becomes the NUM_DEPS summary row.
data[NUM_DEPS] = {dep: report for deps in data.values() for dep, report in deps.items()}

# Row label -> {(kind, tally key): count}.
results = {}

# (kind, tally key) -> number of real programs with a non-zero count for it.
num_tools = defaultdict(int)

for prog_name, deps in data.items():
    if not deps:
        print(f"Program {prog_name} has no dependencies")
        continue

    is_summary = prog_name == NUM_DEPS

    result = defaultdict(int)
    for dep, report in deps.items():
        kind = dep.kind
        result[(kind, COUNT)] += 1

        # Which issues are tallied depends on the dependency kind:
        # ABSENT for everything, CHANGE for all but structs and syscalls,
        # and the inline/rename/duplicate issues for functions only.
        tracked = [IssueEnum.ABSENT]
        if kind not in (DepKind.STRUCT, DepKind.SYSCALL):
            tracked.append(IssueEnum.CHANGE)
        if kind == DepKind.FUNC:
            tracked.extend(
                [
                    IssueEnum.PARTIAL_INLINE,
                    IssueEnum.FULL_INLINE,
                    IssueEnum.RENAME,
                    IssueEnum.DUPLICATE,
                ]
            )

        # A dependency counts at most once per issue, however many entries of
        # its report contain that issue. Adding 0 is deliberate: it still
        # creates the key, so the column exists for this row.
        for issue in tracked:
            result[(kind, issue)] += int(
                any(issue in issues for issues in report.values())
            )

    # The aggregate row is not a tool, so it must not feed the per-tool counts.
    if not is_summary:
        for column, count in result.items():
            if count > 0:
                num_tools[column] += 1

    has_issue = any(
        count != 0 for (_, key), count in result.items() if key != COUNT
    )

    # Flag issue-free programs. The aggregate row keeps its exact label:
    # it is looked up by name again when the table rows are ordered, and a
    # renamed label would make that lookup fail.
    if not has_issue and not is_summary:
        prog_name = prog_name + r" \checkmark"

    results[prog_name] = result

results[NUM_TOOLS] = num_tools


def compute_percentage(key):
    """Convert the counts of one ``results`` row into percentages.

    Args:
        key: Label of the row in the module-level ``results`` mapping whose
            ``{(kind, column): count}`` entries are converted.

    Returns:
        A dict with the same keys in the same order. ``COUNT`` entries map to
        an empty string; every other entry maps to its count as a percentage
        of the ``COUNT`` entry of the same kind.

    Raises:
        KeyError: If ``key`` is not in ``results`` or a kind has no ``COUNT``
            entry.
    """
    report = results[key]

    # Gather the per-kind totals first so the outcome does not depend on the
    # COUNT entry preceding the other entries of its kind.
    totals = {
        kind: count for (kind, column), count in report.items() if column == COUNT
    }

    percentages = {}
    for (kind, column), count in report.items():
        if column == COUNT:
            percentages[(kind, column)] = ""
        else:
            percentages[(kind, column)] = count / totals[kind] * 100
    return percentages


# Add the optional percentage rows, each derived from its count row.
for pct_label, num_label in ((PCT_DEPS, NUM_DEPS), (PCT_TOOLS, NUM_TOOLS)):
    if pct_label in SUMMARY:
        results[pct_label] = compute_percentage(num_label)

# Swap the (kind, tally key) keys for their display names so the DataFrame
# gets two-level (group, column header) columns.
relabelled = {}
for tool, report in results.items():
    relabelled[tool] = {
        (KINDS[kind], COLUMNS[key]): count for (kind, key), count in report.items()
    }
results = relabelled

# One row per program / summary label.
df = pd.DataFrame(results).transpose()

# Group names in KINDS order; used to lay the column groups out left to right.
KINDS_LIST = list(dict.fromkeys(KINDS.values()))
ordered_columns = sorted(df.columns, key=lambda col: KINDS_LIST.index(col[0]))
df = df.reindex(ordered_columns, axis=1)

# Largest counts first, comparing columns left to right.
df = df.sort_values(by=list(df.columns), ascending=False)

# Move the summary rows to the bottom, in SUMMARY order.
body_rows = [row for row in df.index if row not in SUMMARY]
df = df.loc[body_rows + SUMMARY]


def f(x):
    """Normalise a single table cell.

    Args:
        x: Cell value: a string, a missing value, or a number.

    Returns:
        ``x`` unchanged if it is a string (including ``str`` subclasses),
        ``0`` if it is missing according to ``pd.isna``, otherwise ``int(x)``.
    """
    # isinstance rather than an exact type comparison, so str subclasses are
    # passed through instead of being fed to int().
    if isinstance(x, str):
        return x
    if pd.isna(x):
        return 0
    return int(x)


def _dash_if_zero(value):
    """Return GRAY_DASH for a zero cell and the cell itself otherwise."""
    return GRAY_DASH if value == 0 else value


# Normalise every cell: strings are kept, missing values become 0, and the
# remaining values are converted with int().
df = df.map(f)

# LaTeX-only copy of the table in which zero cells are replaced by GRAY_DASH.
df_latex = df.map(_dash_if_zero)
latex = df_latex.to_latex(
    column_format="l|rrrrrrr|rr|rrr|rrr|rrr",
    multicolumn_format="c|",
)

# Insert a rule line in front of each occurrence of the NUM_DEPS label.
latex = latex.replace(NUM_DEPS, "\n".join([r"\midrule", NUM_DEPS]))

save_latex(latex, "bcc")

df

[          pkl.py:18 ] INFO: Loding bcc from /Users/szhong/Downloads/bpf-study/output/bcc.pkl
[        latex.py:56 ] INFO: Saved bcc to /Users/szhong/Downloads/bpf-study/paper/tabs/bcc.tex


Program bashreadline has no dependencies
Program funclatency has no dependencies
Program gethostlatency has no dependencies
Program javagc has no dependencies
Program ksnoop has no dependencies


Unnamed: 0_level_0,Function,Function,Function,Function,Function,Function,Function,Struct,Struct,Field,Field,Field,Tracept,Tracept,Tracept,Syscall,Syscall
Unnamed: 0_level_1,#,$\varnothing$,$\Delta$,P,F,R,D,#,$\varnothing$,#,$\varnothing$,$\Delta$,#,$\varnothing$,$\Delta$,#,$\varnothing$
klockstat,14,3,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0
biotop,5,2,2,2,3,0,0,3,0,7,2,1,2,2,0,0,0
cachestat,5,2,2,2,0,0,0,0,0,0,0,0,2,2,1,0,0
fsdist,5,2,1,2,0,2,0,0,0,0,0,0,0,0,0,0,0
vfsstat,5,0,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0
tcptracer,5,0,1,0,0,3,0,6,0,14,0,0,0,0,0,0,0
readahead,4,3,1,3,2,1,1,0,0,0,0,0,0,0,0,0,0
fsslower,4,1,0,2,0,1,0,5,0,6,0,0,0,0,0,0,0
filelife,4,0,3,2,0,0,0,5,1,6,2,0,0,0,0,0,0
biostacks,3,1,2,3,2,0,0,3,0,5,2,0,2,2,0,0,0
