In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
from typing import Dict, Tuple, List

import pandas as pd

from depsurf import DepKind, IssueEnum, ReportDict
from utils import GRAY_DASH, load_pkl, save_latex, text_color, bold

# Column key for the per-kind total (sits next to the IssueEnum columns).
COUNT = "Count"

# LaTeX header text for each column key.
KEYS = {
    COUNT: r"$\Sigma$",
    IssueEnum.ABSENT: r"$\varnothing$",
    IssueEnum.CHANGE: r"$\Delta$",
    IssueEnum.PARTIAL_INLINE: "P",
    IssueEnum.FULL_INLINE: "F",
    IssueEnum.RENAME: "R",
    IssueEnum.DUPLICATE: "D",
}

# LaTeX header text for each dependency kind (top level of the column header).
KINDS = {
    DepKind.FUNC: "Function",
    DepKind.STRUCT: "Struct",
    DepKind.FIELD: "Field",
    DepKind.TRACEPOINT: "Tracept",
    DepKind.SYSCALL: "Syscall",
}

# Issue columns reported for each dependency kind, in display order.
_ISSUES_PER_KIND = {
    DepKind.FUNC: (
        IssueEnum.ABSENT,
        IssueEnum.CHANGE,
        IssueEnum.PARTIAL_INLINE,
        IssueEnum.FULL_INLINE,
        IssueEnum.RENAME,
        IssueEnum.DUPLICATE,
    ),
    DepKind.STRUCT: (IssueEnum.ABSENT,),
    DepKind.FIELD: (IssueEnum.ABSENT, IssueEnum.CHANGE),
    DepKind.TRACEPOINT: (IssueEnum.ABSENT, IssueEnum.CHANGE),
    DepKind.SYSCALL: (IssueEnum.ABSENT,),
}

# Flat list of (kind, key) table columns: every kind starts with its COUNT
# column, followed by that kind's issue columns.
COLUMNS = [
    (kind, key)
    for kind, issue_keys in _ISSUES_PER_KIND.items()
    for key in (COUNT, *issue_keys)
]

# Display-name overrides for programs whose raw name should not be printed as-is.
PROG_NAMES = {
    "tracee": "Tracee",
}

# Key of the pseudo-program row that aggregates the dependencies of all programs.
NUM_DEPS = r"\textbf{# uniq dep}"

# Per-program dependency reports, loaded from the "bcc" pickle.
data: Dict[str, ReportDict] = load_pkl("bcc")

# Add a pseudo-program holding the union of every program's dependencies.
# A dependency shared by several programs appears once; the report from the
# last program iterated wins.
data[NUM_DEPS] = {
    dep: report
    for deps in data.values()
    for dep, report in deps.items()
}

# One row per program (plus the NUM_DEPS aggregate row), mapping each
# (kind, key) column to a count of dependencies.
table: Dict[str, Dict[Tuple[DepKind, str], int]] = {}


def _seen(report, issue) -> int:
    """Return 1 if `issue` appears in at least one entry of `report`, else 0.

    Each dependency therefore contributes at most 1 to an issue column, no
    matter how many report entries exhibit the issue.
    """
    return int(any(issue in issues for issues in report.values()))


for prog_name, deps in data.items():
    if len(deps) == 0:
        print(f"Program {prog_name} has no dependencies")
        continue

    # Tally this program's dependencies per (kind, key) column.
    row = {col: 0 for col in COLUMNS}
    for dep, report in deps.items():
        kind = dep.kind
        row[(kind, COUNT)] += 1
        row[(kind, IssueEnum.ABSENT)] += _seen(report, IssueEnum.ABSENT)

        # STRUCT and SYSCALL have no CHANGE column in COLUMNS.
        if kind not in [DepKind.STRUCT, DepKind.SYSCALL]:
            row[(kind, IssueEnum.CHANGE)] += _seen(report, IssueEnum.CHANGE)

        # The remaining issue columns exist only for functions.
        if kind == DepKind.FUNC:
            for issue in [
                IssueEnum.PARTIAL_INLINE,
                IssueEnum.FULL_INLINE,
                IssueEnum.RENAME,
                IssueEnum.DUPLICATE,
            ]:
                row[(kind, issue)] += _seen(report, issue)

    # The aggregate row must stay under its exact NUM_DEPS key because it is
    # popped from the table by that key afterwards. Running it through the
    # renaming/decoration below would change the key whenever the aggregate
    # happens to have no issues.
    if prog_name == NUM_DEPS:
        table[NUM_DEPS] = row
        continue

    display_name = PROG_NAMES.get(prog_name, prog_name)

    # A program "has an issue" if any non-COUNT column is non-zero.
    has_issue = any(count != 0 for (_, key), count in row.items() if key != COUNT)

    if not has_issue:
        display_name = text_color(display_name, "blue")
    elif display_name in ("biotop", "readahead"):
        # NOTE(review): these two programs are hard-coded to be bolded;
        # presumably they are singled out in the paper -- confirm.
        display_name = bold(display_name)

    table[display_name] = row

# Detach the aggregate row so that `table` holds real programs only.
num_deps = table.pop(NUM_DEPS)

# For every column, the number of programs whose count in that column is non-zero.
num_progs = {
    col: sum(1 for prog_row in table.values() if prog_row[col] != 0)
    for col in COLUMNS
}


def _format_cell(value):
    """Render a missing or zero count as GRAY_DASH, anything else as an int."""
    if pd.isna(value) or value == 0:
        return GRAY_DASH
    return int(value)


# Re-key every row from (kind, key) to its (kind label, key label) LaTeX header.
labelled_rows = {
    tool: {(KINDS[kind], KEYS[key]): count for (kind, key), count in row.items()}
    for tool, row in table.items()
}

# Programs as rows, (kind label, key label) pairs as columns.
df = pd.DataFrame(labelled_rows).T

# Largest counts first, comparing columns left to right.
df = df.sort_values(by=df.columns.tolist(), ascending=False)
df = df.map(_format_cell)

df.index.name = r"\multirow{2}{*}{Program}"
df = df.reset_index()

# The column_format groups (7|2|3|3|2) mirror the per-kind column counts in
# COLUMNS; keep them in sync when columns are added or removed.
latex = df.to_latex(
    index=False,
    column_format=r"l|rrrrrrr|rr|rrr|rrr|rr",
    multicolumn_format="c|",
)

save_latex(latex, "bcc", midrule=False)

[ utils_pickle.py:18 ] INFO: Loding bcc from /Users/szhong/Code/DepSurf/output/bcc.pkl
[  utils_latex.py:118] INFO: Saved bcc to /Users/szhong/Code/DepSurf/paper/tabs/bcc.tex


Program javagc has no dependencies
Program gethostlatency has no dependencies
Program funclatency has no dependencies
Program ksnoop has no dependencies
Program bashreadline has no dependencies


In [4]:
from utils import mini_bar, multirow

# Copy of KINDS with a shorter label for functions in the summary table.
KINDS = dict(KINDS)
KINDS[DepKind.FUNC] = "Func"

# The one column whose summary entry is split across two table rows.
MULTIROW = (DepKind.FIELD, IssueEnum.ABSENT)

# Numbers rendered through the \bccissue LaTeX macro, grouped by column.
# NOTE(review): presumably upstream BCC issue numbers -- confirm.
ISSUES = {
    (DepKind.FUNC, IssueEnum.ABSENT): [3687, 3695, 3692, 3747, 4337, 4885],
    (DepKind.FUNC, IssueEnum.CHANGE): [1911, 3360, 4339, 4340, 4346],
    (DepKind.FUNC, IssueEnum.PARTIAL_INLINE): [703, 1667, 2252, 2373, 3913],
    (DepKind.FUNC, IssueEnum.FULL_INLINE): [4261, 4478, 4638, 4700, 5115],
    (DepKind.FUNC, IssueEnum.RENAME): [1754, 3293, 3339, 3315, 4937],
    (DepKind.STRUCT, IssueEnum.ABSENT): [4340, 4339],
    (DepKind.FIELD, IssueEnum.ABSENT): [
        1384, 3612, 3647, 3650, 3658, 3672,
        3680, 3859, 3903, 3926, 3936,
    ],
    (DepKind.FIELD, IssueEnum.CHANGE): [3845, 3865],
    (DepKind.TRACEPOINT, IssueEnum.ABSENT): [1636, 2816, 4384],
    (DepKind.TRACEPOINT, IssueEnum.CHANGE): [3317, 3338, 4076, 4476],
    (DepKind.SYSCALL, IssueEnum.ABSENT): [3012, 3668, 3843, 4287],
}


# Column headers of the summary table.
PROG = "# Program"
UNIQ_DEP = "# Uniq Dep"
BUG = "Reported Issue No."

# Colour name passed to mini_bar for each count column.
COLORS = {
    PROG: "prog",
    UNIQ_DEP: "dep",
}

# Summary table: one entry per output column (PROG, UNIQ_DEP), each mapping a
# (kind label, key label) pair to a mini-bar cell.
table = {}
# The loop variable is named `counts` (not `data`) so it does not overwrite the
# notebook-level `data` dict loaded from the pickle.
for name, counts in [(PROG, num_progs), (UNIQ_DEP, num_deps)]:
    row = {}
    for (kind, key), count in counts.items():
        # Bar length is this column's share of the kind's total. A kind with
        # no entries at all gets an empty bar instead of a ZeroDivisionError.
        total = counts[(kind, COUNT)]
        text = mini_bar(
            text=count,
            percent=count / total if total else 0.0,
            total_width=1.5,
            color=COLORS[name],
            bg_color="bg",
        )
        if (kind, key) == MULTIROW:
            # The multirow column spans two table rows: the cell goes in the
            # first, and a blank placeholder fills the second.
            row[(KINDS[kind], multirow(KEYS[key]))] = multirow(text)
            row[(KINDS[kind], "")] = ""
        else:
            row[(KINDS[kind], KEYS[key])] = text
    table[name] = row


def bccissues(issues: List[int]) -> str:
    """Format issue numbers as a comma-separated list of ``\\bccissue{N}`` macros.

    Args:
        issues: Issue numbers, emitted in the given order.

    Returns:
        The macros joined by ", "; an empty string when `issues` is empty.
    """
    macros = ["\\bccissue{{{}}}".format(issue) for issue in issues]
    return ", ".join(macros)


# Build the BUG column: issue macros for every (kind, key) listed in ISSUES.
row = {}
for (kind, key), issues in ISSUES.items():
    kind_label = KINDS[kind]
    if (kind, key) != MULTIROW:
        row[(kind_label, KEYS[key])] = bccissues(issues)
        continue

    # The multirow column is split over two table rows; the first row takes
    # the larger half when the number of issues is odd.
    first_half = (len(issues) + 1) // 2
    row[(kind_label, multirow(KEYS[key]))] = bccissues(issues[:first_half])
    row[(kind_label, "")] = bccissues(issues[first_half:])
table[BUG] = row


# Columns are PROG / UNIQ_DEP / BUG; rows are (kind label, key label) pairs.
# Cells with no entry (e.g. a column with no listed issues) become GRAY_DASH.
df = pd.DataFrame(table).fillna(GRAY_DASH)

# "ll" covers the two index levels, "rrr" the three value columns.
latex = df.to_latex(column_format="llrrr")
save_latex(latex, "summary", rotate=False)

[  utils_latex.py:118] INFO: Saved summary to /Users/szhong/Code/DepSurf/paper/tabs/summary.tex
