In [1]:
# Load (or reload) IPython's autoreload extension and switch it to mode 2 so
# that edits to imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
from depsurf import OUTPUT_PATH
import pickle

# Read the pickled mapping stored at OUTPUT_PATH / "bcc.pkl".
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
bcc_pickle_path = OUTPUT_PATH / "bcc.pkl"
with open(bcc_pickle_path, "rb") as pickle_file:
    data = pickle.load(pickle_file)

# Display the top-level keys of the loaded mapping.
data.keys()

dict_keys(['bashreadline', 'bindsnoop', 'biolatency', 'biopattern', 'biosnoop', 'biostacks', 'biotop', 'bitesize', 'cachestat', 'capable', 'cpudist', 'cpufreq', 'drsnoop', 'execsnoop', 'exitsnoop', 'filelife', 'filetop', 'fsdist', 'fsslower', 'funclatency', 'futexctn', 'gethostlatency', 'hardirqs', 'javagc', 'klockstat', 'ksnoop', 'llcstat', 'mdflush', 'memleak', 'mountsnoop', 'numamove', 'offcputime', 'oomkill', 'opensnoop', 'profile', 'readahead', 'runqlat', 'runqlen', 'runqslower', 'sigsnoop', 'slabratetop', 'softirqs', 'solisten', 'statsnoop', 'syncsnoop', 'syscount', 'tcpconnect', 'tcpconnlat', 'tcplife', 'tcppktlat', 'tcprtt', 'tcpstates', 'tcpsynbl', 'tcptop', 'tcptracer', 'vfsstat', 'wakeuptime'])

In [13]:
from depsurf import (
    DepStatus,
    DepDelta,
    IssueEnum,
    Versions,
    DepKind,
    DepReport,
    TAB_PATH,
)
from collections import defaultdict
import pandas as pd

# Programs that are left out of the per-program table.
SKIPPED_PROGRAMS = (
    "bashreadline",
    "ksnoop",
    "javagc",
    "gethostlatency",
    "funclatency",
)

# Issues that are only tallied for dependencies of kind DepKind.FUNC.
FUNC_ONLY_ISSUES = [
    IssueEnum.PARTIAL_INLINE,
    IssueEnum.FULL_INLINE,
    IssueEnum.RENAME,
    IssueEnum.DUPLICATE,
]

# results[program][(kind, column)] -> tally of dependencies of that kind.
results = {}

for prog_name, deps in data.items():
    if prog_name in SKIPPED_PROGRAMS:
        continue

    result = defaultdict(float)
    for (kind, _dep_name), report in deps.items():
        report: DepReport

        # Every dependency contributes one to its kind's "Count" column.
        result[(kind, "Count")] += 1

        # A dependency contributes at most one to an issue column: it is
        # counted when at least one entry of report.status carries the issue.
        has_absent = any(
            IssueEnum.ABSENT in status.enums for status in report.status.values()
        )
        result[(kind, IssueEnum.ABSENT)] += int(has_absent)

        # Likewise counted once when any entry of report.delta has changes.
        has_change = any(delta.changes for delta in report.delta.values())
        result[(kind, IssueEnum.CHANGE)] += int(has_change)

        if kind == DepKind.FUNC:
            for issue in FUNC_ONLY_ISSUES:
                has_issue = any(
                    issue in status.enums for status in report.status.values()
                )
                result[(kind, issue)] += int(has_issue)

    results[prog_name] = result

# One row per program, one column per (kind, column) key; missing cells -> 0.
df = pd.DataFrame(results).T.fillna(0)

# Downcast a column to int when every value in it is integral.
for col in df.columns:
    as_int = df[col].astype(int)
    if df[col].eq(as_int).all():
        df[col] = as_int

# Desired left-to-right order of the dependency kinds in the table.
column_order = [
    DepKind.FUNC,
    DepKind.STRUCT,
    DepKind.FIELD,
    DepKind.TRACEPOINT,
    DepKind.SYSCALL,
]
# sorted() is stable, so columns keep their relative order within each kind.
ordered_columns = sorted(df.columns, key=lambda c: column_order.index(c[0]))
df = df.reindex(ordered_columns, axis=1)


df = df.round(2)


def rename_cols(col):
    """Shorten the kind part of a ``(kind, issue)`` column label.

    ``DepKind.TRACEPOINT`` becomes ``"Tracept"`` and ``DepKind.SYSCALL``
    becomes ``"Syscall"``; any other kind is returned unchanged. The issue
    part is always passed through as-is.
    """
    kind, issue = col
    if kind == DepKind.TRACEPOINT:
        return ("Tracept", issue)
    if kind == DepKind.SYSCALL:
        return ("Syscall", issue)
    return (kind, issue)


# Apply the shortened kind labels, then order the rows by every column
# (leftmost first), largest values at the top.
df.columns = df.columns.map(rename_cols)
sort_keys = df.columns.tolist()
df = df.sort_values(by=sort_keys, ascending=False)


def _latex_header(col):
    """Wrap the second level of a column label in the LaTeX ``\\head`` macro."""
    return (col[0], f"\\head{{{col[1]}}}")


# Work on a copy for the LaTeX export so that `df` keeps its numeric zeros
# and plain headers; zeros are rendered as a light-gray dash.
df_latex = df.copy().replace(0, r"\color{lightgray}{-}")
df_latex.columns = df_latex.columns.map(_latex_header)

with open(TAB_PATH / "bcc.tex", "w") as tex_file:
    df_latex.to_latex(
        tex_file,
        multicolumn_format="c|",
        column_format="l|rrrrrrr|rrr|rrr|rrr|rrr",
        float_format="%.2f",
    )

# df

In [4]:
# Sum every column of `df` over all rows and print the result as LaTeX.
column_totals = df.sum()
df_total = pd.DataFrame(column_totals)
latex_totals = df_total.to_latex()
print(latex_totals)

\begin{tabular}{llr}
\toprule
 &  & 0 \\
\midrule
\multirow[t]{7}{*}{Function} & Count & 76 \\
 & Absent & 16 \\
 & Changed & 18 \\
 & Par. Inline & 30 \\
 & Full Inline & 9 \\
 & Renamed & 17 \\
 & Duplicate & 1 \\
\cline{1-3}
\multirow[t]{3}{*}{Struct} & Count & 135 \\
 & Absent & 20 \\
 & Changed & 96 \\
\cline{1-3}
\multirow[t]{3}{*}{Field} & Count & 272 \\
 & Absent & 69 \\
 & Changed & 9 \\
\cline{1-3}
\multirow[t]{3}{*}{TP} & Count & 52 \\
 & Absent & 17 \\
 & Changed & 34 \\
\cline{1-3}
\multirow[t]{3}{*}{SC} & Count & 15 \\
 & Absent & 5 \\
 & Changed & 0 \\
\cline{1-3}
\bottomrule
\end{tabular}



In [5]:
from depsurf import BCC_OBJ_PATH, BPFObject, DepKind
from collections import defaultdict

# by_dep[kind][dep_name]   -> how many times that dependency appears across
#                             all counted tools.
# by_tool[kind][tool_name] -> how many dependencies of that kind the tool has.
by_dep = defaultdict(lambda: defaultdict(int))
by_tool = defaultdict(lambda: defaultdict(int))
# Number of tools that were not skipped (denominator for the percentage row).
num_tools = 0


for path in sorted(BCC_OBJ_PATH.glob("*.bpf.o")):
    bpf = BPFObject(path)
    # These tools are left out of the statistics.
    if bpf.name in (
        "bashreadline",
        "ksnoop",
        "javagc",
        "gethostlatency",
        "funclatency",
    ):
        continue
    for d in bpf.deps:
        # Uprobe and perf-event dependencies are not included in the table.
        if d.kind in (DepKind.UPROBE, DepKind.PERF_EVENT):
            continue
        by_dep[d.kind][d.name] += 1
        by_tool[d.kind][bpf.name] += 1
    num_tools += 1

import pandas as pd

# One column per dependency kind, one row per statistic. The percent sign is
# emitted as a backslash followed by "%" so it is escaped in the LaTeX output;
# the backslash itself is written as "\\" because "\%" is not a valid Python
# escape sequence and triggers a warning on current interpreters.
df = pd.DataFrame(
    {
        k: {
            "Number of unique": len(by_dep[k]),
            "Total number": sum(by_dep[k].values()),
            "Number of tools used": len(by_tool[k]),
            "Percentage of tools": f"{len(by_tool[k]) / num_tools * 100:.0f}\\%",
            "Average per tool used": f"{sum(by_tool[k].values()) / len(by_tool[k]):.1f}",
            "Max per tool used": max(by_tool[k].values()),
        }
        for k in by_dep | by_tool
    },
)
print(df.to_latex())

\begin{tabular}{llllll}
\toprule
 & Function & Field & Struct & Tracepoint & Syscall \\
\midrule
Number of unique & 59 & 136 & 57 & 35 & 15 \\
Total number & 76 & 272 & 135 & 52 & 15 \\
Number of tools used & 24 & 41 & 41 & 24 & 7 \\
Percentage of tools & 46\% & 79\% & 79\% & 46\% & 13\% \\
Average per tool used & 3.2 & 6.6 & 3.3 & 2.2 & 2.1 \\
Max per tool used & 14 & 17 & 11 & 10 & 5 \\
\bottomrule
\end{tabular}

