# Call Graph Coverage Demo

This notebook shows how to load one or more exported call graphs and compute
syscall coverage metrics with the helper utilities added in the project.
Adjust the `paths` list below to explore different DLLs.

In [1]:
from pathlib import Path

from call_graph_win11.analysis.graph_loader import load_call_graph, merge_call_graphs
from call_graph_win11.analysis.graph_queries import (
    build_syscall_reachability_report,
    find_minimal_hook_set,
    functions_without_syscalls,
)

preferred_paths = [
    Path("data/interim/call_graphs/System32/advapi32.dll.callgraph.json"),
    Path("data/interim/call_graphs/System32/ntdll.dll.callgraph.json"),
]
existing = [p for p in preferred_paths if p.exists()]

if len(existing) < 2:
    for cand in sorted(Path("data/interim/call_graphs").rglob("*.callgraph.json")):
        if cand not in existing:
            existing.append(cand)
        if len(existing) >= 2:
            break

if not existing:
    raise FileNotFoundError("No call graph JSON files found under data/interim/call_graphs.")

graph = merge_call_graphs(existing) if len(existing) > 1 else load_call_graph(existing[0])
graph


<networkx.classes.digraph.DiGraph at 0x1b9281be8a0>

In [2]:
syscall_prefixes = ("Nt", "Zw")

coverage_report = build_syscall_reachability_report(
    graph,
    syscall_prefix=syscall_prefixes,
    syscall_program_hint=None,
)
len(coverage_report)

83

In [3]:
print("Top API candidates and the number of reachable syscalls:\n")
for entry in coverage_report[:10]:
    print(f"{entry.label:<40} -> {len(entry.coverage)}")

orphan_apis = functions_without_syscalls(
    graph,
    syscall_prefix=syscall_prefixes,
    syscall_program_hint=None,
)
print(f"\nAPIs without syscall reachability: {len(orphan_apis)}")

Top API candidates and the number of reachable syscalls:

LsaNtStatusToWinError                    -> 0
ControlTraceW                            -> 0
ProcessTrace                             -> 0
EnumerateTraceGuidsEx                    -> 0
EnableTraceEx2                           -> 0
CredEnumerateW                           -> 0
CredFree                                 -> 0
OpenTraceW                               -> 0
CloseTrace                               -> 0
StartTraceW                              -> 0

APIs without syscall reachability: 83


In [4]:
hook_plan = find_minimal_hook_set(
    graph,
    target_syscalls=None,
    syscall_prefix=syscall_prefixes,
    syscall_program_hint=None,
)

print("Recommended hooks (greedy covering set):\n")
for hook in hook_plan.hooks:
    print(f"{hook.label:<40} covers {len(hook.coverage)} syscalls")

print(f"\nUncovered syscalls: {len(hook_plan.uncovered_syscalls)}")

Recommended hooks (greedy covering set):


Uncovered syscalls: 551


In [5]:
from pathlib import Path
import csv

output_dir = Path("docs/analytics")
output_dir.mkdir(parents=True, exist_ok=True)

coverage_csv = output_dir / "demo_syscall_coverage.csv"
with coverage_csv.open("w", newline="", encoding="utf-8") as handle:
    writer = csv.writer(handle)
    writer.writerow(["label", "program", "coverage"])
    for entry in coverage_report:
        writer.writerow([entry.label, entry.program, len(entry.coverage)])

orphans_csv = output_dir / "demo_orphan_apis.csv"
with orphans_csv.open("w", newline="", encoding="utf-8") as handle:
    writer = csv.writer(handle)
    writer.writerow(["label", "program"])
    for entry in orphan_apis:
        writer.writerow([entry.label, entry.program])

coverage_csv, orphans_csv


(WindowsPath('docs/analytics/demo_syscall_coverage.csv'),
 WindowsPath('docs/analytics/demo_orphan_apis.csv'))