# KLL Sketch Benchmark Plots

Load the CSV outputs produced by `benchmarks/bench_kll.py`, generate a few diagnostic plots, and summarise the accuracy statistics.


In [None]:
from pathlib import Path
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display

# Apply one matplotlib style sheet to every figure drawn in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')

# Directory (relative to the working directory) that holds the benchmark CSVs.
outdir = Path('bench_out')


def _read_bench_csv(filename):
    """Read one benchmark CSV from ``outdir`` into a DataFrame."""
    return pd.read_csv(outdir / filename)


# One DataFrame per benchmark output file; the later cells read these names.
accuracy = _read_bench_csv('accuracy.csv')
throughput = _read_bench_csv('update_throughput.csv')
latency = _read_bench_csv('query_latency.csv')
merge = _read_bench_csv('merge.csv')

# Preview the first rows of the accuracy table as the cell output.
accuracy.head()


In [None]:
# Absolute error versus quantile, one panel per distribution and one line per
# capacity, restricted to rows whose mode is 'single'.
single_accuracy = accuracy[accuracy['mode'] == 'single']
dists = sorted(single_accuracy['distribution'].unique())

# Grid layout: at most three panels per row, never fewer than one column.
cols = min(3, max(1, len(dists)))
rows = math.ceil(len(dists) / cols)
fig, axes = plt.subplots(
    rows,
    cols,
    figsize=(6 * cols, 4 * rows),
    sharex=True,
    sharey=True,
)
# Force a 2-D (rows, cols) array so the indexing below works for any grid size.
axes = np.array(axes, dtype=object).reshape(rows, cols)

for panel, dist_name in enumerate(dists):
    grid_row, grid_col = divmod(panel, cols)
    ax = axes[grid_row, grid_col]
    in_dist = single_accuracy['distribution'] == dist_name
    for capacity, capacity_rows in single_accuracy[in_dist].groupby('capacity'):
        # Sort by q so the line is drawn left to right.
        by_q = capacity_rows.sort_values('q')
        ax.plot(by_q['q'], by_q['abs_error'], marker='o', label=f'K={capacity}')
    ax.set(title=dist_name, xlabel='q', ylabel='|error|')
    ax.legend()

# Hide the panels left over when the grid has more cells than distributions.
for spare_ax in axes.flat[len(dists):]:
    spare_ax.axis('off')

fig.suptitle('Absolute value error by quantile (single pass)')
fig.tight_layout()
plt.show()


In [None]:
# Update throughput versus input size N (log-scaled x axis), one line per capacity.
fig, ax = plt.subplots(figsize=(7, 4))
for capacity, capacity_rows in throughput.groupby('capacity'):
    # Sort by N so the points are connected in increasing problem size.
    by_size = capacity_rows.sort_values('N')
    ax.plot(
        by_size['N'],
        by_size['updates_per_sec'],
        marker='o',
        label=f'K={capacity}',
    )
ax.set_xscale('log')
ax.set(
    xlabel='N',
    ylabel='updates/sec',
    title='Update throughput by problem size',
)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Query latency versus capacity, one line per queried quantile q.
fig, ax = plt.subplots(figsize=(7, 4))
for quantile, quantile_rows in latency.groupby('q'):
    # Sort by capacity so each line runs left to right.
    by_capacity = quantile_rows.sort_values('capacity')
    ax.plot(
        by_capacity['capacity'],
        by_capacity['latency_us'],
        marker='o',
        label=f'q={quantile}',
    )
ax.set(
    xlabel='capacity',
    ylabel='latency (µs)',
    title='Query latency by quantile',
)
# Anchor the legend outside the axes, to the right of the plot area.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()


In [None]:
# Merge time versus shard count, one line per (distribution, N) combination.
fig, ax = plt.subplots(figsize=(7, 4))
for group_key, group_rows in merge.groupby(['distribution', 'N']):
    dist_name, total_n = group_key
    # Sort by shard count so each line runs left to right.
    by_shards = group_rows.sort_values('shards')
    ax.plot(
        by_shards['shards'],
        by_shards['merge_time_s'],
        marker='o',
        label=f'{dist_name}, N={total_n}',
    )
ax.set(
    xlabel='shards',
    ylabel='merge time (s)',
    title='Merge time vs shards',
)
# Anchor the legend outside the axes, to the right of the plot area.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()


In [None]:
# Mean and median absolute error for every (distribution, capacity) pair,
# computed over the single-mode rows only.
error_stats = {
    'mean_abs_error': pd.NamedAgg(column='abs_error', aggfunc='mean'),
    'median_abs_error': pd.NamedAgg(column='abs_error', aggfunc='median'),
}
per_dist_capacity = single_accuracy.groupby(['distribution', 'capacity'])
summary = per_dist_capacity.agg(**error_stats).reset_index()

# Show the summary table as the cell output.
summary


In [None]:
# For every (distribution, capacity, mode) group, report the quantile with the
# lowest and the highest absolute error.
#
# Approach: sort the whole table by abs_error (ascending for "best",
# descending for "worst"), then keep the first entry of each group.
# NOTE: GroupBy.first() returns the first non-null value of each column, so
# the reported q and abs_error come from the same row only if neither column
# has missing values within a group.
group_keys = ['distribution', 'capacity', 'mode']
report_columns = group_keys + ['q', 'abs_error']

best = (
    accuracy.sort_values('abs_error')
    .groupby(group_keys, as_index=False)
    .first()
)
worst = (
    accuracy.sort_values('abs_error', ascending=False)
    .groupby(group_keys, as_index=False)
    .first()
)

print('Best quantiles (lowest abs error):')
display(best[report_columns])
# The leading '\n' puts a blank line between the two tables. It must be an
# escape sequence: a raw line break inside a single-quoted string is a
# SyntaxError.
print('\nWorst quantiles (highest abs error):')
display(worst[report_columns])
