In [1]:
import os
import uuid
import ast
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the benchmark run results.
# NOTE(review): this is an absolute, machine-local path; the cell only runs where
# this file exists — consider moving it to a configurable location.
df = pd.read_csv(
    "/Users/omid/PycharmProjects/benchmarker_omid/_output/results/USSR_vs_MAIN_n_unique200/2025-07-21-13-35-15/run.csv"
)

# "data_config" is stored as Python-literal text: parse each cell with
# ast.literal_eval, flatten the parsed objects into their own columns, and
# attach those columns in place of the original text column.
df["data_config"] = df["data_config"].apply(ast.literal_eval)
config_df = pd.json_normalize(df["data_config"])
df = pd.concat([df.drop(columns="data_config"), config_df], axis=1)

# Directory that receives the generated figures (created if it does not exist).
output_dir = "thesis_plots/Variable_length_aggregate"
os.makedirs(output_dir, exist_ok=True)

# For every distinct query: draw a grouped bar chart of mean runtime per string
# length (one bar per system version), save it as a PNG under output_dir, and
# print a pairwise speed-up report for each string length.
for query in df["query"].unique():
    sub = df[df["query"] == query]

    # Rows: string length; columns: system version; cells: mean of avg_runtime.
    pivot = sub.pivot_table(
        index="string_length",
        columns="system_version",
        values="avg_runtime",
        aggfunc="mean"
    )

    # Without any data there are no bars to draw, and the bar-width computation
    # below would divide by zero.
    if pivot.empty:
        print(f"Skipped '{query}': no runtime data to plot")
        continue

    # Order rows by the integer value of the string-length label.
    sorted_idx = sorted(pivot.index, key=int)
    pivot = pivot.loc[sorted_idx]

    # Create plot
    fig, ax = plt.subplots(figsize=(10, 6))

    # Each group of bars occupies 0.8 of a unit on the x axis, split evenly
    # between the system versions.
    n_versions = len(pivot.columns)
    width = 0.8 / n_versions
    x = np.arange(len(pivot.index))

    for i, ver in enumerate(pivot.columns):
        ax.bar(x + i * width, pivot[ver].values, width, label=ver)

    # Put each tick at the middle of its group of bars.
    center_offset = (n_versions - 1) * width / 2
    ax.set_xticks(x + center_offset)
    ax.set_xticklabels(pivot.index, ha="center")

    ax.set_xlabel("String Length")
    ax.set_ylabel("Average Runtime (s)")
    ax.legend(title="System Version", loc="upper left")
    fig.subplots_adjust(bottom=0.25, right=0.85)

    # Save plot; the random suffix gives every run a fresh file name.
    random_id = uuid.uuid4().hex[:8]
    safe_query = query.replace(" ", "_")
    fname = f"{safe_query}_{random_id}.png"
    path = os.path.join(output_dir, fname)
    fig.savefig(path, dpi=300, bbox_inches="tight", pad_inches=0.1)
    plt.close(fig)  # release the figure so figures do not pile up across iterations

    print(f"Saved plot for '{query}' → {path}")

    # ---- Speed-up Reporting ----
    print(f"\nSpeed-up Report for Query '{query}':")
    for string_length, row in pivot.iterrows():
        # Compare only the systems that actually have a measurement at this
        # string length. Dropping NaNs first guarantees that the two values
        # compared are the two measured ones, even when the pivot has more
        # than two system-version columns.
        measured = row.dropna()
        if len(measured) != 2:
            print(f"  string_length={string_length}: Skipped speed-up (need exactly 2 systems)")
            continue

        versions = measured.index.tolist()
        times = measured.values.tolist()

        # The system with the larger runtime is the slower one; on a tie the
        # first system is reported as the faster one.
        slow, fast = (0, 1) if times[0] > times[1] else (1, 0)
        speedup = times[slow] / times[fast]
        percent = (times[slow] - times[fast]) / times[slow] * 100
        print(f"  string_length={string_length}: {versions[fast]} is {speedup:.2f}× faster than {versions[slow]} ({percent:.1f}% faster)")


Saved plot for 'double_column_groupby' → thesis_plots/Variable_length_aggregate/double_column_groupby_b294e898.png

Speed-up Report for Query 'double_column_groupby':
  string_length=16: unified_string_dictionary is 1.38× faster than duckdb_latest_main (27.8% faster)
  string_length=32: unified_string_dictionary is 1.69× faster than duckdb_latest_main (40.7% faster)
  string_length=64: unified_string_dictionary is 2.26× faster than duckdb_latest_main (55.8% faster)
  string_length=128: unified_string_dictionary is 3.58× faster than duckdb_latest_main (72.1% faster)
  string_length=256: unified_string_dictionary is 5.75× faster than duckdb_latest_main (82.6% faster)
