In [1]:
import os
import uuid
import ast
import pandas as pd
import matplotlib.pyplot as plt

# Per-query bar charts of average runtime by system version.
# Reads one benchmark run CSV, draws one figure per distinct query (a bar per
# row of that query, in file order) and writes each figure as a PNG under
# `output_dir`.

# Load the dataset
# NOTE(review): absolute, machine-specific path; point it at your own run.csv.
df = pd.read_csv("/Users/omid/PycharmProjects/benchmarker_omid/_output/results/USSR_vs_baseline_zipf_grid_two_tables/2025-07-03-09-14-31/run.csv")  # Replace with your actual file path

# Parse the 'data_config' column to allow future extensions
# (stored as a Python-literal string; not used further in this cell).
df["data_config"] = df["data_config"].apply(ast.literal_eval)

# Output directory
output_dir = "thesis_plots/Simple_benchmarks_no_grouping"
os.makedirs(output_dir, exist_ok=True)

# Plot per query (no grouping by string_length or n_unique).
# groupby(sort=False) visits queries in first-appearance order and keeps each
# query's rows in file order, without re-scanning the whole frame per query.
# Rows whose query is missing are skipped instead of crashing further down.
for query, sub in df.groupby("query", sort=False):
    # Prepare data
    system_versions = sub["system_version"].tolist()
    avg_runtimes = sub["avg_runtime"].tolist()

    fig, ax = plt.subplots(figsize=(8, 5))
    x = range(len(system_versions))

    ax.bar(x, avg_runtimes, width=0.6, tick_label=system_versions)

    ax.set_xlabel("System Version")
    ax.set_ylabel("Average Runtime (s)")
    ax.set_title(f"Avg Runtime (Query: {query})")

    # Annotate bars. The gap above each bar is given in points rather than in
    # data units, so it looks the same whether runtimes are milliseconds or
    # minutes.
    for i, val in enumerate(avg_runtimes):
        ax.annotate(
            f"{val:.3f}",
            xy=(i, val),
            xytext=(0, 3),
            textcoords="offset points",
            ha="center",
            va="bottom",
        )

    # Save figure. The random suffix keeps earlier exports from being
    # overwritten when the cell is re-run.
    random_id = uuid.uuid4().hex[:8]
    safe_query = str(query).replace(" ", "_")  # str(): tolerate non-string query ids
    fname = f"{safe_query}_comparison_{random_id}.png"
    path = os.path.join(output_dir, fname)
    fig.savefig(path, dpi=300, bbox_inches="tight", pad_inches=0.1)
    plt.close(fig)  # release the figure; many are created in this loop

    print(f"Saved simple plot for '{query}' → {path}")


Saved simple plot for 'join_on_integer_keys' → thesis_plots/Simple_benchmarks_no_grouping/join_on_integer_keys_comparison_1f8ecdbc.png
Saved simple plot for 'join_on_integer_keys_limited_result' → thesis_plots/Simple_benchmarks_no_grouping/join_on_integer_keys_limited_result_comparison_981afd7b.png


In [1]:
import os
import uuid
import ast
import pandas as pd
import matplotlib.pyplot as plt

# Per-query bar charts of average runtime by system version, plus a speed-up
# summary. Reads one benchmark run CSV, draws one figure per distinct query
# (bars ordered by system version), writes each figure as a PNG under
# `output_dir`, and prints the relative speed-up when a query has exactly two
# rows.

# Load the dataset
# NOTE(review): absolute, machine-specific path; point it at your own run.csv.
df = pd.read_csv("/Users/omid/PycharmProjects/benchmarker_omid/_output/results/USSR_vs_baseline_tpch_sf30/2025-07-04-13-32-45/run.csv")  # Replace with your actual file path

# Parse the 'data_config' column
# (stored as a Python-literal string; not used further in this cell).
df["data_config"] = df["data_config"].apply(ast.literal_eval)

# Output directory
output_dir = "thesis_plots/Simple_benchmarks_no_grouping"
os.makedirs(output_dir, exist_ok=True)

# Plot per query.
# groupby(sort=False) visits queries in first-appearance order without
# re-scanning the whole frame per query. Rows whose query is missing are
# skipped instead of crashing further down.
for query, sub in df.groupby("query", sort=False):
    # Sort to ensure consistent order. A stable sort keeps rows that share a
    # system_version in file order, so repeated runs give the same bar order.
    sub = sub.sort_values("system_version", kind="mergesort")

    # Extract systems and runtimes
    system_versions = sub["system_version"].tolist()
    avg_runtimes = sub["avg_runtime"].tolist()

    fig, ax = plt.subplots(figsize=(8, 5))
    x = range(len(system_versions))

    ax.bar(x, avg_runtimes, width=0.6, tick_label=system_versions)

    ax.set_xlabel("System Version")
    ax.set_ylabel("Average Runtime (s)")
    ax.set_title(f"Avg Runtime (Query: {query})")

    # Annotate bars. The gap above each bar is given in points rather than in
    # data units, so it looks the same whether runtimes are milliseconds or
    # minutes.
    for i, val in enumerate(avg_runtimes):
        ax.annotate(
            f"{val:.3f}",
            xy=(i, val),
            xytext=(0, 3),
            textcoords="offset points",
            ha="center",
            va="bottom",
        )

    # Save figure. The random suffix keeps earlier exports from being
    # overwritten when the cell is re-run.
    random_id = uuid.uuid4().hex[:8]
    safe_query = str(query).replace(" ", "_")  # str(): tolerate non-string query ids
    fname = f"{safe_query}_comparison_{random_id}.png"
    path = os.path.join(output_dir, fname)
    fig.savefig(path, dpi=300, bbox_inches="tight", pad_inches=0.1)
    plt.close(fig)  # release the figure; many are created in this loop

    print(f"Saved simple plot for '{query}' → {path}")

    # Compute and print speed-up (assumes 2 systems only).
    # NOTE(review): this counts rows for the query, not distinct system
    # versions. A query with repeated rows per system also lands in the skip
    # branch; aggregate per system_version first if that is not intended.
    if len(avg_runtimes) == 2:
        rt0, rt1 = avg_runtimes
        name0, name1 = system_versions

        # Order the pair as (faster, slower); on a tie the first system is
        # reported as the faster one.
        if rt0 > rt1:
            fast_name, fast_rt, slow_name, slow_rt = name1, rt1, name0, rt0
        else:
            fast_name, fast_rt, slow_name, slow_rt = name0, rt0, name1, rt1

        # Zero, negative or NaN runtimes make the ratio meaningless (and a
        # zero divisor raises), so report them instead of dividing.
        if fast_rt > 0 and slow_rt > 0:
            speedup = slow_rt / fast_rt
            # Share of the slower system's runtime that is saved. This is a
            # runtime reduction, not a "percent faster" figure (2x faster is
            # 50% less runtime).
            percent = (slow_rt - fast_rt) / slow_rt * 100
            print(f"[{query}] {fast_name} is {speedup:.2f}× faster than {slow_name} ({percent:.1f}% less runtime)")
        else:
            print(f"[{query}] Skipped speed-up computation: runtimes must be positive (got {rt0} and {rt1}).")
    else:
        print(f"[{query}] Skipped speed-up computation: expected exactly 2 systems.")



Saved simple plot for 'tpch16' → thesis_plots/Simple_benchmarks_no_grouping/tpch16_comparison_67c1154e.png
[tpch16] Skipped speed-up computation: expected exactly 2 systems.
