In [None]:
# Install the third-party packages imported by this notebook.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install pandas
%pip install matplotlib
%pip install seaborn

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import re
import os
from pathlib import Path

In [None]:
# Apply seaborn's "whitegrid" style to the figures drawn after this point.
sns.set(style="whitegrid")

# Load every benchmark result CSV in ../results into one DataFrame.
# Each file contributes one record: metadata parsed from the file name plus
# the timing statistics read from the first data row of the CSV.

# File stems look like <benchmark>_<allocator>_amount<N>[_size<M>],
# e.g. basic_dmalloc_amount1000_size1. The "_size<M>" part is optional.
RESULT_NAME_PATTERN = re.compile(
    r"(?P<benchmark>\w+)_(?P<allocator>\w+)_amount(?P<amount>\d+)(?:_size(?P<size>\d+))?"
)

# Column order of the combined frame. Passing it explicitly keeps the frame
# well-formed (all columns present) even when no result file was loaded.
RESULT_COLUMNS = ["benchmark", "allocator", "amount", "size", "mean", "stddev", "min", "max"]


def parse_result_filename(stem):
    """Extract benchmark metadata from a result file stem.

    Args:
        stem: File name without directory or extension,
            e.g. "basic_dmalloc_amount1000_size1".

    Returns:
        A dict with keys "benchmark" (str), "allocator" (str), "amount" (int)
        and "size" (int, or None when the stem has no "_size<M>" part), or
        None when the stem does not match the expected naming scheme.
    """
    match = RESULT_NAME_PATTERN.match(stem)
    if not match:
        return None

    meta = match.groupdict()
    return {
        "benchmark": meta["benchmark"],
        "allocator": meta["allocator"],
        "amount": int(meta["amount"]),
        "size": int(meta["size"]) if meta["size"] else None,
    }


# Sorted so the record order does not depend on directory listing order.
files = sorted(glob.glob("../results/*.csv"))

records = []

for filepath in files:
    filename = Path(filepath).stem  # e.g., basic_dmalloc_amount1000_size1

    meta = parse_result_filename(filename)
    if meta is None:
        print(f"Skipping unrecognized file: {filename}")
        continue

    # The header line supplies the column names; only the first data row is used.
    try:
        result_df = pd.read_csv(filepath)
    except pd.errors.EmptyDataError:
        # A completely empty file has no header to parse.
        result_df = pd.DataFrame()

    if result_df.empty:
        # Without this guard, .iloc[0] raises IndexError on a file with no rows.
        print(f"Skipping file with no data rows: {filename}")
        continue

    row = result_df.iloc[0]  # Only one row per file

    records.append({
        **meta,
        "mean": float(row["mean"]),
        "stddev": float(row["stddev"]),
        "min": float(row["min"]),
        "max": float(row["max"]),
    })

# Create DataFrame
df = pd.DataFrame(records, columns=RESULT_COLUMNS)

# For plotting: treat missing sizes as a separate case (e.g. varying_allocs).
# Missing sizes become the sentinel -1. The cast back to int matters because a
# column that mixes numbers and missing values is held as float, which would
# otherwise surface in plot titles as "size=1.0".
df["size"] = pd.to_numeric(df["size"], errors="coerce").fillna(-1).astype(int)

In [None]:
# Plot mean run time against the number of allocations: one figure per
# (benchmark, size) pair, one line per allocator.

# Set to a directory path to also write each figure there as a PNG.
# None (the default) only displays the figures.
PLOT_OUTPUT_DIR = None

# Set to True to shade mean +/- stddev around each allocator's line.
SHOW_STDDEV_BAND = False

allocators = df["allocator"].unique()
palette = sns.color_palette("tab10", n_colors=len(allocators))
# Sorted so that an allocator is given the same colour in every figure.
color_mapping = dict(zip(sorted(allocators), palette))

for benchmark in df["benchmark"].unique():
    bench_df = df[df["benchmark"] == benchmark]

    for size in sorted(bench_df["size"].unique()):
        size_df = bench_df[bench_df["size"] == size]

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.lineplot(
            data=size_df,
            x="amount",
            y="mean",
            hue="allocator",
            marker="o",
            palette=color_mapping,
            ax=ax,
        )

        if SHOW_STDDEV_BAND:
            for allocator in size_df["allocator"].unique():
                # fill_between joins points in the order given, so sort by x first.
                alloc_df = size_df[size_df["allocator"] == allocator].sort_values("amount")
                ax.fill_between(
                    alloc_df["amount"],
                    alloc_df["mean"] - alloc_df["stddev"],
                    alloc_df["mean"] + alloc_df["stddev"],
                    alpha=0.2,
                    color=color_mapping[allocator],
                )

        # -1 marks rows that have no size value. int() keeps the label free of
        # a trailing ".0" when the size column is stored as float.
        size_str = "varying" if size == -1 else str(int(size))
        title_size = " (varying sizes)" if size == -1 else f" (size={size_str} bytes)"
        ax.set_title(f"{benchmark}{title_size}")
        ax.set_xlabel("Amount of Allocations")
        ax.set_ylabel("Mean Time (s)")
        ax.legend(title="Allocator")
        fig.tight_layout()

        # Save plot (opt-in, see PLOT_OUTPUT_DIR)
        if PLOT_OUTPUT_DIR is not None:
            output_dir = Path(PLOT_OUTPUT_DIR)
            output_dir.mkdir(parents=True, exist_ok=True)
            fig.savefig(output_dir / f"{benchmark}_size{size_str}.png", dpi=150)

        plt.show()
        # Release the figure; otherwise every figure made by the loop stays open.
        plt.close(fig)


In [None]:
# Configuration for the peak-memory plots
# Directory scanned for memory log files.
RESULTS_DIR = "../results"
# Benchmark to plot; log files are selected by the name prefix "mem_<BENCHMARK>_".
BENCHMARK = "basic"
# Allocator names looked for inside log file names.
# Note that "malloc" is a substring of "dmalloc".
ALLOCATORS = ["dmalloc", "malloc"]
# Sizes to plot, one figure per size (labelled as bytes in the figure title).
SIZES = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]

# Function to extract max resident set size from time -v logs
def extract_memory_usage(filepath):
    """Return the "Maximum resident set size" value recorded in a log file.

    The file is scanned line by line. The result comes from the first line
    that contains the label "Maximum resident set size" followed by a number.

    Args:
        filepath: Path of the log file to read.

    Returns:
        The first run of digits that follows the label, as an int, or None
        when no line carries the label followed by a number. The value is
        returned as written in the log; no unit conversion is applied.
    """
    with open(filepath, "r") as f:
        for line in f:
            # Anchor the number to the label: digits that appear earlier on the
            # line are ignored, and a label without a number is skipped rather
            # than raising IndexError.
            found = re.search(r"Maximum resident set size\D*(\d+)", line)
            if found:
                return int(found.group(1))
    return None

# Plot peak memory usage against the number of allocations: one figure per
# size in SIZES, one line per allocator in ALLOCATORS.

# List the directory once; the candidate files are the same for every size.
mem_log_names = [
    name for name in os.listdir(RESULTS_DIR)
    if name.startswith(f"mem_{BENCHMARK}_")
]

# Iterate over each size and plot
for size in SIZES:
    memory_usages = {alloc: [] for alloc in ALLOCATORS}

    for filename in mem_log_names:
        # Match the complete size field. A plain substring test would let
        # size=1 also accept size16, size128, size1024, and so on, mixing
        # other sizes' measurements into this figure.
        if not re.search(rf"size{size}(?!\d)", filename):
            continue

        match = re.search(r"amount(\d+)", filename)
        if not match:
            continue

        amount = int(match.group(1))

        # Allocator names may contain one another ("malloc" is inside
        # "dmalloc"), so pick the longest name present in the file name
        # instead of relying on the order of ALLOCATORS.
        allocator = max(
            (a for a in ALLOCATORS if a in filename),
            key=len,
            default=None,
        )
        if not allocator:
            continue

        filepath = os.path.join(RESULTS_DIR, filename)
        mem = extract_memory_usage(filepath)

        if mem is not None:
            memory_usages[allocator].append((amount, mem))

    # Skip if there's no data
    if not any(memory_usages[a] for a in ALLOCATORS):
        continue

    # Sort data by amount
    for alloc in ALLOCATORS:
        memory_usages[alloc].sort()

    # Plot
    fig, ax = plt.subplots(figsize=(10, 6))
    for allocator in ALLOCATORS:
        if memory_usages[allocator]:
            x, y = zip(*memory_usages[allocator])
            ax.plot(x, y, marker='o', label=allocator)

    ax.set_title(f"Memory Usage: Benchmark '{BENCHMARK}', Size {size} bytes")
    ax.set_xlabel("Amount of allocations")
    ax.set_ylabel("Peak Memory Usage (KB)")
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()
    # Release the figure so figures do not pile up across loop iterations.
    plt.close(fig)


NameError: name 'os' is not defined