In [None]:
%pip install pandas
%pip install matplotlib
%pip install seaborn

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import re
import os
from pathlib import Path

In [None]:
sns.set(style="whitegrid")

# Columns of the combined results table. Passing them to the DataFrame
# constructor keeps the table well-formed (and "size" present) even when no
# result file could be parsed.
RESULT_COLUMNS = ["benchmark", "allocator", "amount", "size", "mean", "stddev", "min", "max"]

# Result files are named <benchmark>_<allocator>_amount<N>[_size<M>].csv,
# e.g. basic_dmalloc_amount1000_size1. The "_size<M>" part is optional.
FILENAME_PATTERN = re.compile(
    r"(?P<benchmark>\w+)_(?P<allocator>\w+)_amount(?P<amount>\d+)(?:_size(?P<size>\d+))?"
)

# Grab all result CSV files. Sorted so the record order (and therefore the
# order of the plots built from it) does not depend on filesystem listing order.
files = sorted(glob.glob("../results/*.csv"))

records = []

for filepath in files:
    filename = Path(filepath).stem  # e.g., basic_dmalloc_amount1000_size1

    # Extract metadata from filename
    match = FILENAME_PATTERN.match(filename)
    if not match:
        print(f"Skipping unrecognized file: {filename}")
        continue

    meta = match.groupdict()

    # Read the CSV; its header row supplies the column names used below
    # (mean, stddev, min, max).
    # NOTE(review): presumably produced by a benchmarking tool's CSV export — confirm.
    result_csv = pd.read_csv(filepath)
    if result_csv.empty:
        # A header-only file has no measurement row to read.
        print(f"Skipping empty result file: {filename}")
        continue

    row = result_csv.iloc[0]  # Only the first row of each file is used

    records.append({
        "benchmark": meta["benchmark"],
        "allocator": meta["allocator"],
        "amount": int(meta["amount"]),
        "size": int(meta["size"]) if meta["size"] else None,
        "mean": float(row["mean"]),
        "stddev": float(row["stddev"]),
        "min": float(row["min"]),
        "max": float(row["max"]),
    })

# Create DataFrame
df = pd.DataFrame(records, columns=RESULT_COLUMNS)

# For plotting: treat missing sizes as a separate case (e.g. varying_allocs).
# Missing sizes turn the column into floats, so cast back to int after filling;
# otherwise sizes render as "64.0" in plot titles and file names.
df["size"] = df["size"].fillna(-1).astype(int)

In [None]:
import os
from datetime import datetime

# Set up the output directory (one folder per day the notebook is run)
today = datetime.now().strftime("%Y-%m-%d")
output_dir = f"./benchmark_result_graphs/{today}/time"
os.makedirs(output_dir, exist_ok=True)

# Assign each allocator one fixed colour so it looks the same in every figure
allocators = df["allocator"].unique()
palette = sns.color_palette("tab10", n_colors=len(allocators))
color_mapping = dict(zip(sorted(allocators), palette))

# One figure per (benchmark, allocation size): mean time vs. number of allocations
for benchmark in df["benchmark"].unique():
    bench_df = df[df["benchmark"] == benchmark]

    for size in sorted(bench_df["size"].unique()):
        size_df = bench_df[bench_df["size"] == size]

        # The size column may hold floats (e.g. 64.0); use a plain int for
        # labels and file names. -1 marks files that carried no size.
        size_value = int(size)

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.lineplot(
            data=size_df,
            x="amount",
            y="mean",
            hue="allocator",
            marker="o",
            palette=color_mapping,
            ax=ax,
        )

        title_size = " (varying sizes)" if size_value == -1 else f" (size={size_value} bytes)"
        ax.set_title(f"{benchmark}{title_size}")
        ax.set_xlabel("Amount of Allocations")
        ax.set_ylabel("Mean Time (s)")
        ax.legend(title="Allocator")
        fig.tight_layout()

        # Prepare filename-safe benchmark name
        safe_benchmark = benchmark.replace("/", "_").replace(" ", "_")
        size_str = "varying" if size_value == -1 else str(size_value)
        file_name = f"{safe_benchmark}_size_{size_str}.png"
        file_path = os.path.join(output_dir, file_name)

        # Save BEFORE showing: once the figure has been shown it may already be
        # closed (the notebook's inline backend does this), and saving
        # afterwards would write an empty image.
        fig.savefig(file_path)
        plt.show()
        plt.close(fig)


NameError: name 'df' is not defined

In [None]:
import os
import re
from collections import defaultdict
from datetime import datetime  # used for the dated output folder below

import matplotlib.pyplot as plt

# --- CONFIGURATION ---
RESULTS_DIR = "../results"           # directory scanned for massif_*.out files
ALLOCATORS = ["dmalloc", "malloc"]   # allocators drawn in each figure, in this order

# Figures go into a folder named after the day the notebook is run.
# NOTE(review): this is the same ".../time" folder the timing plots are written
# to; a dedicated folder for the heap-usage plots may be intended — confirm.
today = datetime.now().strftime("%Y-%m-%d")
output_dir = f"./benchmark_result_graphs/{today}/time"
os.makedirs(output_dir, exist_ok=True)

# --- FUNCTION TO PARSE massif FILES ---
def extract_peak_heap_from_massif(filepath):
    """Return the peak heap usage recorded in a massif output file, in whole KB.

    Every line that starts with ``mem_heap_B=`` is read as an integer byte
    count. The largest one is floor-divided by 1024 and returned.

    Args:
        filepath: Path of the massif output file to read.

    Returns:
        The largest ``mem_heap_B`` value in KB (rounded down), or 0 when the
        file contains no such line.

    Raises:
        ValueError: If the text after ``=`` on a matching line is not an integer.
    """
    heap_key = "mem_heap_B="
    with open(filepath, "r") as massif_file:
        heap_samples = [
            int(raw_line.strip().split("=")[1])
            for raw_line in massif_file
            if raw_line.startswith(heap_key)
        ]
    # The leading 0 is the floor the peak starts from, and covers files with
    # no heap samples at all.
    peak_bytes = max([0] + heap_samples)
    return peak_bytes // 1024  # convert to KB

# --- DATA COLLECTION ---
# Structure: data[benchmark][size][allocator] = [(amount, peak_kb), ...]
data = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

# Example filename: massif_basic_malloc_amount1000_size64.out
# The benchmark part uses \w+ so names that contain underscores
# (e.g. varying_allocs) are recognised. The allocator part stays limited to
# letters and digits, so it is always the last segment before "_amount".
# Compiled once, not on every loop iteration.
MASSIF_FILENAME = re.compile(
    r"massif_(\w+)_([a-zA-Z0-9]+)_amount(\d+)(_size(\d+))?\.out"
)

for filename in sorted(os.listdir(RESULTS_DIR)):
    if not (filename.startswith("massif_") and filename.endswith(".out")):
        continue

    match = MASSIF_FILENAME.match(filename)
    if not match:
        # Report the file so a naming mismatch does not silently drop data
        print(f"Skipping unrecognized file: {filename}")
        continue

    benchmark = match.group(1)
    allocator = match.group(2)
    amount = int(match.group(3))
    size = int(match.group(5)) if match.group(5) else 0  # use 0 for "varying"

    filepath = os.path.join(RESULTS_DIR, filename)
    peak_kb = extract_peak_heap_from_massif(filepath)

    data[benchmark][size][allocator].append((amount, peak_kb))

# --- PLOTTING ---
# One figure per (benchmark, size): peak heap usage vs. number of allocations,
# with one line per allocator listed in ALLOCATORS.
for benchmark in sorted(data.keys()):
    for size in sorted(data[benchmark].keys()):
        mem_data = data[benchmark][size]

        fig, ax = plt.subplots(figsize=(10, 6))
        for allocator in ALLOCATORS:
            if allocator in mem_data:
                # Sort by amount; sorted() leaves the collected data untouched
                # so re-running this cell starts from the same state.
                x, y = zip(*sorted(mem_data[allocator]))
                ax.plot(x, y, marker='o', label=allocator)

        # size 0 is the placeholder for files that carried no size
        size_label = f"size {size} bytes" if size != 0 else "varying sizes"
        safe_benchmark = benchmark.replace("/", "_").replace(" ", "_")
        file_name = f"{safe_benchmark}_size_{size_label}.png"
        file_path = os.path.join(output_dir, file_name)
        ax.set_title(f"Massif Heap Usage — Benchmark: '{benchmark}', {size_label}")
        ax.set_xlabel("Amount of allocations")
        ax.set_ylabel("Peak Heap Usage (KB)")
        ax.legend()
        ax.grid(True)
        fig.tight_layout()

        # Save BEFORE showing: once the figure has been shown it may already be
        # closed (the notebook's inline backend does this), and saving
        # afterwards would write an empty image.
        fig.savefig(file_path)
        plt.show()
        plt.close(fig)
