In [1]:
# Cell 1: Imports
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path


In [7]:
# Cell 2: Read the benchmark results CSV (point `csv_path` at your copy if it lives elsewhere)
csv_path = Path("/home/cc/dLoRA-artifact/ttft_latency_loras_results.csv")

df = (
    pd.read_csv(csv_path)
    # Rows in which every field is missing (e.g. a line made only of commas) carry no data.
    .dropna(how="all")
    # Coerce gpu_loras to numbers; anything unparseable becomes NaN ...
    .assign(gpu_loras=lambda frame: pd.to_numeric(frame["gpu_loras"], errors="coerce"))
    # ... and those rows are discarded before the integer cast, which cannot represent NaN.
    .dropna(subset=["gpu_loras"])
    .astype({"gpu_loras": int})
)

df.head()


Unnamed: 0,run_timestamp,total_loras,gpu_loras,prompt_regime,arrival_rate,duration_seconds,total_requests,successful,errors,timeouts,...,ttft_mean,ttft_p90,ttft_p99,ttft_max,latency_min,latency_median,latency_mean,latency_p90,latency_p99,latency_max
0,2025-12-22 06:29:07,128,2,short,3.0,60.0,180,180,0,0,...,0.633372,0.675977,0.75555,0.779182,0.557312,0.63358,0.63512,0.677921,0.757788,0.78064
1,2025-12-22 06:31:46,128,2,short,3.0,60.0,180,180,0,0,...,0.617535,0.659331,0.735045,0.739858,0.540205,0.620388,0.619586,0.660922,0.737244,0.742132
2,2025-12-22 06:33:39,128,2,short,3.0,60.0,180,180,0,0,...,0.626591,0.671839,0.721147,0.742785,0.560628,0.625729,0.628318,0.673423,0.722779,0.745256
3,2025-12-22 06:35:47,128,4,short,3.0,60.0,180,180,0,0,...,0.646493,0.689557,0.822809,0.843023,0.56752,0.642193,0.648364,0.691437,0.82458,0.845296
4,2025-12-22 06:37:16,128,4,short,3.0,60.0,180,180,0,0,...,0.646485,0.688276,0.806192,0.835438,0.561915,0.644436,0.64824,0.689956,0.807957,0.837291


In [None]:
# Cell 3: (Optional) Filter to a specific subset if you want clean comparisons
# Example: keep only short prompts and a specific arrival_rate
# df = df[(df["prompt_regime"] == "short") & (df["arrival_rate"] == 3.0)]

# If you want to see what values exist:
# print("prompt_regime:", sorted(df["prompt_regime"].dropna().unique()))
# print("arrival_rate:", sorted(df["arrival_rate"].dropna().unique()))


In [None]:
# Cell 4: Mean of each TTFT/latency statistic per gpu_loras value
# (all rows sharing a gpu_loras value -- repeated runs or other settings -- are averaged together)

# Candidate columns are named <family>_<stat>; only the ones actually present in df
# are kept, so a CSV that lacks some of them still aggregates.
metrics = [
    f"{family}_{stat}"
    for family in ("ttft", "latency")
    for stat in ("min", "median", "mean", "p90", "p99", "max")
    if f"{family}_{stat}" in df.columns
]

agg = (
    df.groupby(by="gpu_loras", as_index=False)[metrics]
    .mean(numeric_only=True)
    .sort_values(by="gpu_loras")
)

agg


In [None]:
# Cell 5: Plot helper
def plot_metric_vs_gpu_loras(agg_df: pd.DataFrame, metric: str, save_dir: str = "figs") -> Path:
    """Plot one aggregated metric against ``gpu_loras``, save it as a PNG, and show it.

    Args:
        agg_df: Frame holding a ``gpu_loras`` column (x axis) and a ``metric``
            column (y axis).
        metric: Name of the column to plot; also used for the y-axis label,
            the title and the output file name.
        save_dir: Directory that receives the PNG. It is created, including
            missing parents, when it does not exist yet.

    Returns:
        Path of the written image: ``<save_dir>/<metric>_vs_gpu_loras.png``.

    Raises:
        AssertionError: If ``metric`` is not a column of ``agg_df``.
    """
    assert metric in agg_df.columns, f"Metric '{metric}' not in aggregated dataframe."

    # Pull the data before any figure exists so a missing column cannot leave
    # an orphaned figure behind.
    x = agg_df["gpu_loras"].values
    y = agg_df[metric].values

    fig, ax = plt.subplots()
    try:
        ax.plot(x, y, marker="o")  # matplotlib default color (no explicit color)
        ax.set_xlabel("LoRA Adapters in HBM")
        ax.set_ylabel(metric)
        ax.set_title(f"{metric} vs LoRA Adapters in HBM")
        ax.grid(True, which="both", linestyle="--", linewidth=0.5, alpha=0.5)
        fig.tight_layout()

        out_dir = Path(save_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        out_path = out_dir / f"{metric}_vs_gpu_loras.png"
        fig.savefig(out_path, dpi=200)
        plt.show()
    finally:
        # Release the figure even if saving fails. Without this, calling the
        # helper in a loop keeps every figure alive on backends whose show()
        # does not close them, and matplotlib warns about too many open figures.
        plt.close(fig)

    return out_path


In [None]:
# Cell 6: Draw a single metric -- set `metric_to_plot` to any column listed in `metrics`
metric_to_plot = "ttft_mean"
plot_metric_vs_gpu_loras(agg_df=agg, metric=metric_to_plot)


In [None]:
# Cell 7: Plot every entry of `metrics` (one figure each, up to 12), saved under ./figs/
saved = [plot_metric_vs_gpu_loras(agg, name) for name in metrics]

saved
