Assumes DeepInfra and Phoenix are processed, and their respective csv's exist.

In [14]:
# compare_llm_load_results.ipynb
import io
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader

# --- CONFIG ---
CSV_DEEPINFRA = "llm_load_results_deepinfra.csv"
CSV_PHOENIX = "llm_load_results_phoenix.csv"
PDF_OUTPUT = "llm_load_comparison.pdf"

# --- LOAD DATA ---
deepinfra = pd.read_csv(CSV_DEEPINFRA)
phoenix = pd.read_csv(CSV_PHOENIX)
deepinfra["provider"] = "DeepInfra"
phoenix["provider"] = "Phoenix"
df = pd.concat([deepinfra, phoenix], ignore_index=True)

# --- TIMESTAMPS & RELATIVE TIME ---
for c in ["timestamp_start", "timestamp_end"]:
    if c in df.columns:
        df[c] = pd.to_datetime(df[c], errors="coerce")

df = df.sort_values("timestamp_start")
df["relative_s"] = df.groupby("provider")["timestamp_start"].transform(
    lambda x: (x - x.min()).dt.total_seconds()
)
df["second"] = df["relative_s"].round().astype(int)

# --- Ensure failure column exists and is numeric ---
if "failed" not in df.columns:
    df["failed"] = (df["status_code"] >= 400).astype(int)
else:
    df["failed"] = df["failed"].astype(int)

# --- GROUP PER SECOND ---
rps = (
    df.groupby(["provider", "second"])
    .agg(
        requests_per_s=("status_code", "count"),
        failures_per_s=("failed", "sum"),
        input_tokens=("input_tokens", "sum"),
        output_tokens=("output_tokens", "sum"),
        avg_concurrency=("concurrency_at_start", "mean"),
    )
    .reset_index()
)

# --- Apply 30-second moving average per provider ---
def smooth(g):
    return g.assign(
        requests_per_s_ma30=g["requests_per_s"].rolling(window=30, min_periods=1).mean(),
        failures_per_s_ma30=g["failures_per_s"].rolling(window=30, min_periods=1).mean(),
        input_tokens_ma30=g["input_tokens"].rolling(window=30, min_periods=1).mean(),
        output_tokens_ma30=g["output_tokens"].rolling(window=30, min_periods=1).mean(),
        concurrency_ma30=g["avg_concurrency"].rolling(window=30, min_periods=1).mean(),
    )

rps = rps.groupby("provider", group_keys=False).apply(smooth)

# --- Helper: plot → BytesIO ---
def plot_to_bytes(fn):
    buf = io.BytesIO()
    plt.tight_layout()
    fn()
    plt.savefig(buf, format="png", dpi=200, bbox_inches="tight")
    plt.close()
    buf.seek(0)
    return buf

plots = []

# 1️⃣ Requests and Failures per Second (30-s MA)
def p_requests_failures():
    plt.figure(figsize=(10, 4))
    color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    for i, (prov, g) in enumerate(rps.groupby("provider")):
        color = color_cycle[i % len(color_cycle)]
        plt.plot(
            g["second"], g["requests_per_s_ma30"],
            label=f"{prov} Requests/sec (30s MA)",
            color=color, linewidth=1.8
        )
        plt.plot(
            g["second"], g["failures_per_s_ma30"],
            label=f"{prov} Failures/sec (30s MA)",
            color=color, linewidth=1.5, linestyle="--"
        )
    plt.title("📊 Requests and Failures per Second (30-Second Moving Average)")
    plt.xlabel("Elapsed Time (s)")
    plt.ylabel("Count per Second (smoothed)")
    plt.legend()
    plt.grid(True, alpha=0.3)
plots.append(("Requests and Failures per Second", plot_to_bytes(p_requests_failures)))

# 2️⃣ Input Tokens (30-s MA)
def p_input_tokens():
    plt.figure(figsize=(10, 4))
    for prov, g in rps.groupby("provider"):
        plt.plot(g["second"], g["input_tokens_ma30"], label=prov, linewidth=1.8)
    plt.title("Input Tokens Sent Over Time (30-Second Moving Average)")
    plt.xlabel("Elapsed Time (s)")
    plt.ylabel("Input Tokens / sec (smoothed)")
    plt.legend()
    plt.grid(True, alpha=0.3)
plots.append(("Input Tokens Over Time", plot_to_bytes(p_input_tokens)))

# 3️⃣ Output Tokens (30-s MA)
def p_output_tokens():
    plt.figure(figsize=(10, 4))
    for prov, g in rps.groupby("provider"):
        plt.plot(g["second"], g["output_tokens_ma30"], label=prov, linewidth=1.8)
    plt.title("Output Tokens Received Over Time (30-Second Moving Average)")
    plt.xlabel("Elapsed Time (s)")
    plt.ylabel("Output Tokens / sec (smoothed)")
    plt.legend()
    plt.grid(True, alpha=0.3)
plots.append(("Output Tokens Over Time", plot_to_bytes(p_output_tokens)))

# 4️⃣ Concurrency (30-s MA)
def p_concurrency():
    plt.figure(figsize=(10, 4))
    for prov, g in rps.groupby("provider"):
        plt.plot(g["second"], g["concurrency_ma30"], label=prov, linewidth=1.8)
    plt.title("Concurrency Over Time (30-Second Moving Average)")
    plt.xlabel("Elapsed Time (s)")
    plt.ylabel("Concurrent Requests (smoothed)")
    plt.legend()
    plt.grid(True, alpha=0.3)
plots.append(("Concurrency Over Time", plot_to_bytes(p_concurrency)))

# --- PDF Assembly ---
c = canvas.Canvas(PDF_OUTPUT, pagesize=letter)
width, height = letter
y = height - 50

c.setFont("Helvetica-Bold", 16)
c.drawString(50, y, "LLM Load Test Comparison (DeepInfra vs Phoenix)")
y -= 30
c.setFont("Helvetica", 12)
c.drawString(50, y, f"Generated on {datetime.now():%Y-%m-%d %H:%M:%S}")
y -= 40

for title, img in plots:
    if y < 250:
        c.showPage()
        y = height - 50
    c.setFont("Helvetica-Bold", 12)
    c.drawString(50, y, title)
    y -= 220
    c.drawImage(ImageReader(img), 50, y, width=500, height=200)
    y -= 40

c.save()
print(f"✅ PDF report generated: {PDF_OUTPUT}")


  rps = rps.groupby("provider", group_keys=False).apply(smooth)
  plt.savefig(buf, format="png", dpi=200, bbox_inches="tight")


✅ PDF report generated: llm_load_comparison.pdf


<Figure size 640x480 with 0 Axes>