In [3]:
#!/usr/bin/env python3
import subprocess, sys, requests, time, numpy as np, os, argparse

# ── discover API Gateway URL (CLI → env → terraform) ────────────────
def get_api_url():
    """Resolve the endpoint URL and user ID for the latency test.

    The URL is taken from the first source that yields a non-empty value
    (after stripping surrounding whitespace):

    1. the ``--url`` command-line option,
    2. the ``API_GATEWAY_URL`` environment variable,
    3. the stdout of ``terraform output -raw api_gateway_url``.

    Unknown command-line arguments are ignored rather than rejected.

    Returns:
        A ``(url, user_id)`` tuple. ``url`` has all trailing slashes
        collapsed into exactly one; ``user_id`` is the ``--user-id`` option
        (default 7).

    Raises:
        subprocess.CalledProcessError: terraform exited with a non-zero
            status (its stderr is available on the exception).
        OSError: the terraform executable could not be started.
        ValueError: none of the three sources produced a URL.
    """
    p = argparse.ArgumentParser(description="Latency test for /predict")
    p.add_argument("--url", help="Full invoke URL (overrides everything)")
    p.add_argument("--user-id", type=int, default=7,
                   help="User ID to query (default: 7)")
    args, _ = p.parse_known_args()

    base = (args.url or "").strip()                      # 1. CLI
    if not base:
        base = os.getenv("API_GATEWAY_URL", "").strip()  # 2. env var
    if not base:                                         # 3. terraform output
        # Capture stderr separately: merging it into stdout would let any
        # warning terraform prints become part of the URL.
        result = subprocess.run(
            ["terraform", "output", "-raw", "api_gateway_url"],
            check=True,
            capture_output=True,
            encoding="utf-8",
        )
        base = result.stdout.strip()

    if not base:
        # Fail here with a clear message instead of returning "/" and
        # letting the HTTP client reject it later.
        raise ValueError(
            "No API URL found: pass --url, set API_GATEWAY_URL, or make "
            "sure the terraform output 'api_gateway_url' is non-empty"
        )

    return base.rstrip("/") + "/", args.user_id

# ── latency helper — now sends {"user_id": N} ───────────────────────
def measure_latency(url, user_id, n_requests=100, warmup=50, timeout=30.0):
    """POST ``{"user_id": user_id}`` to *url* repeatedly and print latency percentiles.

    Args:
        url: Endpoint that receives the JSON payload.
        user_id: Value sent under the ``"user_id"`` key.
        n_requests: Number of timed requests to attempt.
        warmup: Number of untimed requests sent first; their responses are
            not inspected.
        timeout: Per-request timeout in seconds handed to ``requests`` so a
            stalled connection cannot hang the run indefinitely.

    The timed loop stops at the first non-OK response (its status and body
    are printed); percentiles are then computed over the requests that
    succeeded before it. If none succeeded, a notice is printed instead.

    Raises:
        requests.exceptions.RequestException: connection errors and
            timeouts are not caught and propagate to the caller.
    """
    payload = {"user_id": user_id}
    latencies = []

    # A single Session is shared by all requests so the underlying
    # connection can be reused; the ``with`` block closes it on any exit.
    with requests.Session() as session:
        # warm-up (untimed)
        for _ in range(warmup):
            session.post(url, json=payload, timeout=timeout)

        for _ in range(n_requests):
            start = time.perf_counter()
            r = session.post(url, json=payload, timeout=timeout)
            # Stop the clock before touching the response so the sample
            # covers only the request itself.
            elapsed_ms = (time.perf_counter() - start) * 1000
            if not r.ok:
                print("Error:", r.status_code, r.text)
                break
            latencies.append(elapsed_ms)

    if not latencies:
        # Percentiles of an empty sample are undefined; report that plainly.
        print("No successful requests; no latency percentiles to report.")
        return

    for pct in (10, 50, 90, 95, 99):
        print(f"P{pct}: {np.percentile(latencies, pct):.2f} ms")

# ── main ────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Resolve the target first so the banner reports exactly what is hit.
    endpoint, user_id = get_api_url()
    print("Testing API Gateway:", endpoint, "with user_id:", user_id)
    measure_latency(url=endpoint, user_id=user_id, n_requests=100)


Testing API Gateway: https://wsvo0n5k16.execute-api.us-east-1.amazonaws.com/ with user_id: 7
P10: 130.29 ms
P50: 136.89 ms
P90: 148.85 ms
P95: 158.02 ms
P99: 167.48 ms
