curl -X POST https://9jgdg7ysyh.execute-api.us-east-1.amazonaws.com/prod/ \
     -H "Content-Type: application/json" \
     -d '{"instances":[[0.5,-1.2,3.3,0.0,2.1,-0.7,4.4,5.5]]}'

In [7]:
#!/usr/bin/env python3
import subprocess
import json
import requests
import time
import numpy as np
import sys

def get_api_url():
    """Read the base invoke URL from Terraform outputs."""
    try:
        raw = subprocess.check_output(
            ["terraform", "output", "-raw", "api_gateway_url"],
            stderr=subprocess.STDOUT
        )
    except subprocess.CalledProcessError as e:
        print("❌ Failed to read Terraform output:", e.output.decode(), file=sys.stderr)
        sys.exit(1)
    return raw.decode().strip().rstrip("/") + "/"

def measure_latency(url, features, n_requests=500, warmup=50):
    payload = {"features": features}
    # 1) warm-up phase (no timing)
    for _ in range(warmup):
        requests.post(url, json=payload)

    # 2) actual measurement
    latencies = []
    for _ in range(n_requests):
        start = time.perf_counter()
        resp  = requests.post(url, json=payload)
        elapsed_ms = (time.perf_counter() - start) * 1000
        latencies.append(elapsed_ms)
        if not resp.ok:
            print("Error:", resp.status_code, resp.text)
            break

    for pct in (50, 90, 95, 99):
        print(f"P{pct} latency: {np.percentile(latencies, pct):.2f} ms")

In [8]:


if __name__ == "__main__":
    api_url = get_api_url()
    print("Testing API Gateway URL:", api_url)

    # match your real input shape
    test_features = [0.5, -1.2, 3.3, 0.0, 2.1, -0.7, 4.4, 5.5]
    measure_latency(api_url, test_features, n_requests=100)


Testing API Gateway URL: https://lnv66n9evf.execute-api.us-east-1.amazonaws.com/
P50 latency: 331.56 ms
P90 latency: 344.77 ms
P95 latency: 350.75 ms
P99 latency: 414.45 ms
