curl -X POST https://9jgdg7ysyh.execute-api.us-east-1.amazonaws.com/prod/ \
     -H "Content-Type: application/json" \
     -d '{"instances":[[0.5,-1.2,3.3,0.0,2.1,-0.7,4.4,5.5]]}'

In [20]:
#!/usr/bin/env python3
import subprocess
import sys
import requests
import time
import numpy as np

def get_api_url():
    """Return the API Gateway base invoke URL from Terraform, ending in "/".

    Runs ``terraform output -raw api_gateway_url`` (in the process's current
    working directory) and normalises the value to exactly one trailing slash.

    Returns:
        str: The invoke URL with surrounding whitespace stripped and a single
        trailing "/".

    Exits:
        Prints a message to stderr and calls ``sys.exit(1)`` when the
        ``terraform`` executable cannot be started, when the command exits
        with a non-zero status, or when the output value is empty.
    """
    command = ["terraform", "output", "-raw", "api_gateway_url"]
    try:
        # Capture stderr separately: merging it into stdout would let any
        # warning Terraform prints on a successful run leak into the URL.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except FileNotFoundError:
        print(
            "❌ Failed to read Terraform output:",
            "terraform executable not found on PATH",
            file=sys.stderr,
        )
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        # Show everything the command produced, diagnostics first.
        details = (e.stderr or b"") + (e.stdout or b"")
        print(
            "❌ Failed to read Terraform output:",
            details.decode(errors="replace"),
            file=sys.stderr,
        )
        sys.exit(1)

    base_url = result.stdout.decode().strip()
    if not base_url:
        # Without this guard an empty output would be returned as "/".
        print(
            "❌ Failed to read Terraform output:",
            "api_gateway_url is empty",
            file=sys.stderr,
        )
        sys.exit(1)
    return base_url.rstrip("/") + "/"

def measure_latency(url, features, n_requests=100, warmup=50, timeout=30):
    """
    Benchmark POST latency against `url` and print percentile statistics.

    Sends `warmup` unmeasured requests, then up to `n_requests` timed
    requests through a single `requests.Session`, and prints the
    P10/P50/P90/P95/P99 latencies in milliseconds. The measured run stops at
    the first response whose status is an error (`r.ok` is false); the
    percentiles are then computed over the requests that succeeded before it.

    Args:
        url: Endpoint to POST to.
        features: Value sent as the JSON body ``{"features": features}``.
        n_requests: Maximum number of timed requests.
        warmup: Number of untimed requests sent before measuring.
        timeout: Seconds passed to every request as its `timeout`, so that a
            stalled endpoint cannot hang the benchmark indefinitely.

    Returns:
        None. Results are printed to stdout.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged if a
            request fails at the transport level (including timeouts).
    """
    # NOTE(review): the curl example that accompanies this script posts
    # {"instances": [[...]]} rather than {"features": [...]} — confirm which
    # schema the endpoint expects.
    payload = {"features": features}
    percentiles = (10, 50, 90, 95, 99)
    latencies = []

    # One session for the whole run so requests share a pooled connection;
    # the context manager closes it when the benchmark finishes or fails.
    with requests.Session() as session:
        # warm-up (no timing); responses are intentionally discarded
        for _ in range(warmup):
            session.post(url, json=payload, timeout=timeout)

        # measured run
        for _ in range(n_requests):
            start = time.perf_counter()
            r = session.post(url, json=payload, timeout=timeout)
            if not r.ok:
                print("Error:", r.status_code, r.text)
                break
            latencies.append((time.perf_counter() - start) * 1000)

    if not latencies:
        # np.percentile has nothing to work with on an empty sample.
        print("No successful timed requests; no latency statistics to report.",
              file=sys.stderr)
        return

    for pct, value in zip(percentiles, np.percentile(latencies, percentiles)):
        print(f"P{pct} latency: {value:.2f} ms")

if __name__ == "__main__":
    # Sample feature vector; change it to fit the deployed model's input.
    test_features = [0.5, -1.2, 3.3, 0.0, 2.1, -0.7, 4.4, 5.5]

    # Look up the deployed endpoint, announce it, then benchmark it.
    url = get_api_url()
    print("Testing API Gateway URL:", url)
    measure_latency(url=url, features=test_features, n_requests=100)





Testing API Gateway URL: https://u7a3rzm4q2.execute-api.us-east-1.amazonaws.com/
P10 latency: 124.24 ms
P50 latency: 127.87 ms
P90 latency: 132.28 ms
P95 latency: 135.04 ms
P99 latency: 139.10 ms
