# Module 2 · Thème 4 — API requests, pagination, rate limit

Ce notebook appelle le Mock API KORYXA (auth `X-API-Key`), suit `next`, gère les 429, logge chaque requête puis produit :

- `m2t4_transactions_raw.csv`
- `m2t4_transactions_clean.csv`
- `m2t4_request_log.csv`
- `m2t4_run_report.json`
- `m2t4_api_contract.md`

Le code suit strictement : pagination via `next`, retries intelligents, timeout + `raise_for_status()`, log d’erreurs.

In [None]:
import json
import os
import time
from datetime import datetime, timezone
from urllib.parse import parse_qs, urljoin, urlparse

import pandas as pd
import requests

# Connection settings; both values can be overridden through the environment.
BASE = os.environ.get("KORYXA_LAB_API_BASE", "http://localhost:8000").rstrip("/")
API_KEY = os.environ.get("KORYXA_LAB_API_KEY", "CHANGE_ME")

# First page of the transactions endpoint and its initial query parameters.
START_URL = f"{BASE}/api/labs/mock-api/v1/transactions"
PARAMS = {
    "page": 1,
    "page_size": 100,
    "start_date": "2026-01-01",
    "end_date": "2026-01-31",
}

# One shared session so every request carries the same headers.
session = requests.Session()
session.headers.update({"Accept": "application/json", "X-API-Key": API_KEY})

# Accumulators filled while fetching: fetched rows, per-request log entries,
# and the number of 429 responses seen.
all_rows = []
request_log = []
n_429 = 0

def log_req(url, params, status_code, elapsed_ms, bytes_len, error=""):
    """Append one entry describing an HTTP attempt to the module-level ``request_log``.

    Args:
        url: URL that was requested.
        params: Query parameters sent with the request; ``None`` or an empty
            mapping is recorded as ``{}``.
        status_code: HTTP status code of the response (callers in this file
            pass ``-1`` when no response was received).
        elapsed_ms: Duration of the attempt in milliseconds; truncated to int.
        bytes_len: Size of the response body in bytes.
        error: Optional error description; empty string when there is none.
    """
    # Timezone-aware replacement for the deprecated, naive ``datetime.utcnow()``.
    # The "+00:00" offset is rewritten so the logged format stays "...Z".
    ts = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    request_log.append({
        "ts": ts,
        "url": url,
        "params": json.dumps(params or {}, ensure_ascii=False),
        "status_code": status_code,
        "elapsed_ms": int(elapsed_ms),
        "bytes": int(bytes_len),
        "error": error,
    })

def sleep_backoff(attempt, base=0.5, cap=8.0):
    """Sleep for an exponentially growing delay, never longer than ``cap``.

    The delay is ``base * 2 ** attempt`` seconds, limited to ``cap`` seconds.
    """
    delay_s = min(cap, base * 2 ** attempt)
    time.sleep(delay_s)

# --- Paginated fetch ---------------------------------------------------------
# Start from START_URL/PARAMS and keep following the `next` link returned by
# the API until it is empty, accumulating every page's `results` in `all_rows`.
url = START_URL
params = PARAMS.copy()
max_pages = 1000  # safety stop in case `next` never becomes empty
page_count = 0
while True:
    page_count += 1
    if page_count > max_pages:
        raise RuntimeError("Pagination safety stop: too many pages (possible loop).")

    # Up to 6 attempts per page. 429s, 5xx responses and network errors each
    # consume one attempt; the for/else raises once all are used up.
    for attempt in range(6):
        t0 = time.time()
        try:
            r = session.get(url, params=params, timeout=15)
        except (requests.Timeout, requests.ConnectionError) as e:
            # No response at all: log with status -1, back off, try again.
            elapsed_ms = (time.time() - t0) * 1000
            log_req(url, params, -1, elapsed_ms, 0, error=f"{type(e).__name__}: {e}")
            sleep_backoff(attempt)
            continue

        elapsed_ms = (time.time() - t0) * 1000
        log_req(url, params, r.status_code, elapsed_ms, len(r.content or b""))

        if r.status_code == 429:
            n_429 += 1
            # Retry-After can be fractional seconds or an HTTP-date; anything
            # that is not a plain number falls back to the 2 s default instead
            # of raising ValueError and aborting the whole run.
            try:
                retry_after = float(r.headers.get("Retry-After", "2"))
            except ValueError:
                retry_after = 2.0
            # Wait at least 1 s and at most 10 s, whatever the server asked for.
            time.sleep(min(max(retry_after, 1), 10))
            continue

        if 500 <= r.status_code < 600:
            sleep_backoff(attempt)
            continue

        # Remaining 4xx responses are not retried: let the HTTPError propagate.
        r.raise_for_status()

        try:
            payload = r.json()
        except ValueError as e:
            # The body is not valid JSON: record it in the request log, then fail.
            elapsed_ms = (time.time() - t0) * 1000
            log_req(url, params, r.status_code, elapsed_ms, len(r.content or b""), error=f"JSONDecodeError: {e}")
            raise
        break
    else:
        raise RuntimeError("Failed after retries (page fetch).")

    all_rows.extend(payload.get("results", []))
    nxt = payload.get("next")
    if not nxt:
        break

    # Resolve `next` against BASE, then split it back into a bare URL plus a
    # params dict so the next request is sent and logged like the first one.
    # Only the first value of a repeated query key is kept.
    url = urljoin(BASE + "/", nxt.lstrip("/"))
    parsed = urlparse(url)
    qs = parse_qs(parsed.query)
    params = {k: (v[0] if isinstance(v, list) and v else v) for k, v in qs.items()}
    url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
    time.sleep(0.2)  # short pause between pages
# --- Raw export: rows exactly as accumulated in `all_rows` --------------------
df_raw = pd.DataFrame(all_rows)
df_raw.to_csv("m2t4_transactions_raw.csv", index=False)

# --- Clean export: typed amount/date columns and normalised text columns ------
df = df_raw.copy()
if not df.empty:
    # Values that cannot be parsed become NaN / NaT instead of raising.
    df["amount"] = pd.to_numeric(df.get("amount"), errors="coerce")
    df["created_at"] = pd.to_datetime(df.get("created_at"), errors="coerce", utc=True)

    # Each text column is stripped, then re-cased with the named `.str` method.
    for column, case_method in (("country", "title"), ("channel", "lower"), ("currency", "upper")):
        if column not in df.columns:
            # `df.get(column, "")` would hand back a plain str here, which has
            # no `.astype`; create the column with the intended "" default.
            df[column] = ""
            continue
        values = df[column]
        stripped = values.astype(str).str.strip()
        recased = getattr(stripped.str, case_method)()
        # Keep missing values missing rather than exporting the text that
        # `astype(str)` produces for them (e.g. "Nan" after title-casing).
        df[column] = recased.where(values.notna(), values)
df.to_csv("m2t4_transactions_clean.csv", index=False)

# --- Request log export -------------------------------------------------------
pd.DataFrame(request_log).to_csv("m2t4_request_log.csv", index=False)
# --- Run report: counters plus the date range covered by the cleaned data -----
# A date range is only reported when the cleaned frame has at least one
# non-missing `created_at`; checking `df.empty` first avoids touching the
# column when no rows were fetched.
has_dates = not df.empty and not df["created_at"].isna().all()
run_report = {
    # Timezone-aware replacement for the deprecated `datetime.utcnow()`; the
    # "+00:00" offset is rewritten so the stored format stays "...Z".
    "created_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    "requests_made": int(len(request_log)),
    "rows_fetched": int(len(df_raw)),
    "n_429": int(n_429),
    "date_min": df["created_at"].min().isoformat() if has_dates else None,
    "date_max": df["created_at"].max().isoformat() if has_dates else None,
}
with open("m2t4_run_report.json", "w", encoding="utf-8") as f:
    json.dump(run_report, f, ensure_ascii=False, indent=2)
# --- API contract: a short markdown summary of the endpoint, with the counters
# from `run_report` embedded as evidence. The API key value is never written.
contract = f"""# API Contract — Theme 4

## Endpoint
GET /api/labs/mock-api/v1/transactions

## Auth
X-API-Key: (hidden)

## Params
page, page_size, start_date, end_date, country (optional)

## Pagination
Follow `next` until null.

## Rate limit
429 Too Many Requests + Retry-After

## Evidence
rows_fetched: {run_report['rows_fetched']}
requests_made: {run_report['requests_made']}
n_429: {run_report['n_429']}
"""
with open("m2t4_api_contract.md", mode="w", encoding="utf-8") as contract_file:
    contract_file.write(contract)

# Final confirmation once the contract file has been written.
print("✅ Exports generated (raw/clean/log/report/contract).")
