# Batch fetch Job **logs** and **status** to files

Paste your job IDs, run all cells, and this notebook will:
1. Call:
   - `https://acadcodegen-production.up.railway.app/api/job/<JOB_ID>/logs`
   - `https://acadcodegen-production.up.railway.app/api/job/<JOB_ID>/status`
2. Save results under `outputs/` as JSON (and raw text if needed)
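3. Write `outputs/summary.csv` with one row per job: the HTTP status code of each request, the extracted job status (if one is found), and the names of the saved JSON files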

**Tip:** If an endpoint returns non‑JSON (e.g. plain text logs), this notebook saves both a JSON wrapper and a `*.txt` raw copy.

In [None]:
#@title 🔧 Setup
import os, json, time, csv
from typing import Any, Dict, Tuple
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import pandas as pd

# Root of the job API. Request URLs are built as f"{BASE}/<JOB_ID>/<endpoint>",
# so this value must not end with a slash.
BASE = "https://acadcodegen-production.up.railway.app/api/job"

# Directory that receives every saved response and the summary CSV.
OUTDIR = "outputs"
os.makedirs(OUTDIR, exist_ok=True)  # exist_ok keeps re-running this cell harmless

def make_session(total_retries: int = 3, backoff: float = 0.5, timeout: int = 30):
    """Create a ``requests.Session`` that retries GET requests on transient errors.

    Args:
        total_retries: Retry budget; used for the total, read and connect
            counters of the urllib3 ``Retry`` policy.
        backoff: Value passed to ``Retry`` as ``backoff_factor``.
        timeout: Stored on the returned session as ``request_timeout`` so that
            callers can pass it to each request.

    Returns:
        The configured session, with the retry policy mounted for both
        ``https://`` and ``http://`` URLs.
    """
    retry_policy = Retry(
        total=total_retries,
        connect=total_retries,
        read=total_retries,
        backoff_factor=backoff,
        # Retry only on rate limiting and server-side errors.
        status_forcelist=(429, 500, 502, 503, 504),
        allowed_methods=("GET",),
        # Hand back the last response instead of raising once retries run out.
        raise_on_status=False,
    )

    session = requests.Session()
    session.headers.update({"Accept": "*/*", "User-Agent": "colab-job-fetcher/1.0"})
    for scheme in ("https://", "http://"):
        session.mount(scheme, HTTPAdapter(max_retries=retry_policy))

    # Custom attribute (not part of the requests API) carrying the timeout
    # alongside the session.
    session.request_timeout = timeout
    return session

# Single shared session used for every request in this notebook.
SESSION = make_session()

def robust_get(url: str) -> Tuple[int, str, Any]:
    """GET ``url`` and return ``(status_code, content_type, parsed_or_text)``.

    The body is returned as parsed JSON whenever it is valid JSON, whatever
    Content-Type the server declares; otherwise the decoded text is returned.
    The Content-Type header is reported to the caller but does not influence
    parsing.

    Args:
        url: Absolute URL to fetch with the shared ``SESSION``.

    Returns:
        A ``(status_code, content_type, data)`` tuple. ``content_type`` is ``""``
        when the header is absent. If the request itself fails (connection
        error, timeout, ...), returns ``(0, "", {"error": "<message>"})``
        instead of raising.
    """
    # Only the network call can raise RequestException, so keep the try tight.
    try:
        r = SESSION.get(url, timeout=SESSION.request_timeout)
    except requests.RequestException as e:
        return 0, "", {"error": str(e)}

    ctype = r.headers.get("Content-Type", "")

    # Response.json() signals an unparsable body with a ValueError subclass;
    # fall back to the raw text in that case (e.g. text/plain logs).
    data: Any
    try:
        data = r.json()
    except ValueError:
        data = r.text
    return r.status_code, ctype, data

In [None]:
#@title ✏️ Paste your Job IDs here
def normalize_job_ids(raw_ids):
    """Return ``raw_ids`` stripped of whitespace, without blanks or duplicates.

    Pasted ID lists easily pick up stray spaces/newlines, empty entries or
    repeats. Whitespace would end up inside the request URL and output file
    names, and a repeated ID would be fetched twice and overwrite its own
    files. First-seen order is preserved.
    """
    stripped = (raw.strip() for raw in raw_ids)
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(item for item in stripped if item))


job_ids = [
    # Examples from your message (you can replace/extend this list):
    "ai_pipeline_d9de2314-d984-45b6-b130-2bc0d0886fa2",  # BadgeRegistry (Error: context is not defined)
    "ai_pipeline_b8c58f70-7f3d-4e4b-96c1-cad82372af3b",  # ContentRegistry (Error: context is not defined)
    "ai_pipeline_0d8a9752-b54b-4fd8-a6a6-e607b3c21206",  # WhitelistRegistry (Error: context is not defined)
    "ai_pipeline_9d3e8c13-7fde-4d5d-ae19-846fcf66a905",  # SimpleLottery (Error: context is not defined)
]
job_ids = normalize_job_ids(job_ids)

# Optional small delay between requests to be gentle on the API
per_request_sleep_seconds = 0.1  #@param {type:"number"}

print(f"Loaded {len(job_ids)} job IDs.")

In [None]:
#@title 🚀 Fetch logs & status for each job and save to files
# Column order of summary.csv and of the displayed DataFrame.
_SUMMARY_FIELDS = [
    "job_id", "logs_http", "status_http", "status_value", "logs_file", "status_file",
]
# Keys probed, in order, for a job status in the /status JSON payload.
_STATUS_KEYS = ("status", "state", "job_status", "result")


def _fetch_and_save(job_id, kind):
    """Fetch one endpoint (``kind`` is "logs" or "status") of a job and save it.

    Always writes ``<job_id>_<kind>.json`` wrapping the response; when the body
    is plain text it is also written verbatim to ``<job_id>_<kind>.txt``.
    Saving is best-effort: a write failure is reported and the batch goes on.

    Returns:
        ``(http_status, data, json_path)`` for the summary row.
    """
    url = f"{BASE}/{job_id}/{kind}"
    print("GET", url)
    status_code, content_type, data = robust_get(url)
    print("   status:", status_code, "content-type:", content_type)

    json_path = os.path.join(OUTDIR, f"{job_id}_{kind}.json")
    txt_path = os.path.join(OUTDIR, f"{job_id}_{kind}.txt")
    wrapper = {
        "url": url,
        "status_code": status_code,
        "content_type": content_type,
        "data": data,
    }
    try:
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(wrapper, f, ensure_ascii=False, indent=2)
        if isinstance(data, str):
            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(data)
    except (OSError, TypeError, ValueError) as e:
        # OSError: filesystem problems; TypeError/ValueError: unserializable
        # or unencodable payload.
        print(f"   ⚠️ Failed to save {kind}:", e)
    return status_code, data, json_path


summary_rows = []

for position, job_id in enumerate(job_ids):
    # Pause between jobs as well, so no two requests are sent back to back.
    if position:
        time.sleep(per_request_sleep_seconds)

    print(f"\n==> {job_id}")
    sc_logs, _, logs_json_path = _fetch_and_save(job_id, "logs")

    time.sleep(per_request_sleep_seconds)

    sc_status, data_status, status_json_path = _fetch_and_save(job_id, "status")

    # Best-effort extraction of a status field if the payload is a JSON object
    status_value = None
    if isinstance(data_status, dict):
        status_value = next(
            (data_status[key] for key in _STATUS_KEYS if key in data_status),
            None,
        )

    summary_rows.append({
        "job_id": job_id,
        "logs_http": sc_logs,
        "status_http": sc_status,
        "status_value": status_value,
        "logs_file": os.path.basename(logs_json_path),
        "status_file": os.path.basename(status_json_path),
    })

print("\nSaving summary...")
summary_csv = os.path.join(OUTDIR, "summary.csv")
with open(summary_csv, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=_SUMMARY_FIELDS)
    writer.writeheader()
    writer.writerows(summary_rows)

# Explicit columns keep the header visible even when no jobs were processed.
df = pd.DataFrame(summary_rows, columns=_SUMMARY_FIELDS)
df

In [None]:
#@title 📦 (Optional) Zip the outputs/ folder for download
import shutil

# Pack the contents of OUTDIR into job_results.zip; make_archive returns the
# path of the archive it created.
zip_path = shutil.make_archive(base_name="job_results", format="zip", root_dir=OUTDIR)
print("Created:", zip_path)

## Troubleshooting
- If you see HTTP 401/403, your endpoint probably requires authentication; add the required headers/tokens to the `headers.update(...)` call in `make_session`, then re-run the setup cell.
- If logs come back as text, check the `*_logs.txt` alongside the JSON.
- For flaky connections, pass larger values where the session is created, e.g. `SESSION = make_session(total_retries=5, timeout=60)`.
- If your base URL changes, edit the `BASE` constant in the setup cell.