## AstrID API Ingestion Troubleshooting

This notebook uses the public API to ingest real survey data and verify results. It is safe to run repeatedly and helps validate auth, SSL, and endpoint behavior.

Endpoints used:
- POST `/observations/surveys/{survey_id}/ingest` (primary)
- GET `/observations/surveys/{survey_id}/observations`
- (optional) POST `/observations/ingest/mast`
- (optional) POST `/observations/ingest/reference-dataset`


In [6]:
# Setup
import os
import json
import time
import certifi
import requests
from pathlib import Path

# Base API
API_BASE = os.getenv("ASTRID_API_BASE", "http://127.0.0.1:8000")

# Auth
try:
    from src.core.constants import FULL_ACCESS_API_KEY
    API_KEY = FULL_ACCESS_API_KEY
except Exception:
    # fallback to env
    API_KEY = os.getenv("ASTRID_API_KEY", "")

AUTH_HEADERS = {
    "X-API-Key": API_KEY,
    "Content-Type": "application/json",
}

REQUESTS_KW = {"verify": certifi.where(), "timeout": 120}

print(f"API_BASE: {API_BASE}")
print("✅ Requests configured with certifi CA bundle")


API_BASE: http://127.0.0.1:8000
✅ Requests configured with certifi CA bundle


In [7]:
# Helpers

def api_post(path: str, payload: dict):
    url = f"{API_BASE}{path}"
    r = requests.post(url, headers=AUTH_HEADERS, json=payload, **REQUESTS_KW)
    try:
        print(r.text)
        r.raise_for_status()
    except Exception as e:
        print("❌ POST", url)
        print("Status:", r.status_code)
        print("Body:", r.text[:500])
        raise
    data = r.json()
    return data.get("data", data)


def api_get(path: str, params: dict | None = None):
    url = f"{API_BASE}{path}"
    r = requests.get(url, headers=AUTH_HEADERS, params=params or {}, **REQUESTS_KW)
    try:
        r.raise_for_status()
    except Exception as e:
        print("❌ GET", url)
        print("Status:", r.status_code)
        print("Body:", r.text[:500])
        raise
    data = r.json()
    return data.get("data", data)

print("✅ API helpers ready")


✅ API helpers ready


In [8]:
# Configure target survey
# Prefer SURVEY_NAME + per-survey env UUIDs; fallback to ASTRID_SURVEY_ID
SURVEY_NAME = os.getenv("ASTRID_SURVEY_NAME", "hst").lower()
SURVEY_ID = os.getenv("ASTRID_SURVEY_ID", "")

ENV_MAP = {
    "hst": os.getenv("ASTRID_HST_SURVEY_ID", ""),
    "jwst": os.getenv("ASTRID_JWST_SURVEY_ID", ""),
    "dss2": os.getenv("ASTRID_DSS2_SURVEY_ID", ""),
    "tess": os.getenv("ASTRID_TESS_SURVEY_ID", ""),
}
if not SURVEY_ID:
    SURVEY_ID = ENV_MAP.get(SURVEY_NAME, "")

if not SURVEY_ID:
    print("⚠️ Set ASTRID_SURVEY_ID or a per-survey env var (e.g., ASTRID_HST_SURVEY_ID)")
else:
    print(f"Using SURVEY_NAME={SURVEY_NAME}, SURVEY_ID={SURVEY_ID}")


Using SURVEY_NAME=hst, SURVEY_ID=05e6090c-bac5-4b78-8d7d-ae15a7dde50f


In [9]:
# 1) Ingest observations (API-key friendly)
# Use batch-random endpoint which accepts X-API-Key
if not SURVEY_ID:
    raise RuntimeError("SURVEY_ID is required. Set ASTRID_SURVEY_ID or proper per-survey env var.")

payload = {
    "survey_id": SURVEY_ID,
    "count": 10,
    "missions": ["HST" if SURVEY_NAME == "hst" else SURVEY_NAME.upper()],
    "avoid_galactic_plane": True,
}

print("POST /observations/ingest/batch-random ->", payload)
res = api_post("/observations/ingest/batch-random", payload)
print("Ingested:", len(res))
# Keep IDs for follow-up steps
observation_ids = [o.get("id") for o in res if isinstance(o, dict) and o.get("id")]
print("Observation IDs:", observation_ids[:5])


POST /observations/ingest/batch-random -> {'survey_id': '05e6090c-bac5-4b78-8d7d-ae15a7dde50f', 'count': 10, 'missions': ['HST'], 'avoid_galactic_plane': True}
{"status":"success","data":[{"id":"8a040dd3-3402-4b55-8bc0-dd7412eb5fda","survey":"05e6090c-bac5-4b78-8d7d-ae15a7dde50f","observation_id":"mock_obs_231.5337485636403_-7.314540982100667","ra":231.5337486,"dec":-7.314541,"observation_time":"2025-09-24T09:35:51.288312+00:00","filter_band":"F814W","exposure_time":600.0,"fits_url":"https://mast.stsci.edu/mock/...","status":"ingested","created_at":"2025-09-24T09:35:54.412033+00:00","updated_at":"2025-09-24T09:35:54.412033+00:00"},{"id":"c141aef3-de1a-4eae-8662-4e60df211777","survey":"05e6090c-bac5-4b78-8d7d-ae15a7dde50f","observation_id":"mock_obs_91.80514764316993_-5.60760053522182","ra":91.8051476,"dec":-5.6076005,"observation_time":"2025-09-24T09:35:51.454192+00:00","filter_band":"F814W","exposure_time":600.0,"fits_url":"https://mast.stsci.edu/mock/...","status":"ingested","created

In [10]:
# 2) Verify observations under survey
ver = api_get(f"/observations/surveys/{SURVEY_ID}/observations", params={"limit": 10})
print("Total:", ver.get("total"), "Showing:", len(ver.get("observations", [])))
for o in ver.get("observations", [])[:3]:
    print(o["observation_id"], o["ra"], o["dec"], o["status"])


❌ GET http://127.0.0.1:8000/observations/surveys/05e6090c-bac5-4b78-8d7d-ae15a7dde50f/observations
Status: 403
Body: {"detail":"Not authenticated"}


HTTPError: 403 Client Error: Forbidden for url: http://127.0.0.1:8000/observations/surveys/05e6090c-bac5-4b78-8d7d-ae15a7dde50f/observations?limit=10

In [None]:
# Optional: single position ingest (MAST)
payload_mast = {
    "survey_id": SURVEY_ID,
    "ra": 83.633,
    "dec": 22.0145,
    "radius": 0.1,
    "missions": ["HST" if SURVEY_NAME == "hst" else SURVEY_NAME.upper()],
}

print("POST /observations/ingest/mast ->", payload_mast)
res_mast = api_post("/observations/ingest/mast", payload_mast)
print("MAST ingested:", len(res_mast))


In [None]:
# Optional: reference dataset (SkyView/HiPS cutouts)
payload_ref = {
    "survey_id": SURVEY_ID,
    "ra": 83.633,
    "dec": 22.0145,
    "size": 0.25,
    "pixels": 512,
    "surveys": ["DSS2 Red"],
}

print("POST /observations/ingest/reference-dataset ->", payload_ref)
try:
    res_ref = api_post("/observations/ingest/reference-dataset", payload_ref)
    print("Reference dataset created:", list(res_ref.keys()))
except Exception as e:
    print("Reference dataset creation failed (skip):", e)
