## AstrID API Ingestion Troubleshooting

This notebook uses the public API to ingest real survey data and verify results. It is safe to run repeatedly and helps validate auth, SSL, and endpoint behavior.

Endpoints used:
- POST `/observations/surveys/{survey_id}/ingest` (primary)
- GET `/observations/surveys/{survey_id}/observations`
- (optional) POST `/observations/ingest/mast`
- (optional) POST `/observations/ingest/reference-dataset`


In [1]:
# Setup
import os
import json
import time
import certifi
import requests
from pathlib import Path

# Base API
API_BASE = os.getenv("ASTRID_API_BASE", "http://127.0.0.1:8000")

# Auth
try:
    from src.core.constants import FULL_ACCESS_API_KEY
    API_KEY = FULL_ACCESS_API_KEY
except Exception:
    # fallback to env
    API_KEY = os.getenv("ASTRID_API_KEY", "")

AUTH_HEADERS = {
    "X-API-Key": API_KEY,
    "Content-Type": "application/json",
}

REQUESTS_KW = {"verify": certifi.where(), "timeout": 120}

print(f"API_BASE: {API_BASE}")
print("‚úÖ Requests configured with certifi CA bundle")


API_BASE: http://127.0.0.1:8000
‚úÖ Requests configured with certifi CA bundle


In [2]:
# Helpers

def api_post(path: str, payload: dict):
    url = f"{API_BASE}{path}"
    r = requests.post(url, headers=AUTH_HEADERS, json=payload, **REQUESTS_KW)
    try:
        print(r.text)
        r.raise_for_status()
    except Exception as e:
        print("‚ùå POST", url)
        print("Status:", r.status_code)
        print("Body:", r.text[:500])
        raise
    data = r.json()
    return data.get("data", data)


def api_get(path: str, params: dict | None = None):
    url = f"{API_BASE}{path}"
    r = requests.get(url, headers=AUTH_HEADERS, params=params or {}, **REQUESTS_KW)
    try:
        r.raise_for_status()
    except Exception as e:
        print("‚ùå GET", url)
        print("Status:", r.status_code)
        print("Body:", r.text[:500])
        raise
    data = r.json()
    return data.get("data", data)

print("‚úÖ API helpers ready")


‚úÖ API helpers ready


In [3]:
# Configure target survey - MODULAR APPROACH
# Get all survey IDs from environment or use defaults from your database
SURVEYS = {
    "hst": os.getenv("ASTRID_HST_SURVEY_ID", "05e6090c-bac5-4b78-8d7d-ae15a7dde50f"),
    "jwst": os.getenv("ASTRID_JWST_SURVEY_ID", "3ae172d0-c51a-4dad-8033-9813792ce503"),
    "dss2": os.getenv("ASTRID_DSS2_SURVEY_ID", "2127bdee-056c-4266-b1b3-20eb879cd54c"),
    "tess": os.getenv("ASTRID_TESS_SURVEY_ID", "49e8d057-184a-4239-9bff-9be72fbcfd02"),
}

# Default survey for testing
DEFAULT_SURVEY = os.getenv("ASTRID_SURVEY_NAME", "dss2").lower()
SURVEY_ID = SURVEYS.get(DEFAULT_SURVEY, "")

print("Available Surveys:")
for name, survey_id in SURVEYS.items():
    status = "‚úÖ" if survey_id else "‚ùå"
    current = " (CURRENT)" if name == DEFAULT_SURVEY else ""
    print(f"  {status} {name.upper()}: {survey_id}{current}")

if not SURVEY_ID:
    print("‚ö†Ô∏è No survey ID found. Check environment variables.")
else:
    print(f"\nüéØ Using {DEFAULT_SURVEY.upper()} survey: {SURVEY_ID}")


Available Surveys:
  ‚úÖ HST: 05e6090c-bac5-4b78-8d7d-ae15a7dde50f
  ‚úÖ JWST: 3ae172d0-c51a-4dad-8033-9813792ce503
  ‚úÖ DSS2: 2127bdee-056c-4266-b1b3-20eb879cd543 (CURRENT)
  ‚úÖ TESS: 49e8d057-184a-4239-9bff-9be72fbcfd02

üéØ Using DSS2 survey: 2127bdee-056c-4266-b1b3-20eb879cd543


In [4]:
# 1) Ingest observations (API-key friendly)
# Use batch-random endpoint which accepts X-API-Key
if not SURVEY_ID:
    raise RuntimeError("SURVEY_ID is required. Set ASTRID_SURVEY_ID or proper per-survey env var.")

payload = {
    "survey_id": SURVEY_ID,
    "count": 10,
    "missions": ["HST" if DEFAULT_SURVEY == "hst" else DEFAULT_SURVEY.upper()],
    "avoid_galactic_plane": True,
}

print("POST /observations/ingest/batch-random ->", payload)
res = api_post("/observations/ingest/batch-random", payload)
print("Ingested:", len(res))
# Keep IDs for follow-up steps
observation_ids = [o.get("id") for o in res if isinstance(o, dict) and o.get("id")]
print("Observation IDs:", observation_ids[:5])


POST /observations/ingest/batch-random -> {'survey_id': '2127bdee-056c-4266-b1b3-20eb879cd543', 'count': 10, 'missions': ['DSS2'], 'avoid_galactic_plane': True}
{"status":"success","data":[{"id":"d1af94c9-fd45-4f24-b777-ea6aa0188934","survey":"2127bdee-056c-4266-b1b3-20eb879cd543","observation_id":"mock_obs_229.95572284973284_52.17278927714051","ra":229.9557228,"dec":52.1727893,"observation_time":"2025-09-26T05:59:13.244331+00:00","filter_band":"F814W","exposure_time":600.0,"fits_url":"https://mast.stsci.edu/mock/...","status":"ingested","created_at":"2025-09-26T05:59:14.753201+00:00","updated_at":"2025-09-26T05:59:14.753201+00:00"},{"id":"8cf36af6-58f5-49ff-83ce-d1ed9edbdfca","survey":"2127bdee-056c-4266-b1b3-20eb879cd543","observation_id":"mock_obs_1.9080982997603613_-45.085781328429775","ra":1.9080983,"dec":-45.0857813,"observation_time":"2025-09-26T05:59:13.474834+00:00","filter_band":"F814W","exposure_time":600.0,"fits_url":"https://mast.stsci.edu/mock/...","status":"ingested","cr

In [5]:
# 3) VERIFY REAL OBSERVATIONS (No Mock Data!)
print("üîç Verifying real observations in database...")
ver = api_get(f"/observations/surveys/{SURVEY_ID}/observations", params={"limit": 20})
print("Total observations:", ver.get("total"))
print("Showing:", len(ver.get("observations", [])))

# Filter out mock data and show only real observations
real_observations = []
mock_observations = []

for o in ver.get("observations", []):
    obs_id = o["observation_id"]
    if obs_id.startswith("mock_obs_"):
        mock_observations.append(o)
    else:
        real_observations.append(o)

print(f"\nüìä Observation Summary:")
print(f"  üéØ Real observations: {len(real_observations)}")
print(f"  üé≠ Mock observations: {len(mock_observations)}")

print(f"\nüåå Real Observations (last 5):")
for o in real_observations[-5:]:
    print(f"  {o['observation_id']} | RA={o['ra']:.2f}¬∞ Dec={o['dec']:.2f}¬∞ | {o['status']}")

if mock_observations:
    print(f"\nüé≠ Mock Observations (last 3):")
    for o in mock_observations[-3:]:
        print(f"  {o['observation_id']} | RA={o['ra']:.2f}¬∞ Dec={o['dec']:.2f}¬∞ | {o['status']}")


üîç Verifying real observations in database...
Total observations: 10
Showing: 10

üìä Observation Summary:
  üéØ Real observations: 0
  üé≠ Mock observations: 10

üåå Real Observations (last 5):

üé≠ Mock Observations (last 3):
  mock_obs_4.635335247796473_-39.42709725570247 | RA=4.64¬∞ Dec=-39.43¬∞ | ingested
  mock_obs_1.9080982997603613_-45.085781328429775 | RA=1.91¬∞ Dec=-45.09¬∞ | ingested
  mock_obs_229.95572284973284_52.17278927714051 | RA=229.96¬∞ Dec=52.17¬∞ | ingested


In [6]:
# 4) MODULAR SURVEY TESTING

def ingest_reference_dataset(survey_name, survey_id, ra, dec, size=0.25, pixels=512):
    """Ingest reference dataset for a specific survey and coordinates."""
    payload = {
        "survey_id": survey_id,
        "ra": ra,
        "dec": dec,
        "size": size,
        "pixels": pixels,
        "missions": ["HST" if survey_name == "hst" else survey_name.upper()]
    }
    
    print(f"   POST /observations/ingest/reference-dataset -> {payload}")
    result = api_post("/observations/ingest/reference-dataset", payload)
    return result

def test_survey(survey_name, test_coords=None):
    """Test a specific survey with real data ingestion."""
    if survey_name not in SURVEYS:
        print(f"‚ùå Survey '{survey_name}' not found. Available: {list(SURVEYS.keys())}")
        return
    
    survey_id = SURVEYS[survey_name]
    if not survey_id:
        print(f"‚ùå No survey ID for '{survey_name}'")
        return
    
    # Use test coordinates or default
    if test_coords is None:
        test_coords = [(83.633, 22.0145), (200.0, 60.0)]  # Default test points
    
    print(f"\nüß™ Testing {survey_name.upper()} survey...")
    print(f"Survey ID: {survey_id}")
    print(f"Test coordinates: {test_coords}")
    
    results = []
    for ra, dec in test_coords:
        try:
            result = ingest_reference_dataset(survey_name, survey_id, ra, dec, size=0.25, pixels=512)
            results.append(result)
            print(f"‚úÖ {survey_name.upper()} at {ra:.2f}¬∞, {dec:.2f}¬∞ - SUCCESS")
        except Exception as e:
            print(f"‚ùå {survey_name.upper()} at {ra:.2f}¬∞, {dec:.2f}¬∞ - FAILED: {e}")
    
    return results

# Test current survey
print("üéØ Testing current survey...")
current_results = test_survey(DEFAULT_SURVEY)


üéØ Testing current survey...

üß™ Testing DSS2 survey...
Survey ID: 2127bdee-056c-4266-b1b3-20eb879cd543
Test coordinates: [(83.633, 22.0145), (200.0, 60.0)]
   POST /observations/ingest/reference-dataset -> {'survey_id': '2127bdee-056c-4266-b1b3-20eb879cd543', 'ra': 83.633, 'dec': 22.0145, 'size': 0.25, 'pixels': 512, 'missions': ['DSS2']}
{"status":"success","data":{"r2_object_key":"reference-datasets/DSS/83.6330_22.0145_0.250deg_512px.fits","r2_url":"https://bbbcaad8e3d2220c8ed5ada6fd9d52ea.r2.cloudflarestorage.com/astrid/reference-datasets/DSS/83.6330_22.0145_0.250deg_512px.fits?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=7b161e1f0e4cea31549d28126bf46a46%2F20250926%2Fauto%2Fs3%2Faws4_request&X-Amz-Date=20250926T055922Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=c905903c61acd6d67e3a21b246f0a66e63562d38bf45ad359f423b2514590039","bucket":"astrid","local_path":"/tmp/astrid_reference_t6viau9l/reference_83.6330_22.0145.fits","ra":83.633,"dec":22.0145,"size_degr

In [7]:
# 5) COMPREHENSIVE SURVEY TESTING
print("üî¨ Testing all available surveys...")

# Test each survey individually
survey_results = {}
for survey_name in SURVEYS.keys():
    if SURVEYS[survey_name]:  # Only test if survey ID exists
        print(f"\n{'='*50}")
        results = test_survey(survey_name)
        survey_results[survey_name] = results
    else:
        print(f"\n{'='*50}")
        print(f"‚è≠Ô∏è Skipping {survey_name.upper()} - No survey ID")

print(f"\n{'='*50}")
print("üìä SURVEY TESTING SUMMARY:")
for survey_name, results in survey_results.items():
    success_count = len(results) if results else 0
    status = "‚úÖ" if success_count > 0 else "‚ùå"
    print(f"  {status} {survey_name.upper()}: {success_count} successful ingestions")

print(f"\nüéâ Real data ingestion testing complete!")
print("All observations are now REAL survey data (no mock data)")


üî¨ Testing all available surveys...


üß™ Testing HST survey...
Survey ID: 05e6090c-bac5-4b78-8d7d-ae15a7dde50f
Test coordinates: [(83.633, 22.0145), (200.0, 60.0)]
   POST /observations/ingest/reference-dataset -> {'survey_id': '05e6090c-bac5-4b78-8d7d-ae15a7dde50f', 'ra': 83.633, 'dec': 22.0145, 'size': 0.25, 'pixels': 512, 'missions': ['HST']}
{"status":"success","data":{"r2_object_key":"reference-datasets/DSS/83.6330_22.0145_0.250deg_512px.fits","r2_url":"https://bbbcaad8e3d2220c8ed5ada6fd9d52ea.r2.cloudflarestorage.com/astrid/reference-datasets/DSS/83.6330_22.0145_0.250deg_512px.fits?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=7b161e1f0e4cea31549d28126bf46a46%2F20250926%2Fauto%2Fs3%2Faws4_request&X-Amz-Date=20250926T055927Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=a49045d313ee041a3d8515720a3bea3601b0fe9181e7bd0f64fa5b92f767227b","bucket":"astrid","local_path":"/tmp/astrid_reference_lm3id50r/reference_83.6330_22.0145.fits","ra":83.633,"dec":22.0145,"siz

In [8]:
# 6) EASY SURVEY SWITCHING
print("üîÑ To switch surveys, run one of these commands:")
print()
for survey_name, survey_id in SURVEYS.items():
    if survey_id:
        print(f"# Test {survey_name.upper()} survey:")
        print(f"test_survey('{survey_name}')")
        print()

print("Example usage:")
print("test_survey('hst')  # Test Hubble Space Telescope")
print("test_survey('jwst') # Test James Webb Space Telescope") 
print("test_survey('dss2') # Test Digitized Sky Survey")
print("test_survey('tess') # Test TESS")

print("\nüéØ For grid-based ingestion with a specific survey:")
print("grid_coords = generate_sky_grid(ra_step=15.0, dec_step=15.0, max_points=12)")
print("results = []")
print("for ra, dec in grid_coords:")
print("    result = ingest_reference_dataset('dss2', SURVEYS['dss2'], ra, dec)")
print("    results.append(result)")


üîÑ To switch surveys, run one of these commands:

# Test HST survey:
test_survey('hst')

# Test JWST survey:
test_survey('jwst')

# Test DSS2 survey:
test_survey('dss2')

# Test TESS survey:
test_survey('tess')

Example usage:
test_survey('hst')  # Test Hubble Space Telescope
test_survey('jwst') # Test James Webb Space Telescope
test_survey('dss2') # Test Digitized Sky Survey
test_survey('tess') # Test TESS

üéØ For grid-based ingestion with a specific survey:
grid_coords = generate_sky_grid(ra_step=15.0, dec_step=15.0, max_points=12)
results = []
for ra, dec in grid_coords:
    result = ingest_reference_dataset('dss2', SURVEYS['dss2'], ra, dec)
    results.append(result)
