# Notebook 1: SpaceX Launch History Data Acquisition

This notebook downloads SpaceX launch metadata from the public SpaceX API,
cleans and normalizes the results, and produces a CSV suitable for merging
with ERA5 reanalysis and forecast data in later notebooks.

Outputs from this notebook are used directly by Notebooks 2–5.

In [None]:
from __future__ import annotations

import requests
import pandas as pd
from pathlib import Path


In [None]:
# SpaceX public API (v4) endpoint that serves the launch records.
SPACEX_API = "https://api.spacexdata.com/v4/launches"

# Download the launch list. A 60 s timeout keeps the cell from hanging
# indefinitely, and raise_for_status() stops the notebook on an HTTP error
# status instead of parsing an error payload as if it were data.
resp = requests.get(url=SPACEX_API, timeout=60)
resp.raise_for_status()

# Decoded JSON body; every later cell derives its data from this object.
launches_raw = resp.json()
print(f"Retrieved {len(launches_raw)} launches from SpaceX API")


In [None]:
# Flatten the decoded launch records into a tabular DataFrame.
df = pd.json_normalize(data=launches_raw)

# List the available column names alphabetically so the subset kept in the
# next cell can be checked against what the API actually returned.
print("Raw columns:", sorted(df.columns.tolist()), sep="\n")


In [None]:
# Mapping of raw API column name -> column name used in this notebook.
# Only the columns listed here survive; "rocket" is renamed to "rocket_id".
keep_cols = {
    "id": "id",
    "name": "name",
    "date_utc": "date_utc",
    "rocket": "rocket_id",
    "launchpad": "launchpad",
    "success": "success",
    "details": "details",
}

# One pipeline: select the wanted columns, rename them, parse the launch
# timestamp as a timezone-aware UTC datetime, then derive the calendar year
# from that parsed timestamp.
df = (
    df.loc[:, list(keep_cols)]
    .rename(columns=keep_cols)
    .assign(date_utc=lambda frame: pd.to_datetime(frame["date_utc"], utc=True))
    .assign(year=lambda frame: frame["date_utc"].dt.year)
)


In [None]:
# SpaceX API v4 launchpad IDs for the two Florida pads (Kennedy Space Center
# and Cape Canaveral). IDs are opaque, so each one is annotated with the pad
# it refers to; cross-check against https://api.spacexdata.com/v4/launchpads
# if the API data ever changes.
#
# NOTE: "5e9e4502f509092b78566f87" is Vandenberg SLC-4E (California) and must
# NOT be listed here -- including it would mix West Coast launches into a
# dataset that is later joined with Florida weather data.
KSC_LAUNCHPADS = {
    "5e9e4502f509094188566f88",  # KSC LC-39A
    "5e9e4501f509094ba4566f84",  # CCSFS SLC-40
}

# Keep only launches from the Florida pads. .copy() detaches the result from
# the unfiltered frame so later column assignments do not trigger
# SettingWithCopyWarning.
df = df[df["launchpad"].isin(KSC_LAUNCHPADS)].copy()

print(f"Launches at KSC/Cape Canaveral: {len(df)}")


In [None]:
# Download the rocket catalogue so opaque rocket IDs can be turned into
# human-readable names; stop on any HTTP error status.
rocket_resp = requests.get(url="https://api.spacexdata.com/v4/rockets", timeout=60)
rocket_resp.raise_for_status()
rockets = rocket_resp.json()

# Lookup table: rocket id -> rocket name.
rocket_map = dict((rocket["id"], rocket["name"]) for rocket in rockets)

# Attach the readable name; IDs missing from the lookup become NaN.
df = df.assign(rocket_name=df["rocket_id"].map(rocket_map))


In [None]:
# launched_flag: 1.0 where `success` holds any value (True or False), 0.0
#   where it is missing. Used as a proxy for "did the mission eventually
#   launch?" -- presumably the API leaves `success` null for launches without
#   a recorded outcome; verify against the API docs.
# weather_scrub_flag: constant False placeholder (true labeling happens later
#   via NLP).
df = df.assign(
    launched_flag=(~df["success"].isna()).astype("float64"),
    weather_scrub_flag=False,
)


In [None]:
# Order launches chronologically with a fresh 0..n-1 index, then keep only the
# columns that make up the exported table (in this exact order). Helper
# columns such as `rocket_id` and `success` are dropped here.
df = df.sort_values(by="date_utc", ignore_index=True).loc[
    :,
    [
        "id",
        "name",
        "date_utc",
        "year",
        "launchpad",
        "rocket_name",
        "launched_flag",
        "weather_scrub_flag",
        "details",
    ],
]

# Preview the first rows of the final table.
df.head()


In [None]:
# Output directory, relative to the notebook's working directory; created on
# first run and left alone if it already exists.
data_dir = Path("data")
data_dir.mkdir(parents=False, exist_ok=True)

# NOTE(review): the file name advertises 2010-2024, but none of the cells
# visible in this notebook filter by year -- confirm the name matches the data.
out_path = data_dir.joinpath("spacex_launches_ksc_2010_2024.csv")

# Write without the positional index so the CSV contains only data columns.
df.to_csv(path_or_buf=out_path, index=False)

# Display the path of the file that was written.
out_path


In [None]:
# Sanity-check summaries of the exported table: launches per rocket type
# (most frequent first), followed by launches per year in chronological order.
print(
    df["rocket_name"].value_counts(),
    df["year"].value_counts().sort_index(),
    sep="\n",
)