## Load libraries

In [5]:
# Install geospatial dependencies (needed only on a fresh runtime)
!pip install -q pygeohydro geopandas shapely

import os
import zipfile
from pathlib import Path
from typing import Union, Tuple
from urllib.parse import urlsplit

import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import box
from tqdm import tqdm
from pygeohydro import WBD          # HyRiver stack (USGS web-service helpers)

## Define parameters for Climate Engine API

In [9]:
# Climate Engine connection settings.
# The API token and contact e-mail are read from the CE_API_KEY and
# CE_USER_EMAIL environment variables so that real credentials never have to
# be saved inside the notebook. When a variable is not set, the original
# placeholder value is used, so replace it (or export the variable) before
# submitting requests.
ROOT_URL = "https://api.climateengine.org/"
API_KEY = os.environ.get("CE_API_KEY", "key_here")
HEADERS = {"Authorization": API_KEY}   # sent with every Climate Engine request
USER_EMAIL = os.environ.get("CE_USER_EMAIL", "xxxxx@xxx.com")

## Create list of HUC10 watersheds in Colorado and select a subset

In [7]:
# Retrieve HUC-10 geometries with PyGeoHydro and pick a sample set.
# Bounding box that covers Colorado (lon/lat, WGS84).
co_bbox: Tuple[float, float, float, float] = (
    -109.060253, 36.992426,  # min lon, min lat
    -102.041524, 41.003444,  # max lon, max lat
)

# Query the USGS Watershed Boundary Dataset
wbd = WBD("huc10")                     # you MUST specify the layer
huc10_gdf: gpd.GeoDataFrame = wbd.bygeom(co_bbox)  # returns GeoDataFrame

# Pick positional rows 100-125 (slice 100:126, i.e. 26 rows) — same as in R.
# IDs and names are read from the SAME positional slice so the two lists are
# always the same length and stay aligned row-for-row, even if a "huc10"
# value were to appear more than once in the frame (a label-based lookup
# would return extra rows in that case and mis-pair IDs with names).
huc10_sample = huc10_gdf.iloc[100:126]
huc10_ids = huc10_sample["huc10"].tolist()
# Names are truncated to 35 characters before being sent as the site name.
huc10_names = huc10_sample["name"].str.slice(0, 35).tolist()

  resp = self._cleanup_resp(resp, payloads)


## Generate Site Characterization reports in batch for each HUC10 watershed

In [10]:
# Helper that builds parameters & submits a single Site Characterization request
# Climate-Engine request
def generate_report(huc_id: str, name: str) -> dict:
    """
    Request a Site Characterization report for a single HUC-10 watershed.

    Issues a GET against the Climate Engine
    reports/site_characterization/feature_collection endpoint, selecting the
    watershed through its ``huc10`` code, and hands back the decoded JSON body.
    ``raise_for_status`` is called on the reply, so an HTTP error status
    raises instead of returning.
    """
    endpoint = "reports/site_characterization/feature_collection"

    query = dict(
        user_email=USER_EMAIL,
        site_name=name,
        site_type=f"HUC10 {huc_id}",
        site_description="Report for Watershed Condition Assessment",
        mask_ownership="None",             # switch to e.g. 'BLM' to mask by owner
        mask_landcover="True",
        feature_collection_asset_id="USGS/WBD/2017/HUC10",
        sub_choices=huc_id,
        filter_by="huc10",
        batch="True",
    )

    response = requests.get(
        f"{ROOT_URL}{endpoint}",
        params=query,
        headers=HEADERS,
        timeout=180,
    )
    response.raise_for_status()
    return response.json()

In [11]:
# Request one report per selected watershed and keep each JSON reply,
# in the same order as the ID/name lists.
responses = []
print(f"Requesting {len(huc10_ids)} HUC-10 reports from Climate Engine …")
huc_pairs = list(zip(huc10_ids, huc10_names))
for watershed_id, watershed_name in tqdm(huc_pairs):
    reply = generate_report(watershed_id, watershed_name)
    responses.append(reply)

Requesting 26 HUC-10 reports from Climate Engine …


100%|██████████| 26/26 [03:03<00:00,  7.05s/it]


## Parse the results from the API into a dataframe

In [17]:
# Turn each nested JSON reply into one flat row of report metadata.
# Maps the DataFrame column name to the key used inside the reply's "Data" object.
FIELD_KEYS = {
    "message":          "Message",
    "report_link":      "Report link",
    "site_description": "Site description",
    "site_name":        "Site name",
    "site_type":        "Site type",
}

records = [
    {column: reply["Data"][key] for column, key in FIELD_KEYS.items()}
    for reply in responses
]

response_df = pd.DataFrame(records)
response_df

Unnamed: 0,message,report_link,site_description,site_name,site_type
0,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,Sidney Draw,HUC10 1019001702
1,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,Brule Canyon-South Platte River,HUC10 1019001803
2,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,Sand Draw,HUC10 1025000501
3,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,City of Venango,HUC10 1025000601
4,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,Town of Brandon,HUC10 1025000603
5,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,City of Grant,HUC10 1025000604
6,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,Little Crow Creek,HUC10 1019000902
7,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,Glade Creek-Dolores River,HUC10 1403000206
8,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,Beaver Creek-San Miguel River,HUC10 1403000303
9,Report initiated,https://storage.googleapis.com/reports-site/cu...,Report for Watershed Condition Assessment,Salt Creek Wash,HUC10 1408010506


## Download all reports

In [24]:
# Folder that receives the downloaded report archives; created on first run
# and reused as-is when it already exists.
download_dir = Path("Reports_Out")
os.makedirs(download_dir, exist_ok=True)

def download_file(url: str, folder: Union[str, Path]) -> Union[Path, None]:
    """
    Stream-download *url* into *folder*.

    Parameters
    ----------
    url : str
        Address of the file to fetch. The local file name is the last segment
        of the URL *path*; any query string or fragment is ignored.
    folder : str or Path
        Existing directory to write into.

    Returns
    -------
    Path or None
        Path of the completed file, or ``None`` when the request fails with a
        ``requests.RequestException``.

    Notes
    -----
    Data is first written to ``<name>.part`` and renamed only after the whole
    body has arrived, so a failed transfer never leaves a truncated file under
    the final name and never clobbers a previously downloaded copy.
    """
    # Coerce first: the signature allows a plain string, but `str / str`
    # raises TypeError.
    fname = Path(folder) / Path(urlsplit(url).path).name
    partial = fname.with_name(fname.name + ".part")
    try:
        with requests.get(url, stream=True, timeout=180) as r:
            r.raise_for_status()
            with open(partial, "wb") as f:
                for chunk in r.iter_content(8192):
                    f.write(chunk)
        # Publish under the final name only once the download is complete.
        partial.replace(fname)
        return fname
    except requests.RequestException:
        # Discard whatever was received before the request failed.
        if partial.exists():
            partial.unlink()
        return None

# Fetch every report link in turn; a failed download is stored as None.
local_paths = []
for link in tqdm(response_df["report_link"], desc="Downloading"):
    local_paths.append(download_file(link, download_dir))

response_df["local_path"] = local_paths
# A row counts as successful when a local path was returned.
response_df["download_success"] = response_df["local_path"].notna()

print("\nDownload summary:")
print(response_df[["site_name", "download_success"]])

Downloading: 100%|██████████| 26/26 [00:08<00:00,  2.93it/s]


Download summary:
                              site_name  download_success
0                           Sidney Draw              True
1       Brule Canyon-South Platte River              True
2                             Sand Draw              True
3                       City of Venango              True
4                       Town of Brandon              True
5                         City of Grant              True
6                     Little Crow Creek              True
7             Glade Creek-Dolores River              True
8         Beaver Creek-San Miguel River              True
9                       Salt Creek Wash              True
10           Marble Wash-San Juan River              True
11  Sangre de Cristo Creek-Trinchera Cr              True
12                        Pioneer Creek              True
13           Headwaters La Garita Creek              True
14             Headwaters Conejos River              True
15                 Outlet Conejos River              




## Unzip all reports

In [25]:
# Unpack each archive that downloaded successfully into a folder next to it,
# named after the archive with its final suffix removed.
downloaded_archives = response_df.loc[response_df["download_success"], "local_path"]
for zpath in downloaded_archives:
    out_dir = zpath.with_suffix("")  # e.g. foo.zip -> foo
    out_dir.mkdir(exist_ok=True)
    with zipfile.ZipFile(zpath) as archive:
        archive.extractall(path=out_dir)
    print(f"✓ Extracted {zpath.name} → {out_dir}")

print("\nAll done! Reports are in ./Reports_Out/")

✓ Extracted sidney_draw.zip → Reports_Out/sidney_draw
✓ Extracted brule_canyon-south_platte_river.zip → Reports_Out/brule_canyon-south_platte_river
✓ Extracted sand_draw.zip → Reports_Out/sand_draw
✓ Extracted city_of_venango.zip → Reports_Out/city_of_venango
✓ Extracted town_of_brandon.zip → Reports_Out/town_of_brandon
✓ Extracted city_of_grant.zip → Reports_Out/city_of_grant
✓ Extracted little_crow_creek.zip → Reports_Out/little_crow_creek
✓ Extracted glade_creek-dolores_river.zip → Reports_Out/glade_creek-dolores_river
✓ Extracted beaver_creek-san_miguel_river.zip → Reports_Out/beaver_creek-san_miguel_river
✓ Extracted salt_creek_wash.zip → Reports_Out/salt_creek_wash
✓ Extracted marble_wash-san_juan_river.zip → Reports_Out/marble_wash-san_juan_river
✓ Extracted sangre_de_cristo_creek-trinchera_cr.zip → Reports_Out/sangre_de_cristo_creek-trinchera_cr
✓ Extracted pioneer_creek.zip → Reports_Out/pioneer_creek
✓ Extracted headwaters_la_garita_creek.zip → Reports_Out/headwaters_la_garit