# In [3]:
import h5py
import requests

def download_github_release(url: str = None, file_path: str = None):
    """Download a GitHub release asset and write it to a local file.

    ``url`` is not a real web address but a special ``release://`` form::

        release://org/repo/release_tag/asset_name
        e.g. release://policyengine/policyengine-us/cps-2023/cps_2023.h5

    The GitHub API is asked for the release carrying ``release_tag``, the
    asset whose name equals ``asset_name`` is looked up in that release, and
    the asset's content is downloaded and written to ``file_path``.

    Args:
        url (str): The ``release://`` url identifying the asset.
        file_path (str): Local path the downloaded bytes are written to.
            When omitted, the asset name is used as the path.

    Raises:
        ValueError: If ``url`` is missing or does not have exactly four
            non-empty components, if either request returns a status code
            other than 200, or if the release has no asset with the
            requested name.
    """
    if not isinstance(url, str) or not url:
        raise ValueError(
            "A url of the form release://org/repo/release_tag/file_path "
            "is required."
        )

    # "release://org/repo/tag/name".split("/") yields
    # ["release:", "", org, repo, tag, name]; the first two pieces are the
    # scheme and the empty string between the double slashes.
    parts = url.split("/")[2:]
    if len(parts) != 4 or not all(parts):
        raise ValueError(
            f"Invalid release url {url}. Expected "
            "release://org/repo/release_tag/file_path."
        )
    org, repo, release_tag, asset_name = parts

    # Keep the caller's destination separate from the asset name so that an
    # explicit file_path is honoured instead of being overwritten.
    if file_path is None:
        file_path = asset_name

    # Without a timeout a stalled connection would block forever.
    request_timeout = 30  # seconds

    release_url = (
        f"https://api.github.com/repos/{org}/{repo}/releases/tags/{release_tag}"
    )
    response = requests.get(release_url, timeout=request_timeout)
    if response.status_code != 200:
        raise ValueError(
            f"Invalid response code {response.status_code} for url {release_url}."
        )

    asset_url = next(
        (
            asset["url"]
            for asset in response.json()["assets"]
            if asset["name"] == asset_name
        ),
        None,
    )
    if asset_url is None:
        raise ValueError(
            f"File {asset_name} not found in release {release_tag} of {org}/{repo}."
        )

    # The asset's API url serves the raw file (rather than JSON metadata)
    # only when the octet-stream media type is requested.
    response = requests.get(
        asset_url,
        headers={
            "Accept": "application/octet-stream",
        },
        timeout=request_timeout,
    )
    if response.status_code != 200:
        raise ValueError(
            f"Invalid response code {response.status_code} for url {asset_url}."
        )

    with open(file_path, "wb") as f:
        f.write(response.content)

# Download the cps_2023.h5 asset from the "cps-2023" release of
# policyengine/policyengine-us and save it under the same file name.
download_github_release(
    url="release://policyengine/policyengine-us/cps-2023/cps_2023.h5",
    file_path="cps_2023.h5",
)