In [None]:
!python -m pip install -r requirements.txt

In [None]:
import os
from typing import NamedTuple

import pandas as pd
from census import Census
from us import states

from diabetes_prevention.data.insurance_status import (
    get_state_county_data,
    get_variable_groups,
)

In [None]:
# Read the key from the CENSUS_API_KEY environment variable so a real
# credential never has to be saved in the notebook; the "XXX" placeholder is
# kept as the fallback when the variable is not set.
CENSUS_API_KEY = os.environ.get("CENSUS_API_KEY", "XXX")

Get an API key from the [US Census website](https://api.census.gov/data/key_signup.html) and use it as the value of `CENSUS_API_KEY`.

In [None]:
# Shared Census API client; download_census_var_data reads this module-level
# name when it issues its request.
c = Census(CENSUS_API_KEY)

In [None]:
from itertools import chain
from time import sleep
import numpy as np

In [None]:
# Variable groups to download; later cells read each group's `.name` and
# `.variables` (and each variable's `.name`).
var_groups = get_variable_groups()

In [None]:
def download_census_var_data(
    var_names: tuple[str, ...],
    state_fips: str,
    county_fips: str,
    delay: bool = True,
    base_delay: float = 2.0,
    jitter: float = 0.5,
):
    """Request the given variables for every tract of one county.

    The request goes through the ``acs5`` endpoint of the module-level
    Census client ``c``.

    Args:
        var_names: Census variable names to request.
        state_fips: FIPS code of the state.
        county_fips: FIPS code of the county inside that state.
        delay: When true, sleep before the request so that consecutive calls
            are spaced out.
        base_delay: Centre of the pause, in seconds.
        jitter: Half-width, in seconds, of the uniform random offset added to
            ``base_delay``.

    Returns:
        Whatever ``c.acs5.get`` returns for the query; the calling cell passes
        it to ``pd.DataFrame.from_records``.
    """
    if delay:
        pause = base_delay + np.random.uniform(-jitter, jitter)
        # time.sleep raises ValueError on negative input, so clamp at zero.
        sleep(max(0.0, pause))
    # NOTE(review): the census package's examples write this clause as
    # "state:50 county:001" (no space after the colon) — confirm the API
    # accepts the spaced form used here.
    return c.acs5.get(
        var_names,
        geo={"for": "tract:*", "in": f"state: {state_fips} county: {county_fips}"},
    )

In [None]:
state_fips = "50"
# Flatten every group's variables into one tuple of "E"-suffixed names.
census_variable_names = tuple(
    f"{v.name}{'E'}" for v in chain.from_iterable(vg.variables for vg in var_groups)
)


def _county_tract_frame(county_fips):
    """Download one county's tract rows and key them by a combined tract id."""
    county_df = pd.DataFrame.from_records(
        download_census_var_data(census_variable_names, state_fips, county_fips)
    )
    # Join the state, county and tract strings into a single identifier.
    state_county = county_df["state"].str.cat(county_df["county"])
    county_df = county_df.assign(
        census_tract=state_county.str.cat(county_df["tract"])
    )
    return county_df.drop(columns=["state", "county", "tract"])


# One frame per county of the state, stacked row-wise.
dat_df = pd.concat(
    list(map(_county_tract_frame, get_state_county_data(state_fips).county_fips))
)

In [None]:
# Every column currently in the frame other than the tract identifier.
raw_var_cols = [col for col in dat_df if col != "census_tract"]
for vg in var_groups:
    # Add one column per group: the row-wise sum of that group's
    # "E"-suffixed variable columns.
    dat_df = dat_df.assign(
        **{vg.name: dat_df[[f"{v.name}{'E'}" for v in vg.variables]].sum(axis=1)}
    )

In [None]:
# Show the resulting frame: the downloaded columns, the census_tract id, and
# one summed column per variable group.
dat_df