In [1]:
import os
import sys
import pandas as pd
import numpy as np
import getpass

# Current OS login name; used only to build the per-user path below.
user = getpass.getuser()

# Location of a local clone of dvutils under the user's Documents/GitHub folder.
# NOTE(review): the hard-coded "/Users/..." prefix only resolves on machines with that
# home-directory layout — confirm before running elsewhere.
DVUTILS_LOCAL_CLONE_PATH = f"/Users/{user}/Documents/GitHub/dvutils"
# Put the clone first on the import path (presumably where utils_io lives — confirm).
sys.path.insert(0, DVUTILS_LOCAL_CLONE_PATH)
# NOTE(review): wildcard import — the names it adds to the namespace are not visible here.
from utils_io import *

In [2]:
# Read the Census API key from the environment; api_key is None when the variable is unset.
api_key = os.getenv("CENSUS_API_KEY")

In [4]:
def pull_acs_5_year_est_data(
    census_api_key,
    acs_year=2019,
    tbl_prof_type="Detailed",
    table_id=None,
    select_table_vars=None,
    drop_anno_cols=True,
    drop_margin_cols=True,
    state="06",
    counties="001,013,041,055,075,081,085,095,097",
    timeout=60,
):
    """
    Pull American Community Survey (ACS) 5 year estimate data at the census tract level.
    Data can be pulled for an entire table or for select table variables.

    !Must include a table_id or list to select_table_vars parameters!

    Parameters
    -------------------
    census_api_key (String):
    Your secret census api key.

    acs_year (Integer):
    Year for acs estimates, default is 2019.

    tbl_prof_type (String):
    Table or profile type. These include the following types: Detailed, Subject, Data, or
    Comparison. They map to the acs5, acs5/subject, acs5/profile and acs5/cprofile endpoints.

    table_id (String):
    ACS table id. Example 'B01001'. When provided, the whole table group is requested and
    select_table_vars is ignored.

    select_table_vars (List):
    provide a list of ACS table variables as strings. Example: ['B01001_001E','B01001_002E'].
    A single variable may also be passed as a plain string.

    drop_anno_cols (Boolean):
    Drops annotation of margin of error and annotation of estimate columns
    (column names ending in 'MA' or 'EA').

    drop_margin_cols (Boolean):
    Drops margin of error columns (column names ending in 'M').

    state (String):
    State FIPS code used in the geography filter, default '06'.

    counties (String or List):
    County FIPS codes, either comma separated in one string or as a list of strings.
    Default is the set of nine county codes that was previously hard-coded.

    timeout (Integer or Float):
    Seconds to wait for the Census API before giving up, default 60.

    Returns
    -------------------
    pandas DataFrame with one row per tract: the requested variable columns (numeric),
    'fipco' (county FIPS code) and 'tract_geoid' (state + county + tract codes).
    If tbl_prof_type is not one of the supported types, the help message is returned as a
    string instead (kept for backward compatibility).

    Raises
    -------------------
    ValueError:
    If neither table_id nor select_table_vars is provided, or if the Census API does not
    answer with HTTP 200.

    Author: Joshua Croff
    Variable Reference: https://www.census.gov/data/developers/data-sets/acs-5year.html
    """
    # Imported locally so the function stays self-contained when copied between notebooks.
    import requests
    import pandas as pd

    # Fail early with a clear message instead of the TypeError ",".join(None) would raise.
    if not table_id and not select_table_vars:
        raise ValueError("Provide either a table_id or a non-empty list of select_table_vars.")

    endpoints = {
        "Detailed": "acs/acs5",
        "Subject": "acs/acs5/subject",
        "Data": "acs/acs5/profile",
        "Comparison": "acs/acs5/cprofile",
    }
    if tbl_prof_type not in endpoints:
        return "Please provide the following table types: Detailed, Subject, Data, or Comparison"
    base_url = f"https://api.census.gov/data/{acs_year}/{endpoints[tbl_prof_type]}?"

    if table_id:
        var = f"group({table_id})"
    else:
        # A bare string would otherwise be joined character by character.
        if isinstance(select_table_vars, str):
            select_table_vars = [select_table_vars]
        var = ",".join(select_table_vars)

    if not isinstance(counties, str):
        counties = ",".join(counties)

    # set query params
    query_params = {
        "get": var,
        "for": "tract:*",
        "in": [
            f"county:{counties}",
            f"state:{state}",
        ],
        "key": census_api_key,
    }
    rq = requests.get(base_url, params=query_params, timeout=timeout)
    if rq.status_code != 200:
        # The request URL is deliberately left out of the message: it contains the API key.
        raise ValueError(
            f"Census API request failed (HTTP {rq.status_code}): {rq.text[:300]}"
        )
    data = rq.json()
    # First row of the payload is the header, remaining rows are tracts.
    acs_df = pd.DataFrame(data[1:], columns=data[0])

    # Drop annotation / margin of error columns BEFORE casting, so annotation text in a
    # column that is about to be discarded cannot break the numeric conversion.
    if drop_anno_cols:
        acs_df = acs_df.loc[
            :, ~((acs_df.columns.str.endswith("EA")) | (acs_df.columns.str.endswith("MA")))
        ]
    if drop_margin_cols:
        acs_df = acs_df.loc[:, ~acs_df.columns.str.endswith("M")]
    # Work on an independent frame so the column assignments below do not warn.
    acs_df = acs_df.copy()

    # Cast estimate (and any kept margin) columns to numeric types. Identifier columns and
    # annotation columns hold text and are left as returned by the API.
    text_cols = {"GEO_ID", "NAME", "state", "county", "tract"}
    num_cols = [
        col
        for col in acs_df.columns
        if col not in text_cols and not col.endswith(("EA", "MA"))
    ]
    if num_cols:
        acs_df[num_cols] = acs_df[num_cols].apply(pd.to_numeric)

    # add tract id column
    acs_df["tract_geoid"] = acs_df["state"] + acs_df["county"] + acs_df["tract"]

    # rename columns
    acs_df = acs_df.rename(columns={"county": "fipco"})

    # drop redundant columns (GEO_ID and NAME are always returned by a group() request)
    if table_id:
        redundant_cols = ["GEO_ID", "NAME", "state", "tract"]
    else:
        redundant_cols = ["state", "tract"]
    acs_df = acs_df.drop(columns=redundant_cols)

    return acs_df

## Pull data

In [22]:
# Pull the full B03002 table for both 5-year vintages with one call expression,
# evaluated for 2017 first and then 2022.
acs_2017, acs_2022 = (
    pull_acs_5_year_est_data(census_api_key=api_key, acs_year=acs_year, table_id="B03002")
    for acs_year in (2017, 2022)
)

## Rename columns

In [23]:
# Readable names for the B03002 estimate columns, keyed by the three-digit variable suffix.
# NOTE(review): 003-009 follow "not_hispanic_or_latino", so they are presumably the
# non-Hispanic race categories — confirm against the Census variable list.
_b03002_labels = {
    "001": "total_population",
    "002": "not_hispanic_or_latino",
    "003": "white",
    "004": "black_or_african_american",
    "005": "american_indian_and_alaska_native",
    "006": "asian",
    "007": "native_hawaiian_and_other_pacific_islander",
    "008": "other_race",
    "009": "two_or_more_races",
    "012": "hispanic_or_latino_any_race",
}

# Expand each suffix into the full estimate variable name, e.g. "001" -> "B03002_001E".
rename_dict = {f"B03002_{suffix}E": label for suffix, label in _b03002_labels.items()}

In [24]:
# Rename the ACS variable codes to readable column names.
# Reassign instead of using inplace=True so the cell produces a new frame from its input
# rather than mutating an object other cells may already have displayed.
acs_2017 = acs_2017.rename(columns=rename_dict)
acs_2022 = acs_2022.rename(columns=rename_dict)

In [28]:
# Component columns that are collapsed into the broader "other" group.
other_races = [
    "american_indian_and_alaska_native",
    "other_race",
    "two_or_more_races",
]

# Component columns that are collapsed into "asian_and_pacific_islander".
asian_and_pacific_islander = [
    "asian",
    "native_hawaiian_and_other_pacific_islander",
]

# 2017: row-wise totals of the component columns
acs_2017["other"] = acs_2017.loc[:, other_races].sum(axis="columns")
acs_2017["asian_and_pacific_islander"] = acs_2017.loc[:, asian_and_pacific_islander].sum(
    axis="columns"
)

# 2022: same two totals
acs_2022["other"] = acs_2022.loc[:, other_races].sum(axis="columns")
acs_2022["asian_and_pacific_islander"] = acs_2022.loc[:, asian_and_pacific_islander].sum(
    axis="columns"
)

In [29]:
# Recompute the population from the five analysis groups so it can be compared with
# total_population. This cell only stores the sum as "pop_check"; nothing is asserted here.
check_cols = [
    "white",
    "asian_and_pacific_islander",
    "hispanic_or_latino_any_race",
    "black_or_african_american",
    "other",
]

# row-wise sum of the groups, per vintage
acs_2017["pop_check"] = acs_2017.loc[:, check_cols].sum(axis="columns")
acs_2022["pop_check"] = acs_2022.loc[:, check_cols].sum(axis="columns")

In [30]:
# Review columns: the reported total, the recomputed pop_check, then the five groups.
rev_cols = [
    "total_population",
    "pop_check",
] + [
    "white",
    "asian_and_pacific_islander",
    "hispanic_or_latino_any_race",
    "black_or_african_american",
    "other",
]
# Show the first rows of the 2017 frame for a visual comparison of the two totals.
acs_2017.loc[:, rev_cols].head()

Unnamed: 0,total_population,pop_check,white,asian_and_pacific_islander,hispanic_or_latino_any_race,black_or_african_american,other
0,2723,2723,1191,387,365,459,321
1,4949,4949,899,2285,1107,484,174
2,6347,6347,1774,246,2785,1264,278
3,6186,6186,532,4609,811,62,172
4,4414,4414,324,2612,1220,180,78


In [31]:
# Keep only the columns used in the analysis: the total and the five groups.
# Everything else, including pop_check, is dropped from both frames.
analysis_cols = [
    "total_population",
    "white",
    "asian_and_pacific_islander",
    "hispanic_or_latino_any_race",
    "black_or_african_american",
    "other",
]

acs_2017 = acs_2017.loc[:, analysis_cols].copy()
acs_2022 = acs_2022.loc[:, analysis_cols].copy()

In [67]:
## summarize data to regional level
# Sum every column over all rows, then turn the resulting Series into a two-column frame:
# the former column labels become "race_and_ethnicity", the sums become "population_<year>".

acs_2017_sum = (
    acs_2017.agg("sum")
    .rename_axis("race_and_ethnicity")
    .reset_index(name="population_2017")
)

acs_2022_sum = (
    acs_2022.agg("sum")
    .rename_axis("race_and_ethnicity")
    .reset_index(name="population_2022")
)

In [68]:
# Join the two regional summaries side by side, matching rows on the group label.

acs_2017_2022 = acs_2017_sum.merge(acs_2022_sum, on="race_and_ethnicity")

In [69]:
# Absolute change: 2022 population minus 2017 population.
acs_2017_2022["change"] = acs_2017_2022["population_2022"].sub(acs_2017_2022["population_2017"])

# Relative change as a percentage of the 2017 population, rounded to two decimals.
acs_2017_2022["percent_change"] = (
    acs_2017_2022["change"].div(acs_2017_2022["population_2017"]).mul(100).round(2)
)

In [70]:
# Display the merged table with the change and percent_change columns.
acs_2017_2022

Unnamed: 0,race_and_ethnicity,population_2017,population_2022,change,percent_change
0,total_population,7629975,7685888,55913,0.73
1,white,3060349,2830488,-229861,-7.51
2,asian_and_pacific_islander,1966957,2164610,197653,10.05
3,hispanic_or_latino_any_race,1804345,1818897,14552,0.81
4,black_or_african_american,450274,438048,-12226,-2.72
5,other,348050,433845,85795,24.65


In [71]:
# Share of the regional population held by each group, per year, rounded to two decimals.
# The denominator is the sum of the individual groups. The total_population row is excluded
# by label rather than by assuming it sits at index 0, so the result no longer depends on
# the row order of the summary table.
is_group_row = acs_2017_2022["race_and_ethnicity"].ne("total_population")

acs_2017_2022["percent_of_total_2017"] = np.round(
    (
        acs_2017_2022["population_2017"]
        / acs_2017_2022.loc[is_group_row, "population_2017"].sum()
    )
    * 100,
    2,
)
acs_2017_2022["percent_of_total_2022"] = np.round(
    (
        acs_2017_2022["population_2022"]
        / acs_2017_2022.loc[is_group_row, "population_2022"].sum()
    )
    * 100,
    2,
)

In [72]:
# List the current column names (used to decide the final column order).
list(acs_2017_2022.columns)

['race_and_ethnicity',
 'population_2017',
 'population_2022',
 'change',
 'percent_change',
 'percent_of_total_2017',
 'percent_of_total_2022']

In [73]:
# Final column order: label, then each year's population with its share, then the change.
reorder_cols = [
    "race_and_ethnicity",
    "population_2017",
    "percent_of_total_2017",
    "population_2022",
    "percent_of_total_2022",
    "change",
    "percent_change",
]
acs_2017_2022 = acs_2017_2022.loc[:, reorder_cols].copy()

In [74]:
# Display the reordered summary table.
acs_2017_2022

Unnamed: 0,race_and_ethnicity,population_2017,percent_of_total_2017,population_2022,percent_of_total_2022,change,percent_change
0,total_population,7629975,100.0,7685888,100.0,55913,0.73
1,white,3060349,40.11,2830488,36.83,-229861,-7.51
2,asian_and_pacific_islander,1966957,25.78,2164610,28.16,197653,10.05
3,hispanic_or_latino_any_race,1804345,23.65,1818897,23.67,14552,0.81
4,black_or_african_american,450274,5.9,438048,5.7,-12226,-2.72
5,other,348050,4.56,433845,5.64,85795,24.65
