# Overview of Data Variables and Sources

In [1]:
# Loading libraries
import os
import requests
import pandas as pd

In [2]:
def query_acs(var_code, api_key):
    results = None
    return None

## US Census Variables

These variables were taken from the US Census API, specifically from the [American Community Survey](https://www.census.gov/data/developers/data-sets/acs-5year.html). Where possible all variables are collected at a census track level.

In [3]:
# List of variables taken from US CENSUS bureau
census_vars = {
    "total_pop": "B01001_001E", # Total population
    "white_no_hispanic": "B01001H_001E", # White Alone, not Hispanic or Latino
    "year_built": "B25035_001E", # Median Year structure was built
    "internet_access": "B28002_013E", # Number of households with No Internet Access
    "total_households": "B09019_003E", # Total number of households
    "labor_force_rate": "S2301_C02_001E", # Labor Force Participation Rate for population 16 years or older
    "pop_16_older": "S2301_C01_001E", # Population 16 years or older
    "less_than_hs": "S2301_C01_032E", # Population 25 to 64 years that are Less than high school graduate
    "insured": "S2701_C02_001E", # Insured population
    "senior_living_alone": "S1101_C01_013E", # Number of households - householder 65+ years old living alone
    "above_200_poverty": "S0501_C01_106E", # Total population at or above 200% poverty level
    "occupied_housing_units": "S0102_C01_090E", # Total number of housing units
    "owner_occupied": "S0102_C01_091E", # Number of owner occupied housing units
    "renter_occupied": "S0102_C01_092E", # Number of renter occupied housing units
    "disabled": "S0102_C01_044E" # Population with any disability
}

In [7]:
# Function that queries US Census Bureau for a specific variable and state
def query_acs(var_name, var_code, state_fips, api_key):
    
    # General format for ACS data retrieval for all census tracts in a state
    ACS_BASE_URL = 'https://api.census.gov/data/2020/acs/acs5?get=NAME,'
    ACS_SUBJECT_URL = 'https://api.census.gov/data/2020/acs/acs5/subject?get=NAME,'
    ACS_TRACTS = '&for=tract:*&in=state:'
    ACS_KEY = '&key=' + api_key
    
    # Check if this is a detailed table variable or subject table variable
    base_url = ""
    if (var_code[0] == "S"):
        base_url = ACS_SUBJECT_URL
    elif(var_code[0] == "B"):
        base_url = ACS_BASE_URL
    else:
        print(f"Error Unknown type of variable {var_code}")
        return None
    
    # Builds request for US Census API
    req_url = base_url + var_code + ACS_TRACTS + state_fips + ACS_KEY
    
    # Recieve and process response from US Census API
    resp = requests.get(req_url)
    if ((resp.status_code) != 200):
        print(f"Error response code {resp.status_code} for request url\n{req_url}")
        return None
    
    dat = resp.json()
    df = pd.DataFrame(dat[1:], columns=dat[0])
    df["GEOID"] = df['state'].astype(str) + df['county'].astype(str) + df['tract'].astype(str)
    
    # Rename variable with a name instead of code
    df = df.rename(columns={var_code: var_name})
    
    return df

## Building US Census Dataset
Please find an example of how you could build a dataset with the US Census Datasets

In [10]:
# Read in US Census API Key
census_api_key = ""
f = open("api_keys/us_census_api_key.txt", "r")
census_api_key = f.read().rstrip("\n")
f.close()

# Directory structure
outdir = os.path.join("data", "acs")
states_outdir = os.path.join(outdir, "states")

# Create directories if needed
if not (os.path.isdir(outdir)):
    os.mkdir(outdir)
    
if not (os.path.isdir(states_outdir)):
    os.mkdir(states_outdir)

# Get list of State FIPS codes
states = pd.read_csv("data/support/state_fips.csv", dtype={"STATE": str})
states_fips = list(states["STATE"])

results = None

# Iterate through all states
for fip in states_fips:
    
    state_abbr = states[states["STATE"] == fip]["STUSAB"].values[0]
    print(f"Current State: {state_abbr}")
    
    state_results = None 
    # Iterate through all variables
    for var_name, var_code in census_vars.items():
        print(f"Current Variable: {var_name}")
        
        curr_df = query_acs(var_name, var_code, fip, census_api_key)
        print(f"{curr_df.shape}")
        
        # Collect all variables for a given state in the results df
        if not (curr_df is None):
            if not (state_results is None):
                state_results = pd.merge(state_results, curr_df[["GEOID", var_name]], on="GEOID")
            else:
                state_results = curr_df
        
    # Write State results to file
    state_filename = f"{state_abbr}.csv"
    state_results.to_csv(os.path.join(states_outdir, state_filename), index=False)
    print(f"Wrote {state_abbr} results")
    break
                
    # Collect state results into overall result dataframe
    if not (state_results is None):
        if not (results is None):
            results = pd.concat([results, state_results])
        else:
            results = state_results
    
print("Done getting data")
results.to_csv(os.path.join(outdir, "acs_data.csv"), index=False)
print("Done writing data")

Current State: AL
Current Variable: total_pop
(1437, 6)
Current Variable: white_no_hispanic
(1437, 6)
Current Variable: year_built
(1437, 6)
Current Variable: internet_access
(1437, 6)
Current Variable: total_households
(1437, 6)
Current Variable: labor_force_rate
(1437, 6)
Current Variable: pop_16_older
(1437, 6)
Current Variable: less_than_hs
(1437, 6)
Current Variable: insured
(1437, 6)
Current Variable: senior_living_alone
(1437, 6)
Current Variable: above_200_poverty
(1437, 6)
Current Variable: occupied_housing_units
(1437, 6)
Current Variable: owner_occupied
(1437, 6)
Current Variable: renter_occupied
(1437, 6)
Current Variable: disabled
(1437, 6)
Wrote AL results
Done getting data


AttributeError: 'NoneType' object has no attribute 'to_csv'