# Downloading Data from Census API for Health and Nutrition

I wanted to use the "censusdis" and/or "census" packages to download census data 
through the API, but we don't have access to them in EDL. I asked Brock Webb about it.

Brock said, "That can be done via a remedy request to install the package. 
The bad thing is... even if you do that, I didn't think EDL had open Internet access, 
so it doesn't work. That was a struggle I had with the python packages. They made it 
very easy to get the data as demonstrated today. However, I couldn't do that, so I 
had to get it manually from data.census.gov and figure out how to get what I wanted 
into a table so I could import the file."

So instead of using a package, I'm using the following code.

If there is time, these are the things I would add to the code:

1. Add more detailed health insurance types
2. Where 1-year data are available, use 1-year instead of 5-year (applies to county and tribal areas)
3. Add ACS block group data
4. Add more "loops" and "if-else" code so I don't need as many functions
5. Make adjustments for geo changes over time
6. Make adjustments for Puerto Rico
7. Make adjustments for tribal area name
8. Additional QC

In [1]:
# Setting up libraries
import requests
import pandas as pd
import numpy as np

# Blank dataframes that downloaded data gets appended to
health, disability, rural, hospital = (pd.DataFrame() for _ in range(4))

# Year lists used when looping over different geographies
vina = [str(yr) for yr in range(2022, 2014, -1)]  # all years, 2022 down to 2015
vin = [yr for yr in vina if yr != "2020"]         # all years except 2020
vinb = vina[:4]                                   # tribal areas, 2022-2019
vinc = vina[4:]                                   # tribal areas, 2018-2015
vind = [yr for yr in vin if yr >= "2017"]         # internet (no 2020)
vine = ["2018", "2017"]                           # tribal areas / internet

# Base column names for ACS data profiles: the geography name, then an
# estimate column and a matching "_moe" column for each measure
col_names_dp = ["Geo_name"] + [
    measure + suffix
    for measure in (
        "Percent_with_disability",
        "Percent_with_food_stamps",
        "Fertility_rate",
        "Percent_private_health_insurance",
        "Percent_public_coverage",
    )
    for suffix in ("", "_moe")
]

# Base column names for ACS subject tables (one estimate/moe pair per
# difficulty type)
col_names_sub = ["Geo_name"] + [
    "Percent_with_" + difficulty + "_difficulty" + suffix
    for difficulty in (
        "hearing",
        "vision",
        "cognitive",
        "ambulatory",
        "selfcare",
        "independent_living",
    )
    for suffix in ("", "_moe")
]

# Variable names for subject tables: an E and an M code per S1810 row, in the
# same order as col_names_sub (hearing, vision, cognitive, ambulatory,
# self-care, independent living), followed by GEO_ID
get_vars_sub = (
    ["NAME"]
    + [
        "S1810_C03_" + row + kind
        for row in ("019", "029", "039", "047", "055", "063")
        for kind in ("E", "M")
    ]
    + ["GEO_ID"]
)

# Base column names for county business patterns
col_names_cbp = ["Geo_name", "Number_of_hospitals"]

# Variable names for county business patterns
get_vars_cbp = ["NAME", "ESTAB", "GEO_ID"]

In [2]:
#This is the function for the nation

def api_us(year, dataset, get_vars):
    """Download national data-profile values for one year and append them to ``health``.

    Parameters
    ----------
    year : str
        Year used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year, e.g. ``"acs/acs1/profile?"``.
    get_vars : list of str
        Variable codes joined into the ``get`` parameter. The response columns
        are relabelled positionally as ``col_names_dp + ['GEO_ID', 'fips']``,
        so the codes must be listed in that order.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global health

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {"get": ",".join(get_vars), "for": "us:*"}

    # Execute the request. Fail here with the HTTP status instead of later with
    # an opaque JSON decoding error when the API rejects the query.
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame; the first row of the payload is skipped and the
    # remaining rows get the friendlier column labels.
    col_names = col_names_dp + ['GEO_ID', "fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "national"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = "us"  # overwrite the API value with a fixed national id
    df.drop('fips', axis=1, inplace=True)

    # Append data to the module-level accumulator
    health = pd.concat([health, df], ignore_index=True)

In [3]:
# Nation (variable names change based on the table for that year)

# The disability and fertility codes differ between the 2019-2022 tables and
# the 2015-2018 tables; the food stamp and insurance codes are the same.
_us_vars_2019_on = [
    "NAME",
    "DP02_0072PE", "DP02_0072PM",  # disability
    "DP03_0074PE", "DP03_0074PM",  # foodstamps
    "DP02_0040E", "DP02_0040M",  # fertility
    "DP03_0097PE", "DP03_0097PM",  # private
    "DP03_0098PE", "DP03_0098PM",  # public
    "GEO_ID",
]
_us_vars_pre_2019 = [
    "NAME",
    "DP02_0071PE", "DP02_0071PM",  # disability
    "DP03_0074PE", "DP03_0074PM",  # foodstamps
    "DP02_0039E", "DP02_0039M",  # fertility
    "DP03_0097PE", "DP03_0097PM",  # private
    "DP03_0098PE", "DP03_0098PM",  # public
    "GEO_ID",
]

for _us_year in ["2022", "2021", "2020", "2019", "2018", "2017", "2016", "2015"]:
    api_us(
        year=_us_year,
        # 2020 is pulled from the 5-year profile; every other year uses 1-year
        dataset="acs/acs5/profile?" if _us_year == "2020" else "acs/acs1/profile?",
        get_vars=list(_us_vars_2019_on if _us_year >= "2019" else _us_vars_pre_2019),
    )

In [4]:
#This is the function for region and division

def api_rd(year, dataset, geo_for, area_type, geo_id, get_vars):
    """Download region- or division-level data-profile values and append them to ``health``.

    Parameters
    ----------
    year : str
        Year used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year, e.g. ``"acs/acs1/profile?"``.
    geo_for : str
        Value of the ``for`` predicate, e.g. ``"region:*"`` or ``"division:*"``.
    area_type : str
        Label stored in the ``area_type`` column.
    geo_id : str or single-item list of str
        Column whose values are copied into ``GEO_ID``; callers in this file
        pass ``["fips"]``.
    get_vars : list of str
        Variable codes joined into the ``get`` parameter. The response columns
        are relabelled positionally as ``col_names_dp + ['GEO_ID', 'fips']``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global health

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {"get": ",".join(get_vars), "for": geo_for}

    # Execute the request. Fail here with the HTTP status instead of later with
    # an opaque JSON decoding error when the API rejects the query.
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame; the first row of the payload is skipped
    col_names = col_names_dp + ['GEO_ID', "fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = area_type
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = df[geo_id]  # replace the API GEO_ID with the chosen column
    df.drop('fips', axis=1, inplace=True)

    # Append data to the module-level accumulator
    health = pd.concat([health, df], ignore_index=True)

In [5]:
#region
# The disability and fertility codes differ between the 2019-2022 tables and
# the 2015-2018 tables; the food stamp and insurance codes are the same.
_region_vars_2019_on = [
    "NAME",
    "DP02_0072PE", "DP02_0072PM",  # disability
    "DP03_0074PE", "DP03_0074PM",  # foodstamps
    "DP02_0040E", "DP02_0040M",  # fertility
    "DP03_0097PE", "DP03_0097PM",  # private
    "DP03_0098PE", "DP03_0098PM",  # public
    "GEO_ID",
]
_region_vars_pre_2019 = [
    "NAME",
    "DP02_0071PE", "DP02_0071PM",  # disability
    "DP03_0074PE", "DP03_0074PM",  # foodstamps
    "DP02_0039E", "DP02_0039M",  # fertility
    "DP03_0097PE", "DP03_0097PM",  # private
    "DP03_0098PE", "DP03_0098PM",  # public
    "GEO_ID",
]

for _region_year in ["2022", "2021", "2020", "2019", "2018", "2017", "2016", "2015"]:
    api_rd(
        year=_region_year,
        # 2020 is pulled from the 5-year profile; every other year uses 1-year
        dataset="acs/acs5/profile?" if _region_year == "2020" else "acs/acs1/profile?",
        geo_for="region:*",
        area_type="region",
        geo_id=["fips"],
        get_vars=list(
            _region_vars_2019_on if _region_year >= "2019" else _region_vars_pre_2019
        ),
    )

In [6]:
#division
# The disability and fertility codes differ between the 2019-2022 tables and
# the 2015-2018 tables; the food stamp and insurance codes are the same.
_division_vars_2019_on = [
    "NAME",
    "DP02_0072PE", "DP02_0072PM",  # disability
    "DP03_0074PE", "DP03_0074PM",  # foodstamps
    "DP02_0040E", "DP02_0040M",  # fertility
    "DP03_0097PE", "DP03_0097PM",  # private
    "DP03_0098PE", "DP03_0098PM",  # public
    "GEO_ID",
]
_division_vars_pre_2019 = [
    "NAME",
    "DP02_0071PE", "DP02_0071PM",  # disability
    "DP03_0074PE", "DP03_0074PM",  # foodstamps
    "DP02_0039E", "DP02_0039M",  # fertility
    "DP03_0097PE", "DP03_0097PM",  # private
    "DP03_0098PE", "DP03_0098PM",  # public
    "GEO_ID",
]

for _division_year in ["2022", "2021", "2020", "2019", "2018", "2017", "2016", "2015"]:
    api_rd(
        year=_division_year,
        # 2020 is pulled from the 5-year profile; every other year uses 1-year
        dataset="acs/acs5/profile?" if _division_year == "2020" else "acs/acs1/profile?",
        geo_for="division:*",
        area_type="division",
        geo_id=["fips"],
        get_vars=list(
            _division_vars_2019_on if _division_year >= "2019" else _division_vars_pre_2019
        ),
    )

In [7]:
#This is the function for state

def api_st(year, dataset, get_vars):
    """Download state-level data-profile values for one year and append them to ``health``.

    Parameters
    ----------
    year : str
        Year used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year, e.g. ``"acs/acs1/profile?"``.
    get_vars : list of str
        Variable codes joined into the ``get`` parameter. The response columns
        are relabelled positionally as ``col_names_dp + ['fips', 'GEO_ID']``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global health

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify predicates
    predicates = {"get": ",".join(get_vars), "for": "state:*"}

    # Execute the request. Fail here with the HTTP status instead of later with
    # an opaque JSON decoding error when the API rejects the query.
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame; the first row of the payload is skipped.
    # Note the label order: the requested GEO_ID value lands in "fips" here.
    col_names = col_names_dp + ["fips", 'GEO_ID']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "state"
    # Characters 9-10 of the "fips" string are kept as the state code
    state_code = df['fips'].str[9:11]
    df["state_fips"] = state_code
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = state_code
    df.drop('fips', axis=1, inplace=True)

    # Append data to the module-level accumulator
    health = pd.concat([health, df], ignore_index=True)

In [8]:
# State pull: 2022, ACS 1-year data profile
api_st(
    year="2022",
    dataset="acs/acs1/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [9]:
# State pull: 2021, ACS 1-year data profile
api_st(
    year="2021",
    dataset="acs/acs1/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [10]:
# State pull: 2020, ACS 5-year data profile
api_st(
    year="2020",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [11]:
# State pull: 2019, ACS 1-year data profile
api_st(
    year="2019",
    dataset="acs/acs1/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [12]:
# State pull: 2018, ACS 1-year data profile (2015-2018 variable codes)
api_st(
    year="2018",
    dataset="acs/acs1/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [13]:
# State pull: 2017, ACS 1-year data profile (2015-2018 variable codes)
api_st(
    year="2017",
    dataset="acs/acs1/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [14]:
# State pull: 2016, ACS 1-year data profile (2015-2018 variable codes)
api_st(
    year="2016",
    dataset="acs/acs1/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [15]:
# State pull: 2015, ACS 1-year data profile (2015-2018 variable codes)
api_st(
    year="2015",
    dataset="acs/acs1/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [16]:
#This is the function for county

def api_cty(year, dataset, get_vars):
    """Download county-level data-profile values for one year and append them to ``health``.

    Parameters
    ----------
    year : str
        Year used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year, e.g. ``"acs/acs5/profile?"``.
    get_vars : list of str
        Variable codes joined into the ``get`` parameter. The response columns
        are relabelled positionally as
        ``col_names_dp + ['GEO_ID', 'state_fips', 'county_fips']``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global health

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify predicates
    predicates = {"get": ",".join(get_vars), "for": "county:*"}

    # Execute the request. Fail here with the HTTP status instead of later with
    # an opaque JSON decoding error when the API rejects the query.
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame; the first row of the payload is skipped. The two
    # trailing response columns are kept as state_fips and county_fips.
    col_names = col_names_dp + ['GEO_ID', "state_fips", "county_fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "county"
    df["tract_fips"] = ""
    # Keep only characters 9-13 of the API GEO_ID string
    df["GEO_ID"] = df['GEO_ID'].str[9:14]

    # Append data to the module-level accumulator
    health = pd.concat([health, df], ignore_index=True)

In [17]:
# County pull: 2022, ACS 5-year data profile
api_cty(
    year="2022",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [18]:
# County pull: 2021, ACS 5-year data profile
api_cty(
    year="2021",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [19]:
# County pull: 2020, ACS 5-year data profile
api_cty(
    year="2020",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [20]:
# County pull: 2019, ACS 5-year data profile
api_cty(
    year="2019",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [21]:
# County pull: 2018, ACS 5-year data profile (2015-2018 variable codes)
api_cty(
    year="2018",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [22]:
# County pull: 2017, ACS 5-year data profile (2015-2018 variable codes)
api_cty(
    year="2017",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [23]:
# County pull: 2016, ACS 5-year data profile (2015-2018 variable codes)
api_cty(
    year="2016",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [24]:
# County pull: 2015, ACS 5-year data profile (2015-2018 variable codes)
api_cty(
    year="2015",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [25]:
#This is the function for tribal areas (2019 - 2022)

def api_aian(year, dataset, get_vars):
    """Download tribal-area data-profile values (2019-2022) and append them to ``health``.

    A second function with the same name is defined further down for
    2015-2018, so the 2019-2022 calls must run before that later definition.

    Parameters
    ----------
    year : str
        Year used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year, e.g. ``"acs/acs5/profile?"``.
    get_vars : list of str
        Variable codes joined into the ``get`` parameter. The response columns
        are relabelled positionally as ``col_names_dp + ['fips', 'GEO_ID']``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global health

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "american indian area/alaska native area (reservation or statistical entity only):*",
    }

    # Execute the request. Fail here with the HTTP status instead of later with
    # an opaque JSON decoding error when the API rejects the query.
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame; the first row of the payload is skipped.
    # Note the label order: the requested GEO_ID value lands in "fips" here.
    col_names = col_names_dp + ["fips", 'GEO_ID']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tribal_area"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    # Keep only the characters from position 9 (up to 13) of the "fips" string
    df["GEO_ID"] = df['fips'].str[9:14]
    df.drop('fips', axis=1, inplace=True)

    # Append data to the module-level accumulator
    health = pd.concat([health, df], ignore_index=True)

In [26]:
# Tribal-area pull: 2022, ACS 5-year data profile
api_aian(
    year="2022",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [27]:
# Tribal-area pull: 2021, ACS 5-year data profile
api_aian(
    year="2021",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [28]:
# Tribal-area pull: 2020, ACS 5-year data profile
api_aian(
    year="2020",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [29]:
# Tribal-area pull: 2019, ACS 5-year data profile
api_aian(
    year="2019",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [30]:
#This is the function for tribal areas, (2015 - 2018)
# For 2018 and before there is an additional geo description that gets removed in the code

def api_aian(year, dataset, get_vars):
    """Download tribal-area data-profile values (2015-2018) and append them to ``health``.

    This definition replaces the earlier ``api_aian``. It differs only in
    expecting one extra trailing response column, labelled ``R`` and dropped.

    Parameters
    ----------
    year : str
        Year used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year, e.g. ``"acs/acs5/profile?"``.
    get_vars : list of str
        Variable codes joined into the ``get`` parameter. The response columns
        are relabelled positionally as
        ``col_names_dp + ['fips', 'GEO_ID', 'R']``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global health

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "american indian area/alaska native area (reservation or statistical entity only):*",
    }

    # Execute the request. Fail here with the HTTP status instead of later with
    # an opaque JSON decoding error when the API rejects the query.
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame; the first row of the payload is skipped.
    # Note the label order: the requested GEO_ID value lands in "fips" here.
    col_names = col_names_dp + ['fips', 'GEO_ID', 'R']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tribal_area"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    # Keep only the characters from position 9 (up to 13) of the "fips" string
    df["GEO_ID"] = df['fips'].str[9:14]
    # Remove the helper column and the additional geo description column
    df.drop('fips', axis=1, inplace=True)
    df.drop('R', axis=1, inplace=True)

    # Append data to the module-level accumulator
    health = pd.concat([health, df], ignore_index=True)
   

In [31]:
# Tribal-area pull: 2018, ACS 5-year data profile (2015-2018 variable codes)
api_aian(
    year="2018",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [32]:
# Tribal-area pull: 2017, ACS 5-year data profile (2015-2018 variable codes)
api_aian(
    year="2017",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [33]:
# Tribal-area pull: 2016, ACS 5-year data profile (2015-2018 variable codes)
api_aian(
    year="2016",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [34]:
# Tribal-area pull: 2015, ACS 5-year data profile (2015-2018 variable codes)
api_aian(
    year="2015",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [35]:
#This is the function for tracts

def api_tract(year, dataset, get_vars):
    """Download tract-level data-profile values for one year and append them to ``health``.

    Parameters
    ----------
    year : str
        Year used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year, e.g. ``"acs/acs5/profile?"``.
    get_vars : list of str
        Variable codes joined into the ``get`` parameter. The response columns
        are relabelled positionally as
        ``col_names_dp + ['GEO_ID', 'state_fips', 'county_fips', 'tract_fips']``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global health

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify predicates
    # You can't use state:* for tracts. You have to list all of the states out.
    predicates = {
        "get": ",".join(get_vars),
        "for": "tract:*",
        "in": "state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72",
    }

    # Execute the request. Fail here with the HTTP status instead of later with
    # an opaque JSON decoding error when the API rejects the query.
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame; the first row of the payload is skipped. The
    # three trailing response columns are kept as state/county/tract fips.
    col_names = col_names_dp + ['GEO_ID', 'state_fips', 'county_fips', 'tract_fips']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tract"
    # Keep only characters 9-19 of the API GEO_ID string
    df["GEO_ID"] = df['GEO_ID'].str[9:20]

    # Append data to the module-level accumulator
    health = pd.concat([health, df], ignore_index=True)
   

In [36]:
# Tract pull: 2022, ACS 5-year data profile
api_tract(
    year="2022",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [37]:
# Tract pull: 2021, ACS 5-year data profile
api_tract(
    year="2021",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [38]:
# Tract pull: 2020, ACS 5-year data profile
api_tract(
    year="2020",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [39]:
# Tract pull: 2019, ACS 5-year data profile
api_tract(
    year="2019",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0072PE", "DP02_0072PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0040E", "DP02_0040M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [40]:
# Tract pull: 2018, ACS 5-year data profile (2015-2018 variable codes)
api_tract(
    year="2018",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [41]:
# Tract pull: 2017, ACS 5-year data profile (2015-2018 variable codes)
api_tract(
    year="2017",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [42]:
# Tract pull: 2016, ACS 5-year data profile (2015-2018 variable codes)
api_tract(
    year="2016",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

In [43]:
# Tract pull: 2015, ACS 5-year data profile (2015-2018 variable codes)
api_tract(
    year="2015",
    dataset="acs/acs5/profile?",
    get_vars=[
        "NAME",
        "DP02_0071PE", "DP02_0071PM",  # disability
        "DP03_0074PE", "DP03_0074PM",  # foodstamps
        "DP02_0039E", "DP02_0039M",  # fertility
        "DP03_0097PE", "DP03_0097PM",  # private
        "DP03_0098PE", "DP03_0098PM",  # public
        "GEO_ID",
    ],
)

The following section of code pulls disability-type data from the ACS subject tables.

In [44]:
#This is the function for the nation for disability type

def api_us(year, dataset):
    """Fetch nation-level subject-table rows for one year and append them to `disability`.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    dataset : str
        Dataset path appended after the year (callers pass e.g. "acs/acs1/subject?").

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `disability` frame is rebound with the new rows.
    """
    global disability

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_sub),
        "for": "us:*",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally
    col_names = col_names_sub + ['GEO_ID', "fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "national"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = "us"
    df.drop(columns="fips", inplace=True)

    #append data
    disability = pd.concat([disability, df], ignore_index=True)

In [45]:
#run for all years for 1-year data and run 2020 for 5-year data
# vin leaves 2020 out, so that year is requested from the 5-year dataset last
for req_year, req_dataset in [(yr, "acs/acs1/subject?") for yr in vin] + [("2020", "acs/acs5/subject?")]:
    api_us(year=req_year, dataset=req_dataset)

In [46]:
#This is the function for region and division for disability type

def api_rd(year, dataset, geo_for, area_type, geo_id):
    """Fetch region- or division-level subject-table rows and append them to `disability`.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    dataset : str
        Dataset path appended after the year (callers pass e.g. "acs/acs1/subject?").
    geo_for : str
        Value of the API "for" predicate (callers pass "region:*" or "division:*").
    area_type : str
        Label written to the "area_type" column.
    geo_id : str or list
        Column selector whose values are copied into "GEO_ID"; the callers in
        this file pass ["fips"].

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `disability` frame is rebound with the new rows.
    """
    global disability

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_sub),
        "for": geo_for,
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally
    col_names = col_names_sub + ['GEO_ID', "fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = area_type
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    # Overwrite GEO_ID with the column(s) named by the caller
    df["GEO_ID"] = df[geo_id]
    df.drop(columns="fips", inplace=True)

    #append data
    disability = pd.concat([disability, df], ignore_index=True)

In [47]:
#region
#run for all years for 1-year data and run 2020 for 5-year data
# vin leaves 2020 out, so that year is requested from the 5-year dataset last
for req_year, req_dataset in [(yr, "acs/acs1/subject?") for yr in vin] + [("2020", "acs/acs5/subject?")]:
    api_rd(
        year=req_year,
        dataset=req_dataset,
        geo_for="region:*",
        area_type="region",
        geo_id=["fips"],
    )

In [48]:
#division
#run for all years for 1-year data and run 2020 for 5-year data
# vin leaves 2020 out, so that year is requested from the 5-year dataset last
for req_year, req_dataset in [(yr, "acs/acs1/subject?") for yr in vin] + [("2020", "acs/acs5/subject?")]:
    api_rd(
        year=req_year,
        dataset=req_dataset,
        geo_for="division:*",
        area_type="division",
        geo_id=["fips"],
    )

In [49]:
#This is the function for the state for disability type

def api_st(year, dataset):
    """Fetch state-level subject-table rows for one year and append them to `disability`.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    dataset : str
        Dataset path appended after the year (callers pass e.g. "acs/acs1/subject?").

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `disability` frame is rebound with the new rows.
    """
    global disability

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_sub),
        "for": "state:*",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally.
    # NOTE(review): the state code is sliced out of the column labelled "fips",
    # which presumably holds the full GEO_ID string - confirm against get_vars_sub.
    col_names = col_names_sub + ["fips", 'GEO_ID']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    state_code = df['fips'].str[9:11]
    df["year"] = year
    df["area_type"] = "state"
    df["state_fips"] = state_code
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = state_code
    df.drop(columns="fips", inplace=True)

    #append data
    disability = pd.concat([disability, df], ignore_index=True)

In [50]:
#run for all years for 1-year data and run 2020 for 5-year data
# vin leaves 2020 out, so that year is requested from the 5-year dataset last
for req_year, req_dataset in [(yr, "acs/acs1/subject?") for yr in vin] + [("2020", "acs/acs5/subject?")]:
    api_st(year=req_year, dataset=req_dataset)

In [51]:
#This is the function for counties for disability type

def api_cty(year, dataset):
    """Fetch county-level subject-table rows for one year and append them to `disability`.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    dataset : str
        Dataset path appended after the year (callers pass "acs/acs5/subject?").

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `disability` frame is rebound with the new rows.
    """
    global disability

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_sub),
        "for": "county:*",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally
    col_names = col_names_sub + ['GEO_ID', "state_fips", "county_fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "county"
    df["tract_fips"] = ""
    # Keep characters 9-13 of the API GEO_ID string as the county identifier
    df["GEO_ID"] = df['GEO_ID'].str[9:14]

    #append data
    disability = pd.concat([disability, df], ignore_index=True)

In [52]:
#run for all years
# Counties use the 5-year subject dataset for every year in vina
for acs5_year in vina:
    api_cty(year=acs5_year, dataset="acs/acs5/subject?")

In [53]:
#This is the function for tribal areas for disability type (2019 - 2022)

def api_aian(year, dataset):
    """Fetch tribal-area subject-table rows for one year and append them to `disability`.

    This version labels two trailing response columns ("fips", "GEO_ID"); it is
    the one run for the years in `vinb`.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    dataset : str
        Dataset path appended after the year (callers pass "acs/acs5/subject?").

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `disability` frame is rebound with the new rows.
    """
    global disability

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_sub),
        "for": "american indian area/alaska native area (reservation or statistical entity only):*",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally.
    # NOTE(review): the area code is sliced out of the column labelled "fips",
    # which presumably holds the full GEO_ID string - confirm against get_vars_sub.
    col_names = col_names_sub + ["fips", 'GEO_ID']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tribal_area"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = df['fips'].str[9:14]
    df.drop(columns="fips", inplace=True)

    #append data
    disability = pd.concat([disability, df], ignore_index=True)

In [54]:
#run for 2019 - 2022
# vinb holds the years handled by the api_aian definition directly above
for acs5_year in vinb:
    api_aian(year=acs5_year, dataset="acs/acs5/subject?")

In [55]:
#This is the function for tribal areas for disability type (2015 - 2018)

def api_aian(year, dataset):
    """Fetch tribal-area subject-table rows for one year and append them to `disability`.

    Replaces the previous `api_aian` definition when this cell runs. This
    version labels three trailing response columns ("fips", "GEO_ID", "R") and
    discards "fips" and "R"; it is the one run for the years in `vinc`.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    dataset : str
        Dataset path appended after the year (callers pass "acs/acs5/subject?").

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `disability` frame is rebound with the new rows.
    """
    global disability

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_sub),
        "for": "american indian area/alaska native area (reservation or statistical entity only):*",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally (one more trailing column than
    # the 2019+ version, labelled "R" here)
    col_names = col_names_sub + ["fips", 'GEO_ID', "R"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tribal_area"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = df['fips'].str[9:14]
    df.drop(columns=["fips", "R"], inplace=True)

    #append data
    disability = pd.concat([disability, df], ignore_index=True)

In [56]:
#run for 2015 - 2018
# vinc holds the years handled by the api_aian definition directly above
for acs5_year in vinc:
    api_aian(year=acs5_year, dataset="acs/acs5/subject?")

In [57]:
#This is the function for tracts

def api_tract(year, dataset):
    """Fetch tract-level subject-table rows for one year and append them to `disability`.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    dataset : str
        Dataset path appended after the year (callers pass "acs/acs5/subject?").

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `disability` frame is rebound with the new rows.
    """
    global disability

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates; tracts are requested
    # within an explicit list of state codes
    predicates = {
        "get": ",".join(get_vars_sub),
        "for": "tract:*",
        "in": "state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally
    col_names = col_names_sub + ['GEO_ID', 'state_fips', 'county_fips', 'tract_fips']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tract"
    # Keep characters 9-19 of the API GEO_ID string as the tract identifier
    df["GEO_ID"] = df['GEO_ID'].str[9:20]

    #append data
    disability = pd.concat([disability, df], ignore_index=True)

In [58]:
#run for all years
# Tracts use the 5-year subject dataset for every year in vina
for acs5_year in vina:
    api_tract(year=acs5_year, dataset="acs/acs5/subject?")

In [59]:
# Merge datasets

# Left join: every row of `health` is kept, and the disability columns are
# attached where all seven key columns agree.
health_and_disability = pd.merge(
    health,
    disability,
    how='left',
    left_on=['Geo_name', 'GEO_ID', 'year', 'area_type', 'state_fips', 'county_fips', 'tract_fips'],
    right_on=['Geo_name', 'GEO_ID', 'year', 'area_type', 'state_fips', 'county_fips', 'tract_fips'],
)

The next section of code calculates rurality from the 2020 Decennial Census for each geography level, from the nation down to the block group.

In [60]:
#This is the code for rurality for the nation

# Build base URL
HOST = "https://api.census.gov/data"
year = "2020"
dataset = "dec/dhc"
base_url = "/".join([HOST, year, dataset])

# Specify Census variables and other predicates
get_vars = ["NAME", "P2_001N", "P2_003N", "GEO_ID"]
predicates = {
    "get": ",".join(get_vars),
    "for": "us:*",
}

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get(base_url, params=predicates)
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'fips', 'GEO_ID']
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "national"
df["state_fips"] = ""
df["county_fips"] = ""
df["tract_fips"] = ""
df["GEO_ID"] = "us"
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts and the helper column go
df.drop(columns=["fips", "Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)


In [61]:
#This is the code for rurality for region

# Build base URL
HOST = "https://api.census.gov/data"
year = "2020"
dataset = "dec/dhc"
base_url = "/".join([HOST, year, dataset])

# Specify Census variables and other predicates
get_vars = ["NAME", "P2_001N", "P2_003N", "GEO_ID"]
predicates = {
    "get": ",".join(get_vars),
    "for": "region:*",
}

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get(base_url, params=predicates)
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'GEO_ID', 'region']
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "region"
df["state_fips"] = ""
df["county_fips"] = ""
df["tract_fips"] = ""
# The short region code replaces the long GEO_ID string
df["GEO_ID"] = df["region"]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts and the helper column go
df.drop(columns=["region", "Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)


In [62]:
#This is the code for rurality for division

# Build base URL
HOST = "https://api.census.gov/data"
year = "2020"
dataset = "dec/dhc"
base_url = "/".join([HOST, year, dataset])

# Specify Census variables and other predicates
get_vars = ["NAME", "P2_001N", "P2_003N", "GEO_ID"]
predicates = {
    "get": ",".join(get_vars),
    "for": "division:*",
}

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get(base_url, params=predicates)
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'GEO_ID', 'division']
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "division"
df["state_fips"] = ""
df["county_fips"] = ""
df["tract_fips"] = ""
# The short division code replaces the long GEO_ID string
df["GEO_ID"] = df["division"]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts and the helper column go
df.drop(columns=["division", "Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)


In [63]:
#This is the code for rurality for state

# Build base URL
HOST = "https://api.census.gov/data"
year = "2020"
dataset = "dec/dhc"
base_url = "/".join([HOST, year, dataset])

# Specify Census variables and other predicates
get_vars = ["NAME", "P2_001N", "P2_003N", "GEO_ID"]
predicates = {
    "get": ",".join(get_vars),
    "for": "state:*",
}

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get(base_url, params=predicates)
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally).
# The requested GEO_ID string lands in the column labelled "fips" here.
col_names = ['Geo_name', 'Total', 'Rural', 'fips', 'GEO_ID']
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
state_code = df['fips'].str[9:11]
df["year"] = year
df["area_type"] = "state"
df["state_fips"] = state_code
df["county_fips"] = ""
df["tract_fips"] = ""
df["GEO_ID"] = state_code
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts and the helper column go
df.drop(columns=["fips", "Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)


In [64]:
#This is the code for rurality for county

# Build base URL
HOST = "https://api.census.gov/data"
year = "2020"
dataset = "dec/dhc"
base_url = "/".join([HOST, year, dataset])

# Specify Census variables and other predicates
get_vars = ["NAME", "P2_001N", "P2_003N", "GEO_ID"]
predicates = {
    "get": ",".join(get_vars),
    "for": "county:*",
}

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get(base_url, params=predicates)
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'GEO_ID', 'state_fips', 'county_fips']
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "county"
df["tract_fips"] = ""
# Keep characters 9-13 of the API GEO_ID string as the county identifier
df["GEO_ID"] = df['GEO_ID'].str[9:14]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts go
df.drop(columns=["Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)


In [65]:
#This is the code for rurality for tribal areas

# Build base URL
HOST = "https://api.census.gov/data"
year = "2020"
dataset = "dec/dhc"
base_url = "/".join([HOST, year, dataset])

# Specify Census variables and other predicates
get_vars = ["NAME", "P2_001N", "P2_003N", "GEO_ID"]
predicates = {
    "get": ",".join(get_vars),
    "for": "american indian area/alaska native area (reservation or statistical entity only):*",
}

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get(base_url, params=predicates)
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally).
# The requested GEO_ID string lands in the column labelled "fips" here.
col_names = ['Geo_name', 'Total', 'Rural', 'fips', 'GEO_ID']
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "tribal_area"
df["state_fips"] = ""
df["county_fips"] = ""
df["tract_fips"] = ""
df["GEO_ID"] = df['fips'].str[9:14]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts and the helper column go
df.drop(columns=["fips", "Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)


In [66]:
#This is the code for rurality for tracts

# Build base URL
HOST = "https://api.census.gov/data"
year = "2020"
dataset = "dec/dhc"
base_url = "/".join([HOST, year, dataset])

# Specify Census variables and other predicates; tracts are requested
# within an explicit list of state codes
get_vars = ["NAME", "P2_001N", "P2_003N", "GEO_ID"]
predicates = {
    "get": ",".join(get_vars),
    "for": "tract:*",
    "in": "state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72",
}

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get(base_url, params=predicates)
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'GEO_ID', 'state_fips', 'county_fips', 'tract_fips']
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "tract"
# Keep characters 9-19 of the API GEO_ID string as the tract identifier
df["GEO_ID"] = df['GEO_ID'].str[9:20]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts go
df.drop(columns=["Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)


In [67]:
#This is the code for rurality for block groups
#Unable to bring in all blocks at once, so doing it in sections by state
# NOTE(review): the request below uses for=block:* while the rows are labelled
# "block_group" / "blockgroup_fips" - confirm which geography is intended.

# The predicate-based request (base_url + params) was replaced by hand-written
# URLs, one per group of states; this cell covers states 01-21.
# Set the vintage here so the cell does not depend on a value left behind by
# an earlier cell.
year = "2020"

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get("https://api.census.gov/data/2020/dec/dhc?get=NAME,P2_001N,P2_003N,GEO_ID&for=block:*&in=state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21&in=county:*&in=tract:*")
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'GEO_ID', 'state_fips', 'county_fips', 'tract_fips', "blockgroup_fips"]
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "block_group"
# Keep characters 9-23 of the API GEO_ID string as the identifier
df["GEO_ID"] = df['GEO_ID'].str[9:24]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts go
df.drop(columns=["Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)

#https://api.census.gov/data/2020/dec/dhc?get=NAME&for=block:*&in=state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72&in=county:*&in=tract:*&key=YOUR_KEY_GOES_HERE


In [68]:
#This is the code for rurality for block groups
# NOTE(review): the request below uses for=block:* while the rows are labelled
# "block_group" / "blockgroup_fips" - confirm which geography is intended.

# This cell covers states 22-38. Set the vintage here so the cell does not
# depend on a value left behind by an earlier cell.
year = "2020"

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get("https://api.census.gov/data/2020/dec/dhc?get=NAME,P2_001N,P2_003N,GEO_ID&for=block:*&in=state:22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38&in=county:*&in=tract:*")
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'GEO_ID', 'state_fips', 'county_fips', 'tract_fips', "blockgroup_fips"]
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "block_group"
# Keep characters 9-23 of the API GEO_ID string as the identifier
df["GEO_ID"] = df['GEO_ID'].str[9:24]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts go
df.drop(columns=["Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)

#https://api.census.gov/data/2020/dec/dhc?get=NAME&for=block:*&in=state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72&in=county:*&in=tract:*&key=YOUR_KEY_GOES_HERE


In [69]:
#This is the code for rurality for block groups
# NOTE(review): the request below uses for=block:* while the rows are labelled
# "block_group" / "blockgroup_fips" - confirm which geography is intended.

# This cell covers states 39-49. Set the vintage here so the cell does not
# depend on a value left behind by an earlier cell.
year = "2020"

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get("https://api.census.gov/data/2020/dec/dhc?get=NAME,P2_001N,P2_003N,GEO_ID&for=block:*&in=state:39,40,41,42,44,45,46,47,48,49&in=county:*&in=tract:*")
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'GEO_ID', 'state_fips', 'county_fips', 'tract_fips', "blockgroup_fips"]
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "block_group"
# Keep characters 9-23 of the API GEO_ID string as the identifier
df["GEO_ID"] = df['GEO_ID'].str[9:24]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts go
df.drop(columns=["Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)

#https://api.census.gov/data/2020/dec/dhc?get=NAME&for=block:*&in=state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72&in=county:*&in=tract:*&key=YOUR_KEY_GOES_HERE

In [70]:
#This is the code for rurality for block groups
# NOTE(review): the request below uses for=block:* while the rows are labelled
# "block_group" / "blockgroup_fips" - confirm which geography is intended.

# This cell covers states 50-56 and 72. Set the vintage here so the cell does
# not depend on a value left behind by an earlier cell.
year = "2020"

# Execute the request; stop with the HTTP status and URL if the API rejects
# it, rather than failing later while decoding JSON
r = requests.get("https://api.census.gov/data/2020/dec/dhc?get=NAME,P2_001N,P2_003N,GEO_ID&for=block:*&in=state:50,51,53,54,55,56,72&in=county:*&in=tract:*")
r.raise_for_status()

# Construct the DataFrame (first payload row skipped, own labels applied positionally)
col_names = ['Geo_name', 'Total', 'Rural', 'GEO_ID', 'state_fips', 'county_fips', 'tract_fips', "blockgroup_fips"]
df = pd.DataFrame(columns=col_names, data=r.json()[1:])

# Specify variables as integers so the share can be computed
df = df.astype({"Rural": int, "Total": int})

# Create additional variables for file
df["year"] = year
df["area_type"] = "block_group"
# Keep characters 9-23 of the API GEO_ID string as the identifier
df["GEO_ID"] = df['GEO_ID'].str[9:24]
df["Percent_rural"] = df["Rural"] / df["Total"] * 100
# Only the percentage is kept; the raw counts go
df.drop(columns=["Total", "Rural"], inplace=True)

#append data
rural = pd.concat([rural, df], ignore_index=True)

#https://api.census.gov/data/2020/dec/dhc?get=NAME&for=block:*&in=state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72&in=county:*&in=tract:*&key=YOUR_KEY_GOES_HERE

In [71]:
# Outer join keeps rows found on either side; columns shared by both frames
# but not used as keys come back with _x / _y suffixes.
health_disability_rural = health_and_disability.merge(
    rural,
    how='outer',
    on=['area_type', 'GEO_ID', 'year'],
)

In [72]:
# Collapse the _x / _y column pairs left by the merge back into single columns.
# Because the merge is an outer join, rows that exist only in `rural` have no
# value in the _x column; take the _y value for those rows before discarding
# the _y column so their name and FIPS parts are not lost.
# (area_type is a merge key, so it has no _x / _y pair.)
for merged_col in ('Geo_name', 'state_fips', 'county_fips', 'tract_fips'):
    x_col = merged_col + '_x'
    y_col = merged_col + '_y'
    health_disability_rural[x_col] = health_disability_rural[x_col].combine_first(
        health_disability_rural[y_col]
    )
    health_disability_rural.drop(columns=y_col, inplace=True)
    # Renaming in place keeps the column at its original position
    health_disability_rural.rename(columns={x_col: merged_col}, inplace=True)

In [73]:
# Create CSV
#health_disability_rural.to_csv("health_disability_rural.csv", header=True, index=False) 

This part of the code downloads the number of hospitals from the County Business Patterns (CBP) dataset.

In [116]:
#This is the function for states for hospitals

def api_st(year, NAICS):
    """Fetch state-level "cbp" rows for industry code 622 and append them to `hospital`.

    Running this cell replaces the earlier `api_st(year, dataset)` definition.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    NAICS : str
        Name of the industry-code predicate for that vintage (the calls below
        pass "NAICS2017" or "NAICS2012"); also the label of the matching
        response column, which is dropped.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `hospital` frame is rebound with the new rows.
    """
    global hospital

    # Build base URL
    HOST = "https://api.census.gov/data"
    dataset = "cbp?"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_cbp),
        "for": "state:*",
        NAICS: "622",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally
    col_names = col_names_cbp + ['GEO_ID', NAICS, "state_fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "state"
    df["county_fips"] = ""
    df["tract_fips"] = ""
    # Keep characters 9-10 of the API GEO_ID string as the state identifier
    df["GEO_ID"] = df['GEO_ID'].str[9:11]
    df.drop(columns=NAICS, inplace=True)

    #append data
    hospital = pd.concat([hospital, df], ignore_index=True)


# Each group of years is paired with the NAICS predicate name it is requested
# with. The years are iterated directly so the notebook-wide `vin` list used
# by earlier cells is not overwritten.
for naics_var, cbp_years in (
    ("NAICS2017", ["2021", "2020", "2019", "2018", "2017"]),
    ("NAICS2012", ["2016", "2015"]),
):
    for cbp_year in cbp_years:
        api_st(year=cbp_year, NAICS=naics_var)

In [117]:
# lists of each region/division and the FIPS codes assigned to them
# ('14' is not an assigned state FIPS code and '08' was listed twice; both
# have been cleaned up so every code appears exactly once per grouping.)

Northeast = ['09', '23', '25', '33', '44', '50', '34', '36', '42']
South = ['10', '11', '12', '13', '24', '37', '45', '51', '54', '01', '21', '28', '47', '05', '22', '40', '48']
West = ['04', '08', '16', '35', '30', '49', '32', '56', '02', '06', '15', '41', '53']
Midwest = ['18', '17', '26', '39', '55', '19', '20', '27', '29', '31', '38', '46']

New_England = ['09', '23', '25', '33', '44', '50']
Middle_Atlantic = ['34', '36', '42']
East_North_Central = ['18', '17', '26', '39', '55']
West_North_Central = ['19', '20', '27', '29', '31', '38', '46']
South_Atlantic = ['10', '11', '12', '13', '24', '37', '45', '51', '54']
East_South_Central = ['01', '21', '28', '47']
West_South_Central = ['05', '22', '40', '48']
Mountain = ['04', '08', '16', '35', '30', '49', '32', '56']
Pacific = ['02', '06', '15', '41', '53']

# FIPS -> label lookups built once from the lists above, so each call is a
# single dictionary lookup instead of a scan through several lists
_REGION_BY_FIPS = {}
for _label, _members in (
    ('Northeast Region', Northeast),
    ('South Region', South),
    ('West Region', West),
    ('Midwest Region', Midwest),
):
    for _code in _members:
        _REGION_BY_FIPS[_code] = _label

_DIVISION_BY_FIPS = {}
for _label, _members in (
    ('New England Division', New_England),
    ('Middle Atlantic Division', Middle_Atlantic),
    ('East North Central Division', East_North_Central),
    ('West North Central Division', West_North_Central),
    ('South Atlantic Division', South_Atlantic),
    ('East South Central Division', East_South_Central),
    ('West South Central Division', West_South_Central),
    ('Mountain Division', Mountain),
    ('Pacific Division', Pacific),
):
    for _code in _members:
        _DIVISION_BY_FIPS[_code] = _label

#Functions to return regions/divisions category.
#Could change these to IDs etc.; that would mean updating the string value being returned

def get_region(FIPS):
    """Return the region label for a two-character state FIPS code.

    Returns None when the code is not in any of the region lists.
    """
    return _REGION_BY_FIPS.get(FIPS)


def get_division(FIPS):
    """Return the division label for a two-character state FIPS code.

    Returns None when the code is not in any of the division lists.
    """
    return _DIVISION_BY_FIPS.get(FIPS)


In [118]:
#region

# Copy only the state rows, so the totals stay correct even if `hospital`
# already holds other area types (for example when cells are re-run)
states = hospital[hospital["area_type"] == "state"].copy()

# Assign each state to its region and total the hospital counts per region/year
states["Number_of_hospitals"] = states["Number_of_hospitals"].astype("int")
states['region'] = states['GEO_ID'].apply(get_region)

# Sum only the hospital count; summing every column would also try to
# aggregate the text columns
region_hosp_sum_tables = states.groupby(['region', 'year'], as_index=False)["Number_of_hospitals"].sum()

#Create other variables for append
# Census region codes: 1 = Northeast, 2 = Midwest, 3 = South, 4 = West.
# GEO_ID is one of the keys in the final merge, so it has to use these codes.
region_codes = {
    'Northeast Region': "1",
    'Midwest Region': "2",
    'South Region': "3",
    'West Region': "4",
}
region_hosp_sum_tables["GEO_ID"] = region_hosp_sum_tables['region'].map(region_codes)
region_hosp_sum_tables["area_type"] = "region"
region_hosp_sum_tables["state_fips"] = ""
region_hosp_sum_tables["county_fips"] = ""
region_hosp_sum_tables["tract_fips"] = ""

region_hosp_sum_tables.rename(columns={'region': 'Geo_name'}, inplace=True)

  region_hosp_sum_tables = states.groupby(['region','year'], as_index=False).sum()


In [119]:
# Add the region totals underneath the rows already collected in `hospital`
hospital = pd.concat(
    objs=[hospital, region_hosp_sum_tables],
    ignore_index=True,
)

In [120]:
#Division

# Copy only the state rows: at this point `hospital` may also hold the region
# totals, which must not be counted a second time
div = hospital[hospital["area_type"] == "state"].copy()

# Assign each state to its division and total the hospital counts per division/year
div["Number_of_hospitals"] = div["Number_of_hospitals"].astype("int")
div['division'] = div['GEO_ID'].apply(get_division)

# Sum only the hospital count; summing every column would also try to
# aggregate the text columns
division_hosp_sum_tables = div.groupby(['division', 'year'], as_index=False)["Number_of_hospitals"].sum()

#Create other variables for append
division_codes = {
    'New England Division': "1",
    'Middle Atlantic Division': "2",
    'East North Central Division': "3",
    'West North Central Division': "4",
    'South Atlantic Division': "5",
    'East South Central Division': "6",
    'West South Central Division': "7",
    'Mountain Division': "8",
    'Pacific Division': "9",
}
division_hosp_sum_tables["GEO_ID"] = division_hosp_sum_tables['division'].map(division_codes)
division_hosp_sum_tables["area_type"] = "division"
division_hosp_sum_tables["state_fips"] = ""
division_hosp_sum_tables["county_fips"] = ""
division_hosp_sum_tables["tract_fips"] = ""

division_hosp_sum_tables.rename(columns={'division': 'Geo_name'}, inplace=True)

  division_hosp_sum_tables = div.groupby(['division','year'], as_index=False).sum()


In [121]:
# Add the division totals underneath the rows already collected in `hospital`
hospital = pd.concat(
    objs=[hospital, division_hosp_sum_tables],
    ignore_index=True,
)

In [123]:
#This is the function for the nation for hospitals

def api_us(year, NAICS):
    """Fetch the nation-level "cbp" row for industry code 622 and append it to `hospital`.

    Running this cell replaces the earlier `api_us(year, dataset)` definition.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    NAICS : str
        Name of the industry-code predicate for that vintage (the calls below
        pass "NAICS2017" or "NAICS2012"); also the label of the matching
        response column, which is dropped.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `hospital` frame is rebound with the new rows.
    """
    global hospital

    # Build base URL
    HOST = "https://api.census.gov/data"
    dataset = "cbp?"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_cbp),
        "for": "us:*",
        NAICS: "622",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally
    col_names = col_names_cbp + ['GEO_ID', NAICS, "fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "national"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = "us"
    df.drop(columns=["fips", NAICS], inplace=True)

    #append data
    hospital = pd.concat([hospital, df], ignore_index=True)


# Each group of years is paired with the NAICS predicate name it is requested
# with. The years are iterated directly so the notebook-wide `vin` list used
# by earlier cells is not overwritten.
for naics_var, cbp_years in (
    ("NAICS2017", ["2021", "2020", "2019", "2018", "2017"]),
    ("NAICS2012", ["2016", "2015"]),
):
    for cbp_year in cbp_years:
        api_us(year=cbp_year, NAICS=naics_var)

In [124]:
#This is the function for counties for hospitals

def api_cty(year, NAICS):
    """Fetch county-level "cbp" rows for industry code 622 and append them to `hospital`.

    Running this cell replaces the earlier `api_cty(year, dataset)` definition.

    Parameters
    ----------
    year : str
        API vintage; used as a URL path segment and stored in the "year" column.
    NAICS : str
        Name of the industry-code predicate for that vintage (the calls below
        pass "NAICS2017" or "NAICS2012"); also the label of the matching
        response column, which is dropped.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.

    Returns nothing; the module-level `hospital` frame is rebound with the new rows.
    """
    global hospital

    # Build base URL
    HOST = "https://api.census.gov/data"
    dataset = "cbp?"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars_cbp),
        "for": "county:*",
        NAICS: "622",
    }

    # Execute the request; surface the HTTP status and URL instead of a
    # JSON decoding error when the API rejects the query
    r = requests.get(base_url, params=predicates)
    r.raise_for_status()

    # Construct the DataFrame: the first row of the payload is skipped and
    # our own labels are applied positionally
    col_names = col_names_cbp + ['GEO_ID', NAICS, 'state_fips', 'county_fips']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "county"
    df["tract_fips"] = ""
    # Keep characters 9-13 of the API GEO_ID string as the county identifier
    df["GEO_ID"] = df['GEO_ID'].str[9:14]
    df.drop(columns=NAICS, inplace=True)

    #append data
    hospital = pd.concat([hospital, df], ignore_index=True)


# Each group of years is paired with the NAICS predicate name it is requested
# with. The years are iterated directly so the notebook-wide `vin` list used
# by earlier cells is not overwritten.
for naics_var, cbp_years in (
    ("NAICS2017", ["2021", "2020", "2019", "2018", "2017"]),
    ("NAICS2012", ["2016", "2015"]),
):
    for cbp_year in cbp_years:
        api_cty(year=cbp_year, NAICS=naics_var)

Combining all datasets: Health, Disability, Rural, Hospital

In [125]:
# Merge datasets

# Left join: every row of `health_disability_rural` is kept, and the hospital
# columns are attached where all seven key columns agree.
health_and_nutrition_measures = pd.merge(
    health_disability_rural,
    hospital,
    how='left',
    left_on=['Geo_name', 'GEO_ID', 'year', 'area_type', 'state_fips', 'county_fips', 'tract_fips'],
    right_on=['Geo_name', 'GEO_ID', 'year', 'area_type', 'state_fips', 'county_fips', 'tract_fips'],
)

In [126]:
# Create CSV
# Write the combined table with a header row and without the row index
health_and_nutrition_measures.to_csv(
    "/data/discover/Data/Health and Nutrition/health_and_nutrition_measures.csv",
    index=False,
    header=True,
)