In [1]:
# Setting up libraries
import requests
import pandas as pd
import numpy as np

# Empty accumulator frame; the api_* functions below append their rows to it
general_measures = pd.DataFrame()

# Year lists (as strings) that drive the request loops for the different geographies
vina = [str(year) for year in range(2022, 2014, -1)]  # 2022 ... 2015, every year
vin = [year for year in vina if year != "2020"]        # 2022 ... 2015 without 2020
vinb = vina[:4]                                        # 2022, 2021, 2020, 2019
vinc = vina[4:]                                        # 2018, 2017, 2016, 2015
vind = vin[:5]                                         # 2022, 2021, 2019, 2018, 2017
vine = vinc[:2]                                        # 2018, 2017


# Variables requested from the API on every call (joined into the "get" predicate)
get_vars = [
    "NAME",
    "B09010_001E",  # under 18 in households
    "B09010_002E",  # under 18 in hh w/ Public assistance
    "GEO_ID",
]

# Output column names for the first three requested variables, in the same order
col_names_base = [
    'Geo_name',
    'House_with_children',
    'House_children_public_assistance',
]

In [2]:
#This is the function for the nation

def api_us(year, dataset):
    """Fetch the nation-level rows for one year/dataset and append them to ``general_measures``.

    Parameters
    ----------
    year : str
        Year used as a path segment of the request URL (e.g. ``"2021"``).
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs1"``).

    Returns
    -------
    None
        The result is stored by rebinding the module-level ``general_measures``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "us:*",
    }

    # Without a timeout a stalled connection would block the notebook forever;
    # raise_for_status reports HTTP failures directly instead of letting them
    # show up later as a JSON decoding error.
    r = requests.get(base_url, params=predicates, timeout=120)
    r.raise_for_status()

    # Column names are assigned by position; the first returned row is skipped
    # (presumably the API's header row).
    col_names = col_names_base + ['GEO_ID', "fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "national"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = "us"  # overwrite the API's GEO_ID with a fixed identifier
    df = df.drop(columns="fips")

    # Append to the shared accumulator
    general_measures = pd.concat([general_measures, df], ignore_index=True)

# National pulls: the 1-year dataset for every year in `vin` (which has no 2020),
# followed by the 5-year dataset for 2020.
national_requests = [(yr, "acs/acs1") for yr in vin]
national_requests.append(("2020", "acs/acs5"))

for yr, ds in national_requests:
    api_us(year=yr, dataset=ds)

In [3]:
#This is the function for region and division

def api_rd(year, dataset, geo_for, area_type, geo_id):
    """Fetch region- or division-level rows and append them to ``general_measures``.

    Parameters
    ----------
    year : str
        Year used as a path segment of the request URL.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs1"``).
    geo_for : str
        Value of the ``for`` predicate (the notebook passes ``"region:*"`` or
        ``"division:*"``).
    area_type : str
        Label written to the ``area_type`` column.
    geo_id : str or list of str
        Column label in the freshly built frame whose values are copied into
        ``GEO_ID``. A one-item list (the notebook passes ``["fips"]``) and a
        plain string are both accepted.

    Returns
    -------
    None
        The result is stored by rebinding the module-level ``general_measures``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": geo_for,
    }

    # Bound the wait and fail loudly on HTTP errors rather than on JSON parsing.
    r = requests.get(base_url, params=predicates, timeout=120)
    r.raise_for_status()

    # Column names are assigned by position; the first returned row is skipped
    # (presumably the API's header row).
    col_names = col_names_base + ['GEO_ID', "fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = area_type
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""

    # A list key selects a DataFrame; collapse a single-column selection to a
    # Series so the assignment is an ordinary column copy. A multi-column
    # selection stays a DataFrame and is rejected by pandas as before.
    geo_values = df[geo_id]
    if isinstance(geo_values, pd.DataFrame):
        geo_values = geo_values.squeeze(axis=1)
    df["GEO_ID"] = geo_values
    df = df.drop(columns="fips")

    # Append to the shared accumulator
    general_measures = pd.concat([general_measures, df], ignore_index=True)

# Regions first, then divisions. For each: the 1-year dataset for every year in
# `vin` (which has no 2020), followed by the 5-year dataset for 2020.
for area in ("region", "division"):
    for yr in vin:
        api_rd(year=yr, dataset="acs/acs1", geo_for=area + ":*",
               area_type=area, geo_id=["fips"])

    api_rd(year="2020", dataset="acs/acs5", geo_for=area + ":*",
           area_type=area, geo_id=["fips"])

In [4]:
#This is the function for state

def api_st(year, dataset):
    """Fetch state-level rows for one year/dataset and append them to ``general_measures``.

    Parameters
    ----------
    year : str
        Year used as a path segment of the request URL.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs1"``).

    Returns
    -------
    None
        The result is stored by rebinding the module-level ``general_measures``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "state:*",
    }

    # Bound the wait and fail loudly on HTTP errors rather than on JSON parsing.
    r = requests.get(base_url, params=predicates, timeout=120)
    r.raise_for_status()

    # Column names are assigned by position. The requested GEO_ID is the fourth
    # column, so it is labelled 'GEO_ID' here (as in api_us / api_rd) and the
    # trailing column is the one that gets discarded.
    col_names = col_names_base + ['GEO_ID', "fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Characters 9-10 of the GEO_ID string are used as the state code
    # (presumably the part right after the "...US" prefix — confirm against the API).
    state_code = df["GEO_ID"].str[9:11]

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "state"
    df["state_fips"] = state_code
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = state_code
    df = df.drop(columns="fips")

    # Append to the shared accumulator
    general_measures = pd.concat([general_measures, df], ignore_index=True)
    
# State pulls: the 1-year dataset for every year in `vin` (which has no 2020),
# followed by the 5-year dataset for 2020.
state_requests = [(yr, "acs/acs1") for yr in vin]
state_requests.append(("2020", "acs/acs5"))

for yr, ds in state_requests:
    api_st(year=yr, dataset=ds)

In [5]:
#This is the function for county for data profiles

def api_cty(year, dataset):
    """Fetch county-level rows for one year/dataset and append them to ``general_measures``.

    Parameters
    ----------
    year : str
        Year used as a path segment of the request URL.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs5"``).

    Returns
    -------
    None
        The result is stored by rebinding the module-level ``general_measures``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "county:*",
    }

    # Bound the wait and fail loudly on HTTP errors rather than on JSON parsing.
    r = requests.get(base_url, params=predicates, timeout=120)
    r.raise_for_status()

    # Column names are assigned by position; the two trailing columns are kept
    # as state_fips / county_fips. The first returned row is skipped
    # (presumably the API's header row).
    col_names = col_names_base + ['GEO_ID', "state_fips", "county_fips"]
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "county"
    df["tract_fips"] = ""
    # Keep characters 9-13 of the GEO_ID string as the identifier
    df["GEO_ID"] = df["GEO_ID"].str[9:14]

    # Append to the shared accumulator
    general_measures = pd.concat([general_measures, df], ignore_index=True)
    
# County pulls: the 5-year dataset for every year in `vina` (2020 included)
for yr in vina:
    api_cty(year=yr, dataset="acs/acs5")

In [6]:
#This is the function for tribal areas for data profiles (2022 - 2019)

def api_aian(year, dataset):
    """Fetch tribal-area rows for one year/dataset and append them to ``general_measures``.

    The name ``api_aian`` is defined more than once in this notebook, with
    different expected response widths (five columns here, six for the
    2015-2018 cell). To stay usable regardless of which definition was executed
    last, this version accepts any number of trailing columns after the first
    five and discards them.

    Parameters
    ----------
    year : str
        Year used as a path segment of the request URL.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs5"``).

    Returns
    -------
    None
        The result is stored by rebinding the module-level ``general_measures``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "american indian area/alaska native area (reservation or statistical entity only):*",
    }

    # Bound the wait and fail loudly on HTTP errors rather than on JSON parsing.
    r = requests.get(base_url, params=predicates, timeout=120)
    r.raise_for_status()
    payload = r.json()

    # Column names are assigned by position. The requested GEO_ID is the fourth
    # column, so it is labelled 'GEO_ID' (as in api_us / api_rd); everything
    # after it is geography detail that is dropped below.
    col_names = col_names_base + ['GEO_ID', 'fips']
    width = len(payload[0]) if payload else len(col_names)
    extra_cols = ["_extra_geo_%d" % i for i in range(width - len(col_names))]
    df = pd.DataFrame(columns=col_names + extra_cols, data=payload[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tribal_area"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    # Keep characters 9-13 of the GEO_ID string as the identifier
    df["GEO_ID"] = df["GEO_ID"].str[9:14]
    df = df.drop(columns=["fips"] + extra_cols)

    # Append to the shared accumulator
    general_measures = pd.concat([general_measures, df], ignore_index=True)

# Tribal-area pulls: the 5-year dataset for each year in `vinb` (2022 back to 2019)
for yr in vinb:
    api_aian(year=yr, dataset="acs/acs5")

In [7]:
#This is the function for tribal areas (2015 - 2018)

def api_aian(year, dataset):
    """Fetch tribal-area rows for one year/dataset and append them to ``general_measures``.

    This definition replaces any earlier ``api_aian`` in the kernel. The
    notebook uses it for 2015-2018, where the response carries one more
    trailing column than in later years. Rather than hard-coding that width,
    any columns after the first five are discarded, so the function also works
    for the five-column layout if an earlier cell is re-run after this one.

    Parameters
    ----------
    year : str
        Year used as a path segment of the request URL.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs5"``).

    Returns
    -------
    None
        The result is stored by rebinding the module-level ``general_measures``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "american indian area/alaska native area (reservation or statistical entity only):*",
    }

    # Bound the wait and fail loudly on HTTP errors rather than on JSON parsing.
    r = requests.get(base_url, params=predicates, timeout=120)
    r.raise_for_status()
    payload = r.json()

    # Column names are assigned by position. The requested GEO_ID is the fourth
    # column, so it is labelled 'GEO_ID' (as in api_us / api_rd); everything
    # after it is geography detail that is dropped below.
    col_names = col_names_base + ['GEO_ID', 'fips']
    width = len(payload[0]) if payload else len(col_names)
    extra_cols = ["_extra_geo_%d" % i for i in range(width - len(col_names))]
    df = pd.DataFrame(columns=col_names + extra_cols, data=payload[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tribal_area"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    # Keep characters 9-13 of the GEO_ID string as the identifier
    df["GEO_ID"] = df["GEO_ID"].str[9:14]
    df = df.drop(columns=["fips"] + extra_cols)

    # Append to the shared accumulator
    general_measures = pd.concat([general_measures, df], ignore_index=True)
    
# Tribal-area pulls: the 5-year dataset for each year in `vinc` (2018 back to 2015)
for yr in vinc:
    api_aian(year=yr, dataset="acs/acs5")

In [8]:
#This is the function for tracts

def api_tract(year, dataset):
    """Fetch tract-level rows for one year/dataset and append them to ``general_measures``.

    Parameters
    ----------
    year : str
        Year used as a path segment of the request URL.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs5"``).

    Returns
    -------
    None
        The result is stored by rebinding the module-level ``general_measures``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates; tracts are requested for
    # an explicit list of state codes in a single call.
    predicates = {
        "get": ",".join(get_vars),
        "for": "tract:*",
        "in": "state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72",
    }

    # This is the largest query in the notebook, so allow a generous wait, but
    # still bound it; raise_for_status reports HTTP failures directly instead
    # of letting them show up as a JSON decoding error.
    r = requests.get(base_url, params=predicates, timeout=600)
    r.raise_for_status()

    # Column names are assigned by position; the three trailing columns are
    # kept as state_fips / county_fips / tract_fips. The first returned row is
    # skipped (presumably the API's header row).
    col_names = col_names_base + ['GEO_ID', 'state_fips', 'county_fips', 'tract_fips']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tract"
    # Keep characters 9-19 of the GEO_ID string as the identifier
    df["GEO_ID"] = df["GEO_ID"].str[9:20]

    # Append to the shared accumulator
    general_measures = pd.concat([general_measures, df], ignore_index=True)

# Tract pulls: the 5-year dataset for every year in `vina`
for yr in vina:
    api_tract(year=yr, dataset="acs/acs5")

In [9]:
# Convert only the two count columns to a numeric dtype; other columns are left as they are
count_cols = ["House_with_children", "House_children_public_assistance"]
general_measures[count_cols] = general_measures[count_cols].apply(pd.to_numeric)

In [10]:
# Public-assistance count as a percentage of the household-children count,
# rounded to one decimal place
general_measures = general_measures.assign(
    Percent_child_public_assistance=lambda frame: (
        frame['House_children_public_assistance'] / frame['House_with_children'] * 100
    ).round(1)
)

In [11]:
# Write the combined table to disk as CSV: header row included, row index omitted
general_measures.to_csv(
    "/data/discover/Data/General/general_measures4.csv",
    index=False,
    header=True,
)