# Downloading Data from Census API for Second General Domain

In [1]:
# Setting up libraries
import requests
import pandas as pd

# Accumulator: every api_* helper in this notebook appends its rows to this frame
general_measures = pd.DataFrame()

# Output column labels used with the longer variable lists (years 2019-2022 in this notebook).
# The labels are positional: one label per requested variable, in request order.
base_col_names_new = [
    'Geo_name',
    'Total_households', 'Total_households_moe',
    "Married_coupled_households", "Married_coupled__moe",
    "Cohab_coupled_households", "Cohab_coupled_households_moe",
    "Male_hh_nospousepartner", "Male_hh_nospousepartner_moe",
    "Female_hh_nospousepartner", "Female_hh_nospousepartner_moe",
    "Avg_hh_size", "Avg_hh_size_moe",
    "Percent_foreign_born", "Percent_foreign_born_moe",
    "Percent_morgage_gt_35%_inc", "Percent_morgage_gt_35%_inc_moe",
    # "Percent_drove_alone", "Percent_drove_alone_moe"   (currently disabled)
]

# Labels used with the shorter variable lists (years 2015-2018 in this notebook):
# the same labels minus the cohabiting / male-householder / female-householder pairs.
base_col_names_old = [
    label
    for label in base_col_names_new
    if not label.startswith(("Cohab_", "Male_hh_", "Female_hh_"))
]


In [2]:
#This is the function for the nation

def api_us(year, dataset, geo_for, area_type, geo_id, base_col_names, get_vars, timeout=120):
    """Request one table from the Census API and append it to ``general_measures``.

    Used in this notebook for the national geography.

    Parameters
    ----------
    year : str
        Survey year; used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year in the URL (e.g. ``"acs/acs1/profile?"``).
    geo_for : str
        Value sent as the ``for`` predicate (e.g. ``"us:*"``).
    area_type : str
        Label stored in the ``area_type`` column.
    geo_id : str
        Constant written to the ``GEO_ID`` column, replacing the value returned by the API.
    base_col_names : list of str
        Output labels for the requested variables, in request order, excluding the
        trailing ``GEO_ID`` variable.
    get_vars : list of str
        Census variable names to request; the callers' lists end with ``"GEO_ID"``.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot hang the notebook.

    Raises
    ------
    requests.HTTPError
        If the API responds with a 4xx/5xx status (e.g. an unknown variable name).

    Side effects
    ------------
    Rebinds the module-level ``general_measures`` to itself plus the new rows.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {"get": ",".join(get_vars), "for": geo_for}

    # Execute the request; fail loudly on an HTTP error instead of letting
    # r.json() raise an unrelated decoding error on the error page.
    r = requests.get(base_url, params=predicates, timeout=timeout)
    r.raise_for_status()

    # Construct the DataFrame. The first response row is skipped (the API's header row);
    # the last response column is labelled 'fips' and discarded below.
    col_names = base_col_names + ['GEO_ID', 'fips']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = area_type
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = geo_id
    df = df.drop(columns='fips')

    #append data
    general_measures = pd.concat([general_measures, df], ignore_index=True)

In [3]:
#nation
# The eight yearly requests differ only in year, dataset, column labels and variable
# list, so they are described as data and issued in a loop (same calls, same order).

# Variables requested for 2020-2022 (paired with base_col_names_new)
nation_vars_2020_2022 = [
    "NAME",
    "DP02_0001E", "DP02_0001M",    #total households
    "DP02_0002PE", "DP02_0002PM",  #married
    "DP02_0004PE", "DP02_0004PM",  #cohab
    "DP02_0006PE", "DP02_0006PM",  #male hh
    "DP02_0010PE", "DP02_0010PM",  #female hh
    "DP02_0016E", "DP02_0016M",    #AVG hh size
    "DP02_0094PE", "DP02_0094PM",  #foreign-born
    "DP04_0115PE", "DP04_0115PM",  #house affordability
    #"DP03_0019PE", "DP03_0019PM", #drove alone
    "GEO_ID",
]

# 2019 requests the same list except that foreign-born is DP02_0093 instead of DP02_0094
nation_vars_2019 = [v.replace("DP02_0094", "DP02_0093") for v in nation_vars_2020_2022]

# Variables requested for 2015-2018 (paired with base_col_names_old)
nation_vars_2015_2018 = [
    "NAME",
    "DP02_0001E", "DP02_0001M",    #total households
    "DP02_0004PE", "DP02_0004PM",  #married
    "DP02_0015E", "DP02_0015M",    #AVG hh size
    "DP02_0092PE", "DP02_0092PM",  #foreign-born
    "DP04_0115PE", "DP04_0115PM",  #house affordability
    #"DP03_0019PE", "DP03_0019PM", #drove alone
    "GEO_ID",
]

# (year, dataset, output column labels, variables) in the order rows are appended
nation_requests = [
    ("2022", "acs/acs1/profile?", base_col_names_new, nation_vars_2020_2022),
    ("2021", "acs/acs1/profile?", base_col_names_new, nation_vars_2020_2022),
    ("2020", "acs/acs5/profile?", base_col_names_new, nation_vars_2020_2022),  #1-year data are unavailable for 2020
    ("2019", "acs/acs1/profile?", base_col_names_new, nation_vars_2019),
    ("2018", "acs/acs1/profile?", base_col_names_old, nation_vars_2015_2018),
    ("2017", "acs/acs1/profile?", base_col_names_old, nation_vars_2015_2018),
    ("2016", "acs/acs1/profile?", base_col_names_old, nation_vars_2015_2018),
    ("2015", "acs/acs1/profile?", base_col_names_old, nation_vars_2015_2018),
]

for req_year, req_dataset, req_cols, req_vars in nation_requests:
    api_us(year = req_year, dataset = req_dataset, geo_for = "us:*", area_type = "national", geo_id = "us",
           base_col_names = req_cols, get_vars = req_vars)

In [4]:
#This is the function for the region and division

def api_rd(year, dataset, geo_for, area_type, geo_id, base_col_names, get_vars, timeout=120):
    """Request one table from the Census API and append it to ``general_measures``.

    Used in this notebook for the region and division geographies.

    Parameters
    ----------
    year : str
        Survey year; used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year in the URL (e.g. ``"acs/acs1/profile?"``).
    geo_for : str
        Value sent as the ``for`` predicate (e.g. ``"region:*"``).
    area_type : str
        Label stored in the ``area_type`` column.
    geo_id : str or list of str
        Name of the response column whose values are copied into ``GEO_ID``.
        The callers pass the one-element list ``["fips"]``.
    base_col_names : list of str
        Output labels for the requested variables, in request order, excluding the
        trailing ``GEO_ID`` variable.
    get_vars : list of str
        Census variable names to request; the callers' lists end with ``"GEO_ID"``.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot hang the notebook.

    Raises
    ------
    requests.HTTPError
        If the API responds with a 4xx/5xx status.

    Side effects
    ------------
    Rebinds the module-level ``general_measures`` to itself plus the new rows.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {"get": ",".join(get_vars), "for": geo_for}

    # Execute the request; fail loudly on an HTTP error instead of letting
    # r.json() raise an unrelated decoding error on the error page.
    r = requests.get(base_url, params=predicates, timeout=timeout)
    r.raise_for_status()

    # Construct the DataFrame. The first response row is skipped (the API's header row);
    # the last response column is labelled 'fips'.
    col_names = base_col_names + ['GEO_ID', 'fips']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = area_type
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""

    # Unwrap a one-element list so a plain column (Series) is assigned rather than
    # a one-column DataFrame; any other value is used for the lookup unchanged.
    if isinstance(geo_id, (list, tuple)) and len(geo_id) == 1:
        source_col = geo_id[0]
    else:
        source_col = geo_id
    df["GEO_ID"] = df[source_col]
    df = df.drop(columns='fips')

    #append data
    general_measures = pd.concat([general_measures, df], ignore_index=True)

In [5]:
#region
# The eight yearly requests differ only in year, dataset, column labels and variable
# list, so they are described as data and issued in a loop (same calls, same order).

# Variables requested for 2020-2022 (paired with base_col_names_new)
region_vars_2020_2022 = [
    "NAME",
    "DP02_0001E", "DP02_0001M",    #total households
    "DP02_0002PE", "DP02_0002PM",  #married
    "DP02_0004PE", "DP02_0004PM",  #cohab
    "DP02_0006PE", "DP02_0006PM",  #male hh
    "DP02_0010PE", "DP02_0010PM",  #female hh
    "DP02_0016E", "DP02_0016M",    #AVG hh size
    "DP02_0094PE", "DP02_0094PM",  #foreign-born
    "DP04_0115PE", "DP04_0115PM",  #house affordability
    #"DP03_0019PE", "DP03_0019PM", #drove alone
    "GEO_ID",
]

# 2019 requests the same list except that foreign-born is DP02_0093 instead of DP02_0094
region_vars_2019 = [v.replace("DP02_0094", "DP02_0093") for v in region_vars_2020_2022]

# Variables requested for 2015-2018 (paired with base_col_names_old)
region_vars_2015_2018 = [
    "NAME",
    "DP02_0001E", "DP02_0001M",    #total households
    "DP02_0004PE", "DP02_0004PM",  #married
    "DP02_0015E", "DP02_0015M",    #AVG hh size
    "DP02_0092PE", "DP02_0092PM",  #foreign-born
    "DP04_0115PE", "DP04_0115PM",  #house affordability
    #"DP03_0019PE", "DP03_0019PM", #drove alone
    "GEO_ID",
]

# (year, dataset, output column labels, variables) in the order rows are appended
region_requests = [
    ("2022", "acs/acs1/profile?", base_col_names_new, region_vars_2020_2022),
    ("2021", "acs/acs1/profile?", base_col_names_new, region_vars_2020_2022),
    ("2020", "acs/acs5/profile?", base_col_names_new, region_vars_2020_2022),  #1-year data are unavailable for 2020
    ("2019", "acs/acs1/profile?", base_col_names_new, region_vars_2019),
    ("2018", "acs/acs1/profile?", base_col_names_old, region_vars_2015_2018),
    ("2017", "acs/acs1/profile?", base_col_names_old, region_vars_2015_2018),
    ("2016", "acs/acs1/profile?", base_col_names_old, region_vars_2015_2018),
    ("2015", "acs/acs1/profile?", base_col_names_old, region_vars_2015_2018),
]

for req_year, req_dataset, req_cols, req_vars in region_requests:
    api_rd(year = req_year, dataset = req_dataset, geo_for = "region:*", area_type = "region", geo_id = ["fips"],
           base_col_names = req_cols, get_vars = req_vars)

In [6]:
#division
# The eight yearly requests differ only in year, dataset, column labels and variable
# list, so they are described as data and issued in a loop (same calls, same order).

# Variables requested for 2020-2022 (paired with base_col_names_new)
division_vars_2020_2022 = [
    "NAME",
    "DP02_0001E", "DP02_0001M",    #total households
    "DP02_0002PE", "DP02_0002PM",  #married
    "DP02_0004PE", "DP02_0004PM",  #cohab
    "DP02_0006PE", "DP02_0006PM",  #male hh
    "DP02_0010PE", "DP02_0010PM",  #female hh
    "DP02_0016E", "DP02_0016M",    #AVG hh size
    "DP02_0094PE", "DP02_0094PM",  #foreign-born
    "DP04_0115PE", "DP04_0115PM",  #house affordability
    #"DP03_0019PE", "DP03_0019PM", #drove alone
    "GEO_ID",
]

# 2019 requests the same list except that foreign-born is DP02_0093 instead of DP02_0094
division_vars_2019 = [v.replace("DP02_0094", "DP02_0093") for v in division_vars_2020_2022]

# Variables requested for 2015-2018 (paired with base_col_names_old)
division_vars_2015_2018 = [
    "NAME",
    "DP02_0001E", "DP02_0001M",    #total households
    "DP02_0004PE", "DP02_0004PM",  #married
    "DP02_0015E", "DP02_0015M",    #AVG hh size
    "DP02_0092PE", "DP02_0092PM",  #foreign-born
    "DP04_0115PE", "DP04_0115PM",  #house affordability
    #"DP03_0019PE", "DP03_0019PM", #drove alone
    "GEO_ID",
]

# (year, dataset, output column labels, variables) in the order rows are appended
division_requests = [
    ("2022", "acs/acs1/profile?", base_col_names_new, division_vars_2020_2022),
    ("2021", "acs/acs1/profile?", base_col_names_new, division_vars_2020_2022),
    ("2020", "acs/acs5/profile?", base_col_names_new, division_vars_2020_2022),  #1-year data are unavailable for 2020
    ("2019", "acs/acs1/profile?", base_col_names_new, division_vars_2019),
    ("2018", "acs/acs1/profile?", base_col_names_old, division_vars_2015_2018),
    ("2017", "acs/acs1/profile?", base_col_names_old, division_vars_2015_2018),
    ("2016", "acs/acs1/profile?", base_col_names_old, division_vars_2015_2018),
    ("2015", "acs/acs1/profile?", base_col_names_old, division_vars_2015_2018),
]

for req_year, req_dataset, req_cols, req_vars in division_requests:
    api_rd(year = req_year, dataset = req_dataset, geo_for = "division:*", area_type = "division", geo_id = ["fips"],
           base_col_names = req_cols, get_vars = req_vars)

In [7]:
#This is the function for the states

def api_st(year, dataset, base_col_names, get_vars, timeout=120):
    """Request one state-level table from the Census API and append it to ``general_measures``.

    Parameters
    ----------
    year : str
        Survey year; used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year in the URL (e.g. ``"acs/acs1/profile?"``).
    base_col_names : list of str
        Output labels for the requested variables, in request order, excluding the
        trailing ``GEO_ID`` variable.
    get_vars : list of str
        Census variable names to request; the callers' lists end with ``"GEO_ID"``.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot hang the notebook.

    Raises
    ------
    requests.HTTPError
        If the API responds with a 4xx/5xx status.

    Side effects
    ------------
    Rebinds the module-level ``general_measures`` to itself plus the new rows.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {"get": ",".join(get_vars), "for": "state:*"}

    # Execute the request; fail loudly on an HTTP error instead of letting
    # r.json() raise an unrelated decoding error on the error page.
    r = requests.get(base_url, params=predicates, timeout=timeout)
    r.raise_for_status()

    # Construct the DataFrame. The first response row is skipped (the API's header row).
    # NOTE: the two trailing labels are in the opposite order to api_us/api_rd. With the
    # callers' get_vars (ending in "GEO_ID"), the label 'fips' lands on the requested
    # GEO_ID value and the label 'GEO_ID' lands on the final response column.
    col_names = base_col_names + ['fips', 'GEO_ID']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Characters 9-10 of the 'fips' column supply both state_fips and GEO_ID
    # (assumes values shaped like "0400000US01" -- TODO confirm against the API).
    state_code = df['fips'].str[9:11]

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "state"
    df["state_fips"] = state_code
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = state_code
    df = df.drop(columns='fips')

    #append data
    general_measures = pd.concat([general_measures, df], ignore_index=True)
   

In [8]:
#state
# The eight yearly requests differ only in year, dataset, column labels and variable
# list, so they are described as data and issued in a loop (same calls, same order).

# Variables requested for 2020-2022 (paired with base_col_names_new)
state_vars_2020_2022 = [
    "NAME",
    "DP02_0001E", "DP02_0001M",    #total households
    "DP02_0002PE", "DP02_0002PM",  #married
    "DP02_0004PE", "DP02_0004PM",  #cohab
    "DP02_0006PE", "DP02_0006PM",  #male hh
    "DP02_0010PE", "DP02_0010PM",  #female hh
    "DP02_0016E", "DP02_0016M",    #AVG hh size
    "DP02_0094PE", "DP02_0094PM",  #foreign-born
    "DP04_0115PE", "DP04_0115PM",  #house affordability
    #"DP03_0019PE", "DP03_0019PM", #drove alone
    "GEO_ID",
]

# 2019 requests the same list except that foreign-born is DP02_0093 instead of DP02_0094
state_vars_2019 = [v.replace("DP02_0094", "DP02_0093") for v in state_vars_2020_2022]

# Variables requested for 2015-2018 (paired with base_col_names_old)
state_vars_2015_2018 = [
    "NAME",
    "DP02_0001E", "DP02_0001M",    #total households
    "DP02_0004PE", "DP02_0004PM",  #married
    "DP02_0015E", "DP02_0015M",    #AVG hh size
    "DP02_0092PE", "DP02_0092PM",  #foreign-born
    "DP04_0115PE", "DP04_0115PM",  #house affordability
    #"DP03_0019PE", "DP03_0019PM", #drove alone
    "GEO_ID",
]

# (year, dataset, output column labels, variables) in the order rows are appended
state_requests = [
    ("2022", "acs/acs1/profile?", base_col_names_new, state_vars_2020_2022),
    ("2021", "acs/acs1/profile?", base_col_names_new, state_vars_2020_2022),
    ("2020", "acs/acs5/profile?", base_col_names_new, state_vars_2020_2022),  #1-year data are unavailable for 2020
    ("2019", "acs/acs1/profile?", base_col_names_new, state_vars_2019),
    ("2018", "acs/acs1/profile?", base_col_names_old, state_vars_2015_2018),
    ("2017", "acs/acs1/profile?", base_col_names_old, state_vars_2015_2018),
    ("2016", "acs/acs1/profile?", base_col_names_old, state_vars_2015_2018),
    ("2015", "acs/acs1/profile?", base_col_names_old, state_vars_2015_2018),
]

for req_year, req_dataset, req_cols, req_vars in state_requests:
    api_st(year = req_year, dataset = req_dataset,
           base_col_names = req_cols, get_vars = req_vars)

In [9]:
#This is the function for counties

def api_cty(year, dataset, base_col_names, get_vars, timeout=120):
    """Request one county-level table from the Census API and append it to ``general_measures``.

    Parameters
    ----------
    year : str
        Survey year; used as a URL path segment and stored in the ``year`` column.
    dataset : str
        Dataset path placed after the year in the URL (e.g. ``"acs/acs5/profile?"``).
    base_col_names : list of str
        Output labels for the requested variables, in request order, excluding the
        trailing ``GEO_ID`` variable.
    get_vars : list of str
        Census variable names to request; the callers' lists end with ``"GEO_ID"``.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot hang the notebook.

    Raises
    ------
    requests.HTTPError
        If the API responds with a 4xx/5xx status.

    Side effects
    ------------
    Rebinds the module-level ``general_measures`` to itself plus the new rows.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {"get": ",".join(get_vars), "for": "county:*"}

    # Execute the request; fail loudly on an HTTP error instead of letting
    # r.json() raise an unrelated decoding error on the error page.
    r = requests.get(base_url, params=predicates, timeout=timeout)
    r.raise_for_status()

    # Construct the DataFrame. The first response row is skipped (the API's header row);
    # the last two response columns are kept as state_fips and county_fips.
    col_names = base_col_names + ['GEO_ID', 'state_fips', 'county_fips']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "county"
    df["tract_fips"] = ""
    # Keep characters 9-13 of the API's GEO_ID
    # (assumes values shaped like "0500000US01001" -- TODO confirm against the API).
    df["GEO_ID"] = df['GEO_ID'].str[9:14]

    #append data
    general_measures = pd.concat([general_measures, df], ignore_index=True)
   

In [10]:
# County rows for 2022, requested from the acs5 profile dataset
api_cty(
    year="2022",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # drove alone ("DP03_0019PE", "DP03_0019PM") is currently disabled
        "GEO_ID",
    ],
)

In [11]:
# County rows for 2021, requested from the acs5 profile dataset
api_cty(
    year="2021",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # drove alone ("DP03_0019PE", "DP03_0019PM") is currently disabled
        "GEO_ID",
    ],
)

In [12]:
# County rows for 2020, requested from the acs5 profile dataset
api_cty(
    year="2020",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # drove alone ("DP03_0019PE", "DP03_0019PM") is currently disabled
        "GEO_ID",
    ],
)

In [13]:
# County rows for 2019, requested from the acs5 profile dataset
# (foreign-born is requested as DP02_0093 for this year)
api_cty(
    year="2019",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0093PE", "DP02_0093PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # drove alone ("DP03_0019PE", "DP03_0019PM") is currently disabled
        "GEO_ID",
    ],
)

In [14]:
# County rows for 2018, requested from the acs5 profile dataset (shorter variable list)
api_cty(
    year="2018",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # drove alone ("DP03_0019PE", "DP03_0019PM") is currently disabled
        "GEO_ID",
    ],
)

In [15]:
# County rows for 2017, requested from the acs5 profile dataset (shorter variable list)
api_cty(
    year="2017",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # drove alone ("DP03_0019PE", "DP03_0019PM") is currently disabled
        "GEO_ID",
    ],
)

In [16]:
# County rows for 2016, requested from the acs5 profile dataset (shorter variable list)
api_cty(
    year="2016",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # drove alone ("DP03_0019PE", "DP03_0019PM") is currently disabled
        "GEO_ID",
    ],
)

In [17]:
# County rows for 2015, requested from the acs5 profile dataset (shorter variable list)
api_cty(
    year="2015",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # drove alone ("DP03_0019PE", "DP03_0019PM") is currently disabled
        "GEO_ID",
    ],
)

In [18]:
#This is the function for tribal areas (2019 - 2022)

def api_aian(year, dataset, base_col_names, get_vars, timeout=300):
    """Download one year of tribal-area data and append it to ``general_measures``.

    Parameters
    ----------
    year : str
        Vintage, used as a URL path segment (e.g. ``"2022"``) and stored in
        the ``year`` column of the result.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs5/profile?"``).
    base_col_names : list of str
        Names given to the leading response columns, in response order.
    get_vars : list of str
        Census variable codes to request. Callers in this notebook list
        ``"GEO_ID"`` last.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection cannot block
        the notebook indefinitely. Defaults to 300 seconds.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status.

    Notes
    -----
    Rebinds the module-level ``general_measures`` to the concatenation of
    its previous contents and the newly downloaded rows; returns ``None``.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "american indian area/alaska native area (reservation or statistical entity only):*",
    }

    # Execute the request. Check the HTTP status first so a failed call
    # surfaces as an HTTPError rather than an opaque JSON decoding error.
    r = requests.get(base_url, params=predicates, timeout=timeout)
    r.raise_for_status()
    payload = r.json()
    header = payload[0] if payload else []
    rows = payload[1:]

    # Construct the DataFrame.
    # The first trailing column is labelled 'fips' and is sliced below to
    # build the final GEO_ID (presumably it is the full GEO_ID string
    # requested last in get_vars -- confirm). This notebook defines api_aian
    # twice, once with two trailing columns and once with a third one named
    # 'R'; detecting the layout from the header width makes either
    # definition work for every year, whichever cell was run last.
    trailing = ['fips', 'GEO_ID']
    if len(header) == len(base_col_names) + 3:
        trailing.append('R')
    df = pd.DataFrame(columns=base_col_names + trailing, data=rows)

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tribal_area"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = df['fips'].str[9:14]

    # Drop the helper columns; only the rebuilt GEO_ID is kept.
    df = df.drop(columns=[name for name in trailing if name != 'GEO_ID'])

    #append data
    general_measures = pd.concat([general_measures, df], ignore_index=True)
   

In [19]:
# 2022 ACS 5-year data-profile request through api_aian, using the newer
# variable layout (base_col_names_new).
api_aian(
    year="2022",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [20]:
# 2021 ACS 5-year data-profile request through api_aian, using the newer
# variable layout (base_col_names_new).
api_aian(
    year="2021",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [21]:
# 2020 ACS 5-year data-profile request through api_aian, using the newer
# variable layout (base_col_names_new).
api_aian(
    year="2020",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [22]:
# 2019 ACS 5-year data-profile request through api_aian, using the newer
# variable layout (base_col_names_new). Note the foreign-born code used for
# this year is DP02_0093, not the DP02_0094 used in the 2020-2022 cells.
api_aian(
    year="2019",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0093PE", "DP02_0093PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [23]:
#This is the function for tribal areas, (2015 - 2018)

def api_aian(year, dataset, base_col_names, get_vars, timeout=300):
    """Download one year of tribal-area data and append it to ``general_measures``.

    This definition replaces the earlier ``api_aian`` in this notebook once
    its cell has run.

    Parameters
    ----------
    year : str
        Vintage, used as a URL path segment (e.g. ``"2018"``) and stored in
        the ``year`` column of the result.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs5/profile?"``).
    base_col_names : list of str
        Names given to the leading response columns, in response order.
    get_vars : list of str
        Census variable codes to request. Callers in this notebook list
        ``"GEO_ID"`` last.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection cannot block
        the notebook indefinitely. Defaults to 300 seconds.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status.

    Notes
    -----
    Rebinds the module-level ``general_measures`` to the concatenation of
    its previous contents and the newly downloaded rows; returns ``None``.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates
    predicates = {
        "get": ",".join(get_vars),
        "for": "american indian area/alaska native area (reservation or statistical entity only):*",
    }

    # Execute the request. Check the HTTP status first so a failed call
    # surfaces as an HTTPError rather than an opaque JSON decoding error.
    r = requests.get(base_url, params=predicates, timeout=timeout)
    r.raise_for_status()
    payload = r.json()
    header = payload[0] if payload else []
    rows = payload[1:]

    # Construct the DataFrame.
    # The first trailing column is labelled 'fips' and is sliced below to
    # build the final GEO_ID (presumably it is the full GEO_ID string
    # requested last in get_vars -- confirm). The 2015-2018 responses carry a
    # third trailing column, named 'R' here and discarded. The layout is
    # detected from the header width so that responses with only two
    # trailing columns are handled as well, which keeps this definition
    # usable for every year regardless of cell execution order.
    trailing = ['fips', 'GEO_ID']
    if len(header) == len(base_col_names) + 3:
        trailing.append('R')
    df = pd.DataFrame(columns=base_col_names + trailing, data=rows)

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tribal_area"
    df["state_fips"] = ""
    df["county_fips"] = ""
    df["tract_fips"] = ""
    df["GEO_ID"] = df['fips'].str[9:14]

    # Drop the helper columns; only the rebuilt GEO_ID is kept.
    df = df.drop(columns=[name for name in trailing if name != 'GEO_ID'])

    #append data
    general_measures = pd.concat([general_measures, df], ignore_index=True)
   

In [24]:
# From 2018 back, the requests use the older variable layout: different
# DP02 codes and the shorter base_col_names_old column list.

api_aian(
    year="2018",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [25]:
# 2017 ACS 5-year data-profile request through api_aian, using the older
# variable layout (base_col_names_old).
api_aian(
    year="2017",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [26]:
# 2016 ACS 5-year data-profile request through api_aian, using the older
# variable layout (base_col_names_old).
api_aian(
    year="2016",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [27]:
# 2015 ACS 5-year data-profile request through api_aian, using the older
# variable layout (base_col_names_old).
api_aian(
    year="2015",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [28]:
#This is the function for tracts

def api_tract(year, dataset, base_col_names, get_vars, timeout=300):
    """Download one year of tract-level data and append it to ``general_measures``.

    Parameters
    ----------
    year : str
        Vintage, used as a URL path segment (e.g. ``"2022"``) and stored in
        the ``year`` column of the result.
    dataset : str
        Dataset path appended after the year (e.g. ``"acs/acs5/profile?"``).
    base_col_names : list of str
        Names given to the leading response columns, in response order.
    get_vars : list of str
        Census variable codes to request. Callers in this notebook list
        ``"GEO_ID"`` last.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection cannot block
        the notebook indefinitely. Defaults to 300 seconds.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status.

    Notes
    -----
    Rebinds the module-level ``general_measures`` to the concatenation of
    its previous contents and the newly downloaded rows; returns ``None``.
    """
    global general_measures

    # Build base URL
    HOST = "https://api.census.gov/data"
    base_url = "/".join([HOST, year, dataset])

    # Specify Census variables and other predicates.
    # The "in" clause lists 52 state-level codes (looks like the 50 states,
    # DC and Puerto Rico -- confirm before relying on it).
    predicates = {
        "get": ",".join(get_vars),
        "for": "tract:*",
        "in": "state:01,02,04,05,06,08,09,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72",
    }

    # Execute the request. Check the HTTP status first so a failed call
    # surfaces as an HTTPError rather than an opaque JSON decoding error.
    r = requests.get(base_url, params=predicates, timeout=timeout)
    r.raise_for_status()

    # Construct the DataFrame; the first row of the payload is skipped and
    # replaced by the notebook's own column names.
    col_names = base_col_names + ['GEO_ID', 'state_fips', 'county_fips', 'tract_fips']
    df = pd.DataFrame(columns=col_names, data=r.json()[1:])

    # Create additional variables for file
    df["year"] = year
    df["area_type"] = "tract"
    # Keep characters 9-19 of the returned identifier as the GEO_ID.
    df["GEO_ID"] = df['GEO_ID'].str[9:20]

    #append data
    general_measures = pd.concat([general_measures, df], ignore_index=True)
   

In [29]:
# 2022 ACS 5-year data-profile request through api_tract, using the newer
# variable layout (base_col_names_new).
api_tract(
    year="2022",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [30]:
# 2021 ACS 5-year data-profile request through api_tract, using the newer
# variable layout (base_col_names_new).
api_tract(
    year="2021",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [31]:
# 2020 ACS 5-year data-profile request through api_tract, using the newer
# variable layout (base_col_names_new).
api_tract(
    year="2020",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0094PE", "DP02_0094PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [32]:
# 2019 ACS 5-year data-profile request through api_tract, using the newer
# variable layout (base_col_names_new). Note the foreign-born code used for
# this year is DP02_0093, not the DP02_0094 used in the 2020-2022 cells.
api_tract(
    year="2019",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_new,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0002PE", "DP02_0002PM",  # married
        "DP02_0004PE", "DP02_0004PM",  # cohab
        "DP02_0006PE", "DP02_0006PM",  # male hh
        "DP02_0010PE", "DP02_0010PM",  # female hh
        "DP02_0016E", "DP02_0016M",    # AVG hh size
        "DP02_0093PE", "DP02_0093PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [33]:
# 2018 ACS 5-year data-profile request through api_tract, using the older
# variable layout (base_col_names_old).
api_tract(
    year="2018",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [34]:
# 2017 ACS 5-year data-profile request through api_tract, using the older
# variable layout (base_col_names_old).
api_tract(
    year="2017",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [35]:
# 2016 ACS 5-year data-profile request through api_tract, using the older
# variable layout (base_col_names_old).
api_tract(
    year="2016",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [36]:
# 2015 ACS 5-year data-profile request through api_tract, using the older
# variable layout (base_col_names_old).
api_tract(
    year="2015",
    dataset="acs/acs5/profile?",
    base_col_names=base_col_names_old,
    get_vars=[
        "NAME",
        "DP02_0001E", "DP02_0001M",    # total households
        "DP02_0004PE", "DP02_0004PM",  # married
        "DP02_0015E", "DP02_0015M",    # AVG hh size
        "DP02_0092PE", "DP02_0092PM",  # foreign-born
        "DP04_0115PE", "DP04_0115PM",  # house affordability
        # "DP03_0019PE", "DP03_0019PM",  # drove alone (not requested)
        "GEO_ID",
    ],
)

In [37]:
# Write the accumulated general_measures table to disk as a CSV file,
# with a header row and without the DataFrame index.
general_measures.to_csv(
    "/data/discover/Data/General/general_measures2.csv",
    index=False,
    header=True,
)