#### Convert Census API data to a human-readable dataframe

In [45]:
import pandas as pd
import numpy as np
import requests
from functools import lru_cache
import configparser
import os
from tqdm import tqdm
from multiprocessing import Pool

In [46]:
# turn off pandas' chained-assignment warning for the whole notebook
pd.set_option("mode.chained_assignment", None)

In [47]:
def census_key():
    """
    Output: 
        Returns the value of "census_api_key" from the [CENSUS_API] section
        of the .secrets file in your home directory
    """

    # .secrets should live in your home directory
    secrets_file = os.path.expanduser("~") + "/.secrets"

    parser = configparser.RawConfigParser()
    parser.read(secrets_file)

    # section contents as a plain dict: option name -> value
    census_section = {option: value for option, value in parser.items("CENSUS_API")}

    return census_section["census_api_key"]

In [51]:
@lru_cache(maxsize=None)
def census_tract_api(year, profile, state_id, api_key = None):
    """
    Input: 
        year = year of desired census api
        profile = data profile type desired from census api
        state_id = state desired from census api
        api_key = api key needed for census api; when left as None the key is
                  looked up with census_key() at call time

    Output: 
        Returns raw census api data as pandas dataframe.
        Results are memoised by lru_cache, so repeated calls with the same
        arguments hand back the very same dataframe object -- copy it before
        modifying it.

    Raises:
        ValueError if the census api answers with an HTTP error status
    """

    # resolve the key lazily: a default of census_key() in the signature would be
    # evaluated once, when the function is defined, and would read the secrets file
    # even if the caller always passes a key explicitly
    if api_key is None:
        api_key = census_key()

    url = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({profile})&for=tract:*&in=state:{state_id}&key={api_key}"

    # request url for provided profile, state & year
    get_response = requests.get(url)

    # fail early with a readable message instead of an opaque json decoding error;
    # the message is built from the request parameters (not the url) so the api key
    # never ends up in notebook output
    if not get_response.ok:
        raise ValueError(
            f"Census API request failed with HTTP {get_response.status_code} {get_response.reason} "
            f"(year={year}, profile={profile}, state={state_id})"
        )

    # parse the api response body as json
    json_return = get_response.json()

    # convert json to pandas data frame
    df_tract = pd.DataFrame(json_return)

    return df_tract

In [52]:
@lru_cache(maxsize=None)
def census_var_names(var, year = 2020):
    """
    Input: 
        var = variable desired from census api
        year = year of the acs5 profile whose variable definitions are queried
               (defaults to 2020)

    Output: 
        Returns the "label" of the census api variable.
        Results are memoised by lru_cache, so each (var, year) pair is only
        requested once.

    Raises:
        ValueError if the census api answers with an HTTP error status
    """

    url = f"https://api.census.gov/data/{year}/acs/acs5/profile/variables/{var}.json"

    # request url for provided variable code
    get_response = requests.get(url)

    # fail early with a readable message instead of an opaque json decoding error
    if not get_response.ok:
        raise ValueError(
            f"Census API variable lookup failed with HTTP {get_response.status_code} {get_response.reason} "
            f"(variable={var}, year={year})"
        )

    # parse the api response body as json
    json_return = get_response.json()

    # extract the value for "label" 
    var_label = json_return["label"]

    return var_label

def apply_var_cols(df):
    """
    Input: 
        df = raw dataframe returned from census api; its first row holds the
             census variable codes, with the last 5 cells of that row being
             columns that are kept under their own name

    Output: 
        Returns a new dataframe with census variable names as column headers.
        The dataframe passed in is left untouched.

    Raises:
        ValueError if df has no rows (there is no header row to read)
    """

    # number of trailing columns that keep their own name instead of being looked up
    n_keep = 5

    if df.empty:
        raise ValueError("apply_var_cols needs a dataframe whose first row holds the variable codes")

    # first row contains all variable codes (read by position, not by index label)
    header_codes = list(df.iloc[0])

    # columns to keep
    col_keep = header_codes[-n_keep:]

    # iterate through each variable code and get the return api label
    var_labels = [ census_var_names(var) for var in header_codes[:-n_keep] ]

    # combine columns
    new_cols = var_labels + col_keep

    # apply column names to a copy: the caller's frame may be the object memoised
    # by census_tract_api, and relabelling it in place would alter that cached value
    df_labelled = df.copy()
    df_labelled.columns = new_cols

    return df_labelled

In [53]:
# fetch the raw tract-level table from the census api
# arguments in order: year, data profile, state id

ACS_YEAR, DATA_PROFILE, STATE_ID = 2020, "DP02", "19"
df_api_return = census_tract_api(ACS_YEAR, DATA_PROFILE, STATE_ID, census_key())

In [16]:
# df_api_return.head()

In [17]:
# replace the column headers with the census variable names

df_census = df_api_return.pipe(apply_var_cols)

In [150]:
# df_census.head()

In [188]:
# create dataframe for census variables: one row per variable, holding its
# label ("variable_name") and its census code ("variable")
# these two column names are the ones the later cells split, merge on and select

df_variable_codes = (
    df_census.iloc[:1, :-5]  # first row holds the codes; the last 5 columns are not variables
    .T                       # labels become the index, the codes become column 0
    .reset_index()
    .rename({"index": "variable_name", 0: "variable"}, axis = 1)
)

In [152]:
# df_variable_codes.head()

In [176]:
# expand dataframe to map every geo_id to a variable (wide -> long):
# one output row per (geo_id, variable_name) pair, geo_id by geo_id in the
# original row order, variables in their original column order

df_census_variables = df_census.iloc[1:, :-4] # skip the header row; remove additional geo data (GEO_ID is kept)

# every column except GEO_ID is a variable
variable_values = df_census_variables.drop(columns = "GEO_ID")
n_geos, n_variables = variable_values.shape

# reshape in one pass instead of filtering the whole frame once per geo_id:
#   - the variable names repeat as a block for every geo_id
#   - the values are read row by row, so they line up with that block
#   - each geo_id is repeated once per variable
geo_df = pd.DataFrame({
    "variable_name": np.tile(variable_values.columns.to_numpy(), n_geos),
    "value": variable_values.to_numpy().ravel(),
    "geo_id": np.repeat(df_census_variables["GEO_ID"].to_numpy(), n_variables),
})

# missing values from the api become 0
geo_df["value"] = geo_df["value"].fillna(0)

In [177]:
# geo_df.head()

In [178]:
# break up variable name into three components: the measurement type, demographic_target, demographic
# create a column for each component for df_variable_codes

def _split_variable_label(label):
    """
    Input: 
        label = census variable name with "!!" between its components

    Output: 
        Returns (measurement, demographic_target, demographic), all lower case.
        Everything after the second component is joined with spaces into
        demographic; components the label does not have come back as "".
    """

    parts = label.split("!!")

    measure = parts[0].lower() # measurement value
    target = parts[1].lower() if len(parts) > 1 else "" # demographic target
    demo = " ".join(parts[2:]).lower() # demographic

    return measure, target, demo

# exactly one tuple per row, so the new columns always line up with the dataframe
label_components = [ _split_variable_label(var) for var in df_variable_codes["variable_name"] ]

measurement = [ parts[0] for parts in label_components ]
demographic_target = [ parts[1] for parts in label_components ]
demographic = [ parts[2] for parts in label_components ]

df_variable_codes["measurement"] = measurement
df_variable_codes["demographic_target"] = demographic_target
df_variable_codes["demographic"] = demographic

In [179]:
# preview the first rows to check the three components split out of each variable name
df_variable_codes.head()

Unnamed: 0,variable_name,variable,measurement,demographic_target,demographic
0,Estimate!!HOUSEHOLDS BY TYPE!!Total households,DP02_0001E,estimate,households by type,total households
1,Annotation of Estimate!!HOUSEHOLDS BY TYPE!!To...,DP02_0001EA,annotation of estimate,households by type,total households
2,Margin of Error!!HOUSEHOLDS BY TYPE!!Total hou...,DP02_0001M,margin of error,households by type,total households
3,Annotation of Margin of Error!!HOUSEHOLDS BY T...,DP02_0001MA,annotation of margin of error,households by type,total households
4,Percent!!HOUSEHOLDS BY TYPE!!Total households,DP02_0001PE,percent,households by type,total households


In [180]:
# isolate the geo data: the last five columns of df_census
geo_columns = df_census.iloc[:, -5:]

# the first row carries the header names; lower-case them for the new column labels
geo_header = [ name.lower() for name in geo_columns.iloc[0] ]

# keep everything below that header row and apply the lower-cased names
df_census_geo = geo_columns.iloc[1:]
df_census_geo.columns = geo_header

In [181]:
# df_census_geo.head()

In [182]:
# join geo_df, df_variable_codes, and df_census_geo
# step 1: add the variable details to every geo_df row (matched on variable_name)
# step 2: add the geo columns (matched on geo_id)

geo_with_variables = geo_df.merge(df_variable_codes, on = "variable_name", how = "left")
geo_df_final = geo_with_variables.merge(df_census_geo, on = "geo_id", how = "left")

In [183]:
# re-order columns: geo columns first, then the variable details, then the value
final_column_order = [
    "geo_id", "name", "state", "county", "tract",
    "variable", "variable_name",
    "measurement", "demographic_target", "demographic",
    "value",
]
geo_df_final = geo_df_final.loc[:, final_column_order]

In [184]:
# display the joined result: geo columns, variable details and value for every geo_df row
geo_df_final

Unnamed: 0,geo_id,name,state,county,tract,variable,variable_name,measurement,demographic_target,demographic,value
0,1400000US19155031601,"Census Tract 316.01, Pottawattamie County, Iowa",19,155,031601,DP02_0001E,Estimate!!HOUSEHOLDS BY TYPE!!Total households,estimate,households by type,total households,1891
1,1400000US19155031601,"Census Tract 316.01, Pottawattamie County, Iowa",19,155,031601,DP02_0001EA,Annotation of Estimate!!HOUSEHOLDS BY TYPE!!To...,annotation of estimate,households by type,total households,0
2,1400000US19155031601,"Census Tract 316.01, Pottawattamie County, Iowa",19,155,031601,DP02_0001M,Margin of Error!!HOUSEHOLDS BY TYPE!!Total hou...,margin of error,households by type,total households,160
3,1400000US19155031601,"Census Tract 316.01, Pottawattamie County, Iowa",19,155,031601,DP02_0001MA,Annotation of Margin of Error!!HOUSEHOLDS BY T...,annotation of margin of error,households by type,total households,0
4,1400000US19155031601,"Census Tract 316.01, Pottawattamie County, Iowa",19,155,031601,DP02_0001PE,Percent!!HOUSEHOLDS BY TYPE!!Total households,percent,households by type,total households,1891
...,...,...,...,...,...,...,...,...,...,...,...
1103867,1400000US19155031300,"Census Tract 313, Pottawattamie County, Iowa",19,155,031300,DP02_0154MA,Annotation of Margin of Error!!COMPUTERS AND I...,annotation of margin of error,computers and internet use,total households with a broadband internet sub...,0
1103868,1400000US19155031300,"Census Tract 313, Pottawattamie County, Iowa",19,155,031300,DP02_0154PE,Percent!!COMPUTERS AND INTERNET USE!!Total hou...,percent,computers and internet use,total households with a broadband internet sub...,78.9
1103869,1400000US19155031300,"Census Tract 313, Pottawattamie County, Iowa",19,155,031300,DP02_0154PEA,Annotation of Percent!!COMPUTERS AND INTERNET ...,annotation of percent,computers and internet use,total households with a broadband internet sub...,0
1103870,1400000US19155031300,"Census Tract 313, Pottawattamie County, Iowa",19,155,031300,DP02_0154PM,Percent Margin of Error!!COMPUTERS AND INTERNE...,percent margin of error,computers and internet use,total households with a broadband internet sub...,8.8
