## Script to pull census data.

This notebook includes a simple script that will pull the requested variables form the 5-year American Community Survey at either the census tract or congressional district level.  The user must request a census api key and enter it in the line that says "my_api_key".  The user should then put the variable names from the ACS5 in the indicated line, seperated by commas.  The user may optionally put names in the indicated line, also seperated by commas.  Both should be surrounded by quotes (single/double quotes are both fine).  Then run the cells of this notebook

This is just a simple application of the library here: https://github.com/datamade/census

If you are missing the required libraries, put these in a new cell and run it:

* !pip install census
* !pip install us
* !pip install tqdm

In [12]:
from census import Census
from us import states
import requests
import pandas as pd
from tqdm import tqdm_notebook
from IPython.display import clear_output

my_api_key = "b141795955348002155a81415a728a61faab9746"# Put your api key here
try:
    census_query_object = Census(my_api_key)
except:
    print("Did you enter an api key surrounded by quotes?")



variables_of_interest = ('NAME', 'B19013_001E', 'B19013B_001E')
custom_names = ('default', 'default', 'default')


def grab_by_tracts(variables_list, names=custom_names, file_name = "tracts_data.csv", save=True):
    """This gathers the requested variables from the 5-year American Community 
    Survey at the census tract level.  Note, this will take longer then the 
    district data, perhaps a couple minutes."""

    stats = []
    census_query_object = Census(my_api_key)
    for x in tqdm_notebook(states.STATES):
        stats = stats + census_query_object.acs5.state_county_tract(
            tuple(variables_list), x.fips, Census.ALL, Census.ALL)
    df = pd.DataFrame(stats)
    df['State Name'] = df['state'].apply(lambda x: states.STATES_AND_TERRITORIES[int(x)-1].abbr)

    if names and type(names)==list:
        for i in range( min(len(names), df.shape[1]) ):
            if names[i]!="default" and names[i]!='':
                df.columns[i]=names[i]

#     df.to_csv(file_name)
    clear_output()
    display(df.head())
    return df

def grab_by_districts(variables_list, names=custom_names, file_name = "district_data.csv", save=True):
    """This gathers the requested variables from the 5-year American Community 
    Survey at the congressional district level."""
    
    stats = []
    census_query_object = Census(my_api_key)
    for x in tqdm_notebook(states.STATES, leave=False):
        stats = stats + census_query_object.acs5.state_district(
            tuple(variables_list), x.fips , Census.ALL)
    df = pd.DataFrame(stats)
    df['State Name'] = df['state'].apply(lambda x: states.STATES_AND_TERRITORIES[int(x)-1].abbr)
    
    if names and type(names)==list:
        for i in range( min(len(names), df.shape[1]) ):
            if names[i]!="default" and names[i]!='':
                df.columns[i]=names[i]

#     
    clear_output()
    


In [13]:
"""Run this cell to save district data."""

"""You may optionally indicate new variables and names by removing the hashmarks in front of the next two lines"""
# variables_of_interest = ('NAME', 'B19013_001E', 'B19013B_001E')
# custom_names = ('default', 'default', 'default')


grab_by_districts(variables_of_interest)

In [14]:
"""Run this cell to save census tract data.  Note, this will take longer then the district data, perhaps a couple minutes."""

"""You may optionally indicate new variables and names by removing the hashmarks in front of the next two lines"""
# variables_of_interest = ('NAME', 'B19013_001E', 'B19013B_001E')
# custom_names = ('default', 'default', 'default')

grab_by_tracts(variables_of_interest)

Unnamed: 0,B19013B_001E,B19013_001E,NAME,county,state,tract,State Name
0,26157.0,66000.0,"Census Tract 201, Autauga County, Alabama",1,1,20100,AL
1,30052.0,41107.0,"Census Tract 202, Autauga County, Alabama",1,1,20200,AL
2,42554.0,51250.0,"Census Tract 203, Autauga County, Alabama",1,1,20300,AL
3,39808.0,52704.0,"Census Tract 204, Autauga County, Alabama",1,1,20400,AL
4,41023.0,52463.0,"Census Tract 205, Autauga County, Alabama",1,1,20500,AL


Unnamed: 0,B19013B_001E,B19013_001E,NAME,county,state,tract,State Name
0,26157.0,66000.0,"Census Tract 201, Autauga County, Alabama",001,01,020100,AL
1,30052.0,41107.0,"Census Tract 202, Autauga County, Alabama",001,01,020200,AL
2,42554.0,51250.0,"Census Tract 203, Autauga County, Alabama",001,01,020300,AL
3,39808.0,52704.0,"Census Tract 204, Autauga County, Alabama",001,01,020400,AL
4,41023.0,52463.0,"Census Tract 205, Autauga County, Alabama",001,01,020500,AL
5,32609.0,63750.0,"Census Tract 206, Autauga County, Alabama",001,01,020600,AL
6,25600.0,45234.0,"Census Tract 207, Autauga County, Alabama",001,01,020700,AL
7,-666666666.0,74603.0,"Census Tract 208.01, Autauga County, Alabama",001,01,020801,AL
8,37772.0,61242.0,"Census Tract 208.02, Autauga County, Alabama",001,01,020802,AL
9,34470.0,44591.0,"Census Tract 209, Autauga County, Alabama",001,01,020900,AL


In [15]:

import pandas as pd
import numpy as np

d = 'https://api.census.gov/data/'
acs1_district_url=d+'{}/acs/acs1?get={},NAME&for=congressional%20district:*&in=state:11&key={}' #2015
acs5_district_url=d+'{}/acs/acs5?get={},NAME&for=congressional%20district:*&in=state:11&key={}' #2015
sf1_district_url=d+'{}/acs/acs1?get={},NAME&for=congressional%20district:*&in=state:11&key={}' #2010
acs5_tract_url=d+'{}/acs/acs5?get={}&for=tract:000100&in=state:11%20county:001&key={}' # 2015
sf1_tract_url=d+'{}/dec/sf1?get={}&for=tract:000100&in=state:11%20county:001&key={}' #2010
sf3_tract_url=d+'{}/sf3?get={}&for=tract:000100&in=state:11%20county:001&key={}' #2000

names = ['acs1_district', 'acs5_district', 'sf1_district', 'acs5_district', 'sf1_tract', 'sf3_tract']
urls=[acs1_district_url,acs5_district_url,sf1_district_url,
      acs5_tract_url,sf1_tract_url,sf3_tract_url]
years=[2015,2015,2010,2015,2010,2000]


dfq = pd.DataFrame(np.array([names, urls, years]).T, columns=['name', 'url', 'year'])
dfq



Unnamed: 0,name,url,year
0,acs1_district,https://api.census.gov/data/{}/acs/acs1?get={}...,2015
1,acs5_district,https://api.census.gov/data/{}/acs/acs5?get={}...,2015
2,sf1_district,https://api.census.gov/data/{}/acs/acs1?get={}...,2010
3,acs5_district,https://api.census.gov/data/{}/acs/acs5?get={}...,2015
4,sf1_tract,https://api.census.gov/data/{}/dec/sf1?get={}&...,2010
5,sf3_tract,https://api.census.gov/data/{}/sf3?get={}&for=...,2000


In [16]:
my_api_key = "b141795955348002155a81415a728a61faab9746"

import requests
def acs_check(field):
    valids = []
    
    for row in dfq.itertuples(): # This is a deep, shameful thing for me to use iterrows
        try:
            r = requests.get(row[2].format(str(row[3]),'NAME', my_api_key)).ok
            valids.append(r)
        except:
            valids.append(None)
    return valids
            
def acs_check_proto(field):
    for url, year in zip(urls, years):
        try:
            print(url.format(year, field, my_api_key))
        except:
            print('foobar')

In [17]:
variables = ["C23002A_004E","C23002A_004M","C23002B_004E","C23002B_004M",
             "C23002C_004E","C23002C_004M","C23002D_004E","C23002D_004M",
             "C23002E_004E","C23002E_004M","C23002F_004E","C23002F_004M",
             "C23002G_004E","C23002G_004M","C23002H_004E","C23002H_004M",
             "C23002I_004E","C23002I_004M","C23002A_001E","C23002A_001M",
             "C23002B_001E","C23002B_001M","C23002C_001E","C23002C_001M",
             "C23002D_001E","C23002D_001M","C23002E_001E","C23002E_001M",
             "C23002F_001E","C23002F_001M","C23002G_001E","C23002G_001M",
             "C23002H_001E","C23002H_001M","C23002I_001E","C23002I_001M"]

In [18]:
from tqdm import tqdm_notebook
import pandas as pd
import requests

valid = []
for field in tqdm_notebook(variables[:]):
    valid.append(list(acs_check(field)))
    
pd.DataFrame(valid, columns=dfq['name'].values)




Unnamed: 0,acs1_district,acs5_district,sf1_district,acs5_district.1,sf1_tract,sf3_tract
0,True,True,True,True,True,True
1,True,True,True,True,True,True
2,True,True,True,True,True,True
3,True,True,True,True,True,True
4,True,True,True,True,True,True
5,True,True,True,True,True,True
6,True,True,True,True,True,True
7,True,True,True,True,True,True
8,True,True,True,True,True,True
9,True,True,True,True,True,True
