In [1]:
#dependencies
#for consistency with the course I am keeping all api keys in one file named config.py
import requests
from config import census_api_key
import pandas as pd

### Census Api basic info
Here is the link to the American Community Survey API for 2011-2017
    - https://www.census.gov/data/developers/data-sets/acs-1year.html

Chart with all the types of variables you can look up
    - https://api.census.gov/data/2017/acs/acs1.html
Variables - breaking down the data by sex/age/ethnicity
    - https://api.census.gov/data/2017/acs1/variables.html
Geographies - breaking down the data by different geographic areas/sizes
    - https://api.census.gov/data/2017/acs/acs1/geography.html


In [52]:
#Census Data Notes
#base API structure
    #base url + year + data set + data type (variable) + geographic area + API key
    #if you want to show all of a variable type <*> instead of a number/code
        #example - to get information for all states you would type <in=state:*>
base_url = "https://api.census.gov/data"
years = [2012,2013,2014,2015,2016,2017]
data_set = "acs1" #American Community Survey
#state codes for census
#https://www.census.gov/govs/www/gidstate.html
    #Illinois = 17

#Variable types (aka age/sex/ethnicity) for 1-year American Community Surveys
#api.census.gov/data/2013/acs1/variables.html

data_type = "B01003_001E" #total population unweighted
#B01003_001E = total population weighted
#data_type_error = "B01003_001M" #population margin of error

population_df=pd.DataFrame()

In [53]:
#Chicago has 77 Community Areas
#state public use micro area has combined community areas
#example from census website
#https://api.census.gov/data/2013/acs1?get=NAME,B01001_001E&for=public%20use%20microdata%20area:*&in=state:*&key=YOUR_KEY_GOES_HERE

geographic_area = "public%20use%20microdata%20area"

In [54]:
#loop through years
for year in years:
    try:
        #build query url
        query_url = f"{base_url}/{year}/{data_set}?get=NAME,{data_type}&for={geographic_area}:*&in=state:17&key={census_api_key}"
        #make request
        res = requests.get(query_url)
        res = res.json()
        #change response into dataframe
        df = pd.DataFrame.from_records(res)
        #add year column to datafram
        df["year"] = year
        #add to population_df
        population_df = population_df.append(df, sort="True")
    except ValueError:
        print(f"Error {year}")

Error 2016
Error 2017


In [55]:
#From try/except errors and df
    #Errors with years 2016,2017
    #different data sorting 2011

In [56]:
population_df.head()

Unnamed: 0,0,1,2,3,year
0,NAME,B01003_001E,state,public use microdata area,2012
1,"Jo Daviess, Carroll, Whiteside & Lee Counties ...",130443,17,00104,2012
2,"Rock Island County PUMA, Illinois",147457,17,00105,2012
3,"Henry, Mercer, Henderson, Warren, Hancock, McD...",179259,17,00202,2012
4,"Adams, Pike, Brown, Schuyler & Mason Counties ...",110822,17,00300,2012


In [57]:
#groupby year
population_df["year"].value_counts()

2015    89
2014    89
2013    89
2012    89
Name: year, dtype: int64

In [46]:
population_df.columns

Index([0, 1, 2, 3, 'year'], dtype='object')

Unnamed: 0,0,1,2,3
0,NAME,B01001_001E,state,public use microdata area
1,"Jo Daviess, Carroll, Whiteside & Lee Counties ...",129732,17,00104
2,"Rock Island County PUMA, Illinois",147258,17,00105
3,"Henry, Mercer, Henderson, Warren, Hancock, McD...",176975,17,00202
4,"Adams, Pike, Brown, Schuyler & Mason Counties ...",112212,17,00300
5,"Cass, Morgan, Scott, Greene, Macoupin, Jersey ...",142706,17,00401
6,"Montgomery, Bond, Clinton, Fayette & Effingham...",140504,17,00501
7,"Douglas, Edgar, Coles & Cumberland Counties PU...",101406,17,00600
8,"Clark, Jasper, Crawford, Lawrence, Richland, C...",110775,17,00700
9,"South & Southeast Illinois PUMA, Illinois",137470,17,00800
