In [1]:
#dependencies
#for consistency with the course I am keeping all api keys in one file named config.py
import requests
from config import census_api_key
import pandas as pd

### Census API basic info
Here is the link to the American Community Survey API for 2011-2017
    - https://www.census.gov/data/developers/data-sets/acs-1year.html

Chart with all the types of variables you can look up
    - https://api.census.gov/data/2017/acs/acs1.html
Variables - breaking down the data by sex/age/ethnicity
    - https://api.census.gov/data/2017/acs/acs1/variables.html
Geographies - breaking down the data by different geographic areas/sizes
    - https://api.census.gov/data/2017/acs/acs1/geography.html


In [2]:
#Census Data Notes
#Every request url is assembled from the same pieces:
    #base url + year + data set + data type (variable) + geographic area + API key
    #use <*> in place of a number/code to request every value of that geography
        #example - <in=state:*> returns information for all states
base_url = "https://api.census.gov/data"
years = list(range(2012, 2016)) #2012 through 2015 inclusive
data_set = "acs5" #American Community Survey, 5-year data set
#state codes for census
#https://www.census.gov/govs/www/gidstate.html
    #Illinois = 17

#Variable types (aka age/sex/ethnicity) for 1-year American Community Surveys
#api.census.gov/data/2013/acs1/variables.html
#NOTE(review): this link is the acs1 variable list while data_set above is acs5 - confirm the code below exists in both

data_type = "B01003_001E" #total population
#data_type_error = "B01003_001M" #population margin of error

#empty frame that will hold the combined results for every year
population_df = pd.DataFrame()

Notes: 
The Chicago crime database has data from 2001-present, American Community Surveys contain data up to 2017.
We originally planned to look at data from 2011-2017, but data from 2011 and earlier uses different community areas than data from 2012 onward. Additionally, the data for 2016 and 2017 was not yet available when we tried to request it, so we are looking at data from 2012-2015.

In [3]:
#Chicago has 77 Community Areas
#the state-level "public use microdata area" geography combines community areas
#example request from the census website:
#https://api.census.gov/data/2013/acs1?get=NAME,B01001_001E&for=public%20use%20microdata%20area:*&in=state:*&key=YOUR_KEY_GOES_HERE

#geography name as it must appear in the url: spaces are percent-encoded as %20
geographic_area = "public use microdata area".replace(" ", "%20")

In [4]:
#loop through years, collecting one dataframe per successful request
#(frames are gathered in a list and combined once at the end: DataFrame.append
# was removed in pandas 2.0, and growing a frame inside a loop copies it every pass)
yearly_frames = []
for year in years:
    try:
        #build query url (geographic_area is already percent-encoded, so the url is assembled by hand)
        query_url = f"{base_url}/{year}/{data_set}?get=NAME,{data_type}&for={geographic_area}:*&in=state:17&key={census_api_key}"
        #make request; .json() raises ValueError when the body is not valid JSON
        response = requests.get(query_url)
        records = response.json()
        #change response into dataframe
        #NOTE: columns keep their positional labels 0-3 and the first record of each
        #response (NAME, variable code, state, geography) stays in the frame as a row
        df = pd.DataFrame.from_records(records)
        #add year column to dataframe
        df = df.assign(year=year)
        yearly_frames.append(df)
    except ValueError:
        #report the year that failed and keep going with the remaining years
        print(f"Error {year}")

#rebuild population_df from scratch so re-running this cell does not duplicate rows;
#pd.concat raises on an empty list, so fall back to an empty frame when every year failed
population_df = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

In [5]:
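#Findings from the try/except output and the resulting dataframe:
    #requests for 2016 and 2017 produced errors
    #2011 data is organized differently (different areas than 2012 onward)
#So we are only using years 2012-2015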

In [6]:
#preview the first rows of the combined frame
#note: row 0 is the API's header record (NAME, B01003_001E, state, public use microdata area), not a data row
population_df.head()

Unnamed: 0,0,1,2,3,year
0,NAME,B01003_001E,state,public use microdata area,2012
1,"Jo Daviess, Carroll, Whiteside & Lee Counties ...",132179,17,00104,2012
2,"Rock Island County PUMA, Illinois",147504,17,00105,2012
3,"Henry, Mercer, Henderson, Warren, Hancock, McD...",180417,17,00202,2012
4,"Adams, Pike, Brown, Schuyler & Mason Counties ...",112582,17,00300,2012


In [7]:
#count how often each value in column 3 (the public use microdata area code) appears
#with four years loaded, a count of 4 presumably means the area shows up in every year
population_df[3].value_counts()

03411                        4
02601                        4
03420                        4
03503                        4
02700                        4
03207                        4
public use microdata area    4
03525                        4
00900                        4
03414                        4
01204                        4
03310                        4
03531                        4
00501                        4
03602                        4
03407                        4
01104                        4
03205                        4
01300                        4
00105                        4
03005                        4
00700                        4
01701                        4
02100                        4
00401                        4
03413                        4
03412                        4
03204                        4
01001                        4
03524                        4
                            ..
03208                        4
03202   

In [8]:
#inspect the column labels: the API fields are still labelled by position (0-3), plus the added 'year' column
population_df.columns

Index([0, 1, 2, 3, 'year'], dtype='object')