## Census API - ACS Example

Using the American Community Survey to examine some demographic and economic trends at the U.S. county level.

The list of variables available in the 5-year ACS can be found here: https://api.census.gov/data/2015/acs5/variables.html

In [1]:
import requests
import pandas as pd

import config
key = config.census_key

In [2]:
# Query configuration shared by the download cell that follows.
# HTTPS is used so the API key (sent as a URL query parameter) is not
# transmitted in clear text.
base = 'https://api.census.gov/data/'
years = ['2015']  # other vintages tried previously: ['2009', '2012', '2015']

# ACS variable code -> readable column label (applied later via DataFrame.rename).
# NOTE(review): B27001_001E looks like the table's universe total rather than a
# count of insured persons - confirm before interpreting 'HIC Share'.
variables = {
    'NAME': 'Name',
    'B01001_001E': 'Population total',
    'B25001_001E': 'Total Housing Units',
    'B25002_002E': 'Occupied Housing Units',
    'B25002_003E': 'Vacant Housing Units',
    'B19013_001E': 'Real Median Income',
    'B01002_001E': 'Median Age',
    'B27001_001E': 'Health Insurance Coverage',
}

# Comma-separated variable codes for the `get=` query parameter.
v = ','.join(variables)
# Value substituted into the `in=state:` clause of the request URL.
s = '*'

In [3]:
# Download one county-level table per ACS vintage and stack them into `df`.
# DataFrame.append was removed in pandas 2.0 (and re-copied the accumulated
# frame on every call), so the per-year frames are collected in a list and
# concatenated once.
frames = []
for y in years:
    url = '{}{}/acs5?get={}&for=county:*&in=state:{}&key={}'.format(base, y, v, s, key)
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of a confusing JSON decode error.
    response.raise_for_status()
    r = response.json()
    # First row of the payload holds the column names; the rest are records.
    dft = pd.DataFrame(r[1:], columns=r[0])
    dft['Year'] = y
    frames.append(dft)

# An empty `years` list still yields an empty frame, as before.
df = pd.concat(frames) if frames else pd.DataFrame()

In [4]:
# Replace ACS codes with readable labels, then index by (county name, year).
df = df.rename(columns=variables)
df = df.set_index(['Name', 'Year'])
df = df.sort_index(level='Name')

# Cast the count columns to int once, then derive the per-county ratios.
population = df['Population total'].astype(int)
occupied_units = df['Occupied Housing Units'].astype(int)
vacant_units = df['Vacant Housing Units'].astype(int)
insurance_count = df['Health Insurance Coverage'].astype(int)

df['Household size'] = population / occupied_units
df['Vacant units per person'] = vacant_units / population
df['HIC Share'] = insurance_count / population

In [5]:
# Build a zero-padded 5-character FIPS code from the state and county columns.
fips_number = (df['state'] + df['county']).astype(int)
fips_text = fips_number.map('{:05d}'.format)
# Remap code 46102 to 46113 - presumably so it matches the county ids used by
# the map file; confirm against the topo.json.
df['FIPS'] = fips_text.str.replace('46102', '46113')

df['Median Age'] = df['Median Age'].astype(float)

In [6]:
# Store the population column as integers (earlier cells only cast it inline inside expressions).
df['Population total'] = df['Population total'].astype(int)

In [19]:
# Store median income as floats; the next cell takes its max().
df['Real Median Income'] = df['Real Median Income'].astype(float)

In [21]:
df['Real Median Income'].max()

123453.0

In [100]:
# Write the county-level frame to a UTF-8 encoded CSV (relative path).
df.to_csv('ACS5_county_level.csv', encoding='utf-8')

### Map the results

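Note: to make the examples below work, you will first need to save [this](https://raw.githubusercontent.com/wrobstory/vincent_map_data/master/us_counties.topo.json) county topo.json file, together with the matching [us_states.topo.json](https://raw.githubusercontent.com/wrobstory/vincent_map_data/master/us_states.topo.json) file that the map cells also reference, in the same directory as the Jupyter notebook.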

In [23]:
import vincent
vincent.core.initialize_notebook()

# Map layers: counties first, states second (local TopoJSON files).
geo_data = [
    dict(name='counties', url='us_counties.topo.json', feature='us_counties.geo'),
    dict(name='states', url='us_states.topo.json', feature='us_states.geo'),
]

# Colour each county by median income, joining data to shapes on the FIPS code.
vis = vincent.Map(
    data=df,
    geo_data=geo_data,
    scale=1100,
    projection='albersUsa',
    data_bind='Real Median Income',
    data_key='FIPS',
    map_key={'counties': 'properties.FIPS'},
)

# Drop the update properties of the second mark group (presumably the states
# layer, given the geo_data order) so only its outline shows, then set strokes.
del vis.marks[1].properties.update
vis.marks[1].properties.enter.stroke.value = '#000000'
vis.marks[0].properties.enter.stroke.value = '#fff'

# Fixed colour range and legend.
vis.scales['color'].domain = [0, 75000]
vis.legend(title='Real Median Income')

# Save the spec to disk, then render it in the notebook.
vis.to_json('vega.json')
vis.display()

In [102]:
# Column labels applied to the header-less BLS county file read in the next cell.
col_names = ['Code', 'County', 'State', 'Title', 'Period', 'LF', 'Emp', 'U Level', 'U Rate']

In [103]:
# Load the BLS county table straight from the web: pipe-delimited, no header row
# (names come from col_names), first 6 lines skipped.
# NOTE: this rebinds `df`, replacing whatever frame it held before.
df = pd.read_table('https://www.bls.gov/web/metro/laucntycur14.txt', 
                   header=None, names=col_names, skiprows=6, sep='|')

In [104]:
# Trim surrounding whitespace in every text column; other columns pass through untouched.
df = df.apply(
    lambda column: column if column.dtype != "object" else column.str.strip()
)

In [105]:
# Keep only the rows for the most recent period: take the Period value of the
# last row that has no missing fields and filter on it.
latest_period = df.dropna()['Period'].iloc[-1]
# .copy() makes the result an independent frame, so the column assignments made
# on it in later cells do not raise SettingWithCopyWarning.
df = df[df['Period'] == latest_period].copy()

In [106]:
# Characters 2-6 of the area code are used as the county FIPS code; two codes
# are then remapped (presumably to match the ids in the map file - confirm).
df['FIPS'] = (
    df['Code'].str[2:7]
    .str.replace('46102', '46113')
    .str.replace('02158', '02270')
)

In [107]:
# Write the filtered county unemployment frame to CSV (relative path).
df.to_csv('County_level_unemp.csv')

In [111]:
# Map layers: counties first, states second (local TopoJSON files).
geo_data = [
    dict(name='counties', url='us_counties.topo.json', feature='us_counties.geo'),
    dict(name='states', url='us_states.topo.json', feature='us_states.geo'),
]

# Colour each county by its unemployment rate, joined to shapes on the FIPS code.
vis = vincent.Map(
    data=df,
    geo_data=geo_data,
    scale=1100,
    projection='albersUsa',
    data_bind='U Rate',
    data_key='FIPS',
    map_key={'counties': 'properties.FIPS'},
)

# Drop the update properties of the second mark group (presumably the states
# layer, given the geo_data order), then set the outline colours.
del vis.marks[1].properties.update
vis.marks[1].properties.enter.stroke.value = '#000000'
vis.marks[0].properties.enter.stroke.value = '#fff'

# Fixed colour range and legend.
vis.scales['color'].domain = [0, 10]
vis.legend(title='Unemployment Rate')

# Save the spec to disk, then render it in the notebook.
vis.to_json('vega.json')
vis.display()

In [None]:
import vincent
vincent.core.initialize_notebook()

# Same two layers as the other maps, but loaded from the remote repository.
geo_data = [
    dict(name='counties',
         url='https://raw.githubusercontent.com/wrobstory/vincent_map_data/master/us_counties.topo.json',
         feature='us_counties.geo'),
    dict(name='states',
         url='https://raw.githubusercontent.com/wrobstory/vincent_map_data/master/us_states.topo.json',
         feature='us_states.geo'),
]

# Plain map with no data bound to it.
vis = vincent.Map(
    geo_data=geo_data,
    scale=1000,
    projection='albersUsa',
)

# Remove the state fill so only its outline is drawn.
del vis.marks[1].properties.update
# Solid county fill, with separate stroke colours for the two layers.
vis.marks[0].properties.update.fill.value = '#084081'
vis.marks[0].properties.enter.stroke.value = '#808080'
vis.marks[1].properties.enter.stroke.value = '#fff'

# Save the spec to disk, then render it in the notebook.
vis.to_json('vega.json')
vis.display()

In [None]:
# Remote location of the county TopoJSON file.
# NOTE(review): not referenced by any other cell visible in this notebook.
counties_url = 'https://raw.githubusercontent.com/wrobstory/vincent_map_data/master/us_counties.topo.json'

In [None]:
import json

# Parse the local county TopoJSON so its contents can be inspected.
with open('us_counties.topo.json') as topo_file:
    get_id = json.load(topo_file)

In [None]:
# Show the FIPS code stored on the first county geometry.
# (A stray leading `for` previously made this cell a SyntaxError.)
get_id['objects']['us_counties.geo']['geometries'][0]['properties']['FIPS']

In [None]:
# Gather the FIPS code of every county geometry in the TopoJSON.
geometries = get_id['objects']['us_counties.geo']['geometries']
county_codes = []
for geometry in geometries:
    county_codes.append(geometry['properties']['FIPS'])

# One-column frame: built as text first, then converted to integers.
county_df = pd.DataFrame({'FIPS': county_codes}, dtype=str).astype(int)

In [None]:
import vincent
vincent.core.initialize_notebook()

# Counties only; no state layer in this map.
geo_data = [
    dict(name='counties', url='us_counties.topo.json', feature='us_counties.geo'),
]

# NOTE(review): an earlier cell rebinds `df` to the BLS unemployment table, whose
# column list has no 'Median Age'. This cell presumably expects the ACS frame -
# confirm which frame is live before running.
vis = vincent.Map(
    data=df,
    geo_data=geo_data,
    scale=1100,
    projection='albersUsa',
    data_bind='Median Age',
    data_key='FIPS',
    map_key={'counties': 'properties.FIPS'},
)

# Fixed colour range and legend.
vis.scales['color'].domain = [20, 60]
vis.legend(title='Median Age')

# Save the spec to disk, then render it in the notebook.
vis.to_json('vega.json')
vis.display()

In [None]:
# Ratio of B00001_001E to B00002_001E per county, one Series per ACS vintage,
# collected in `d` keyed by year.
# NOTE: the request must include both codes, i.e. `v` has to be built from a
# `variables` mapping that contains B00001_001E and B00002_001E.
d = {}
for y in years:
    url = '{}{}/acs5?get={}&for=county:*&in=state:{}&key={}'.format(base, y, v, s, key)
    r = requests.get(url).json()
    # Index by county name only. `y` is a year value such as '2015', not a
    # column label, so set_index(['NAME', y]) raised KeyError; the year is
    # already captured as the key of `d`.
    df = pd.DataFrame(r[1:], columns=r[0]).set_index('NAME')
    df['hh_size'] = df['B00001_001E'].astype(int) / df['B00002_001E'].astype(int)
    d[y] = df['hh_size']

In [None]:
pd.DataFrame(r[1:], columns=r[0])[variables.keys()]

In [None]:
r[1:][0]

In [None]:
# Fetch the raw JSON payload for each year. Only the last response remains bound
# to `r` after the loop, and `d` is reset to an empty dict and not filled here.
d = {}
for y in years:
    url = '{}{}/acs5?get={}&for=county:*&in=state:{}&key={}'.format(base, y, v, s, key)
    r = requests.get(url).json()    

In [None]:
# ACS variable code -> readable label for this experiment.
variables = dict(
    NAME='Name',
    B00002_001E='Households',
    B00001_001E='Population',
    B19013_001E='Real Median Income',
    B01002_001E='Median Age',
)

In [None]:
# Rebuild the comma-separated code list from the current `variables` mapping (requires a dict, not a string).
v = ','.join(variables.keys())

In [None]:
# Tag every row of the current frame with `y` (whatever value the last loop or assignment left in it).
df['year'] = y

In [None]:
variables

In [None]:
# Re-index by (NAME, year) and display the frame as a nested dict: column -> {(NAME, year): value}.
df.reset_index().set_index(['NAME', 'year']).to_dict()

In [None]:
# Configuration for the county-level query in the next cell.
# HTTPS is used so the API key in the query string is not sent in clear text.
base = 'https://api.census.gov/data/'
years = ['2009', '2012', '2015']
# Here `variables` is the ready-made `get=` string rather than a dict.
variables = 'NAME,B01002_001E'
# Value for the `in=state:` clause (presumably a state FIPS code - confirm).
state = '10'

In [None]:
# B01002_001E by county for the selected state, one Series per ACS vintage,
# collected in `d` keyed by year.
d = {}
for y in years:
    # The credential loaded from `config` is bound to `key`; the name `api_key`
    # used here previously is never defined in this notebook (NameError).
    url = '{}{}/acs5?get={}&for=county:*&in=state:{}&key={}'.format(base, y, variables, state, key)
    r = requests.get(url).json()
    # First row of the payload holds the column names; the rest are records.
    df = pd.DataFrame(r[1:], columns=r[0]).set_index('NAME')
    d[y] = df['B01002_001E']

In [None]:
# Configuration for the state-level query in the next cell.
# HTTPS is used so the API key in the query string is not sent in clear text.
base = 'https://api.census.gov/data/'
years = ['2009', '2012', '2015']
# Here `variables` is the ready-made `get=` string rather than a dict.
variables = 'NAME,B19013_001E'
# Value for the `for=state:` clause (presumably a state FIPS code - confirm).
state = '10'

In [None]:
# B19013_001E for the selected state, one Series per ACS vintage, collected in
# `d` keyed by year.
d = {}
for y in years:
    # The credential loaded from `config` is bound to `key`; the name `api_key`
    # used here previously is never defined in this notebook (NameError).
    url = '{}{}/acs5?get={}&for=state:{}&key={}'.format(base, y, variables, state, key)
    r = requests.get(url).json()
    # First row of the payload holds the column names; the rest are records.
    df = pd.DataFrame(r[1:], columns=r[0]).set_index('NAME')
    d[y] = df['B19013_001E']

# Display the collected Series.
d

In [None]:
df['B19013_001E']

In [None]:
# Single-vintage query covering every state.
y = '2015'
# The credential loaded from `config` is bound to `key`; the name `api_key`
# used here previously is never defined in this notebook (NameError).
url = '{}{}/acs5?get={}&for=state:*&key={}'.format(base, y, variables, key)
r = requests.get(url).json()
# First row of the payload holds the column names; the rest are records.
df = pd.DataFrame(r[1:], columns=r[0]).set_index('NAME')

In [None]:
# The column is never cast after loading, and the API payload carries values as
# text, so sorting it directly orders the strings lexicographically
# (e.g. '100000' before '45000'). Cast to float first to sort numerically.
df.astype({'B19013_001E': float}).sort_values('B19013_001E')

### Working Example - Percent of Working Age Population Earning Less than X

In [None]:
# Configuration for the working example.
# HTTPS is used so the API key in the query string is not sent in clear text.
base = 'https://api.census.gov/data/'
# Ready-made `get=` string: area name plus one ACS variable.
variables = 'NAME,B20004_002E'

In [None]:
# Single-vintage query covering every state for the working example.
y = '2015'
# The credential loaded from `config` is bound to `key`; the name `api_key`
# used here previously is never defined in this notebook (NameError).
url = '{}{}/acs5?get={}&for=state:*&key={}'.format(base, y, variables, key)
r = requests.get(url).json()
# First row of the payload holds the column names; the rest are records.
df = pd.DataFrame(r[1:], columns=r[0]).set_index('NAME')

In [None]:
df