In [1]:
# Dependencies
from urllib.parse import urlencode, urlparse, parse_qsl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from us import states
import gmaps
import gmaps.datasets
import gmaps.geojson_geometries
import json

# Census & gmaps API Keys
from config import (census_key, g_key)

# Configure gmaps: register the Google API key (g_key, imported from config above)
# with the gmaps library before any gmaps figure or layer is created.
gmaps.configure(api_key=g_key)

In [2]:
# Read in the csv containing state centroid coordinates.
# The path is relative to the notebook's working directory.
# NOTE(review): later cells merge on a "State" column and read "Latitude" /
# "Longitude" from the merged frame, so this file presumably provides those
# three columns -- confirm against the csv.
centroids = pd.read_csv("resources/state_centroids.csv")


In [3]:
# Run Census Search to retrieve ACS 5-year data on all states, one request per year.
# Note the addition of "B23025_005E" for unemployment count
year_list = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]

# Collect one DataFrame per year and concatenate once after the loop;
# growing a DataFrame with pd.concat inside the loop re-copies every
# previously fetched row on each iteration.
yearly_frames = []
for year_request in year_list:
    c = Census(census_key, year=year_request)
    acs_rows = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                           "B19301_001E",
                           "B17001_002E",
                           "B23025_005E"), {'for': 'state:*'})
    census_pd = pd.DataFrame(acs_rows)

    # Rename the Census variable codes to readable column names
    census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                          "B01002_001E": "Median_Age",
                                          "B19013_001E": "Household_Income",
                                          "B19301_001E": "Per_Capita_Income",
                                          "B17001_002E": "Poverty Count",
                                          "B23025_005E": "Unemployment Count",
                                          "NAME": "State_Name", "state": "State"})

    # Both rates share the same denominator, so cast it once.
    population = census_pd["Population"].astype(int)

    # Add in Poverty Rate (Poverty Count / Population), as a percentage
    census_pd["Poverty_Rate"] = 100 * census_pd["Poverty Count"].astype(int) / population

    # Add in Unemployment Rate (Unemployment Count / Population), as a percentage
    # NOTE(review): the denominator is total population rather than the labor
    # force, so this is not the conventional unemployment rate -- confirm this
    # is the intended definition.
    census_pd["Unempl_Rate"] = 100 * census_pd["Unemployment Count"].astype(int) / population

    # Tag every row with the year it was requested for
    census_pd['Year'] = year_request
    print('Retrieval data for:', year_request)
    yearly_frames.append(census_pd)

# Stack all years vertically; each year's original row index is kept as-is.
census_sum_pd = pd.concat(yearly_frames, axis=0)

Retrieval data for: 2011
Retrieval data for: 2012
Retrieval data for: 2013
Retrieval data for: 2014
Retrieval data for: 2015
Retrieval data for: 2016
Retrieval data for: 2017
Retrieval data for: 2018


In [4]:
# Merge the datasets on the state name: a left join keeps every census row and
# attaches the matching centroid row. The raw count columns and both suffixed
# "State" columns (State_x from the census frame, State_y from the centroids)
# are then dropped.
census_data = (
    pd.merge(
        census_sum_pd,
        centroids,
        how="left",
        left_on="State_Name",
        right_on="State",
    )
    .drop(columns=['Poverty Count', 'Unemployment Count', 'State_x', 'State_y'])
)

In [5]:
# Display the merged data indexed by (State_Name, Year), summing the
# remaining columns within each group.
(
    census_data
    .groupby(by=['State_Name', 'Year'])
    .sum()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Household_Income,Population,Median_Age,Per_Capita_Income,Poverty_Rate,Unempl_Rate,Latitude,Longitude
State_Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Alabama,2011,42934.0,4747424.0,37.7,23483.0,17.133186,4.540210,32.7794,-86.8287
Alabama,2012,43160.0,4777326.0,37.8,23587.0,17.631035,4.830694,32.7794,-86.8287
Alabama,2013,43253.0,4799277.0,38.1,23680.0,18.140878,5.040968,32.7794,-86.8287
Alabama,2014,43511.0,4817678.0,38.2,23936.0,18.467610,4.747432,32.7794,-86.8287
Alabama,2015,43623.0,4830620.0,38.4,24091.0,18.367415,4.287172,32.7794,-86.8287
...,...,...,...,...,...,...,...,...,...
Wyoming,2014,58252.0,575251.0,36.8,29381.0,11.346525,2.772703,42.9957,-107.5512
Wyoming,2015,58840.0,579679.0,36.8,29803.0,11.212240,2.587984,42.9957,-107.5512
Wyoming,2016,59143.0,583029.0,36.8,30139.0,11.279370,2.593010,42.9957,-107.5512
Wyoming,2017,60938.0,583200.0,37.0,31214.0,10.870713,2.479252,42.9957,-107.5512


In [13]:
# Save as csv: one file with only the 2011 rows, one with every year.
# Note to avoid any issues later, use encoding="utf-8"; the row index is not written.
csv_options = {"encoding": "utf-8", "index": False}

census_data_2011 = census_data.loc[census_data['Year'].eq(2011)]
census_data_2011.to_csv("resources/states_2011.csv", **csv_options)
census_data.to_csv("resources/states_total.csv", **csv_options)

In [8]:
# Heatmap of the unemployment rate, weighted at each state's centroid.
# census_data holds one row per state per year, so every centroid contributes
# one weighted point for each year in the data.
states_df = census_data[['Latitude', 'Longitude']]
weight = census_data['Unempl_Rate']

fig = gmaps.figure(map_type='HYBRID')
heat_layer = gmaps.heatmap_layer(
    states_df,
    weights=weight,
    dissipating=False,
    max_intensity=40,
    point_radius=2,
)
fig.add_layer(heat_layer)

# Evaluate the figure as the cell's last expression so the map is rendered;
# without this the figure is built but never displayed.
fig

In [9]:
# Pull the poverty rate column out as a plain Python list
# (one entry per row of census_data, in row order).
poverty_rate = census_data.loc[:, "Poverty_Rate"].tolist()

In [10]:
# Marker positions: the centroid coordinates of every row in census_data
marker_locations = census_data[['Latitude', 'Longitude']]

# Info-box text for each marker, in the same row order as the locations
info_boxes = [f"Poverty Rate: {rate}" for rate in poverty_rate]

# Build the marker layer, attach it to a fresh figure, and display the figure
markers = gmaps.marker_layer(marker_locations, info_box_content=info_boxes)
fig = gmaps.figure()
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(height='420px'))

In [11]:
# Query the Census API `zbp` dataset for every zipcode with ST=17 and load the
# result into a DataFrame.
year='2010'
dsource='zbp'
cols='ESTAB,EMP,PAYANN'
state='17'
base_url = f'https://api.census.gov/data/{year}/{dsource}'

# Let requests build and URL-encode the query string instead of formatting it
# by hand, bound the wait with a timeout, and fail loudly on an HTTP error
# rather than trying to parse an error page as JSON.
query = {'get': cols, 'for': 'zipcode:*', 'ST': state, 'key': census_key}
response = requests.get(base_url, params=query, timeout=30)
data_url = response.url  # full request URL; contains the API key, so do not print or share it
response.raise_for_status()

# The first row of the payload holds the column names and the remaining rows
# hold the records. Row labels start at 1, matching the labels left behind
# when the header row (label 0) was previously dropped from the frame.
payload = response.json()
illinois = pd.DataFrame(payload[1:], columns=payload[0], index=range(1, len(payload)))

# Display the records ordered by zipcode (the sorted copy is shown, not stored)
illinois.sort_values(by='zipcode')

Unnamed: 0,ESTAB,EMP,PAYANN,ST,zipcode
1,682,3022,83703,17,11419
2,55,416,11094,17,17853
3,12,0,0,17,60001
4,586,4429,142143,17,60002
5,1906,19854,1067994,17,60010
...,...,...,...,...,...
1483,7,140,4694,17,62998
1484,16,165,3102,17,62999
1485,1047,17392,698924,17,76180
1486,105,1503,37804,17,76182
