# API Census Data
This notebook pulls state-level income, population, and employment data from the U.S. Census Bureau API, cleans it with pandas, and loads the result into a MongoDB database.

In [1]:
# Import appropriate dependencies
import pandas as pd
from census import Census

# Census API key, imported from the local config module instead of being written in the notebook
from config import (census_api_key)
# Census API client used by the data-collection cell; year=2017 is passed to the client
# (presumably selecting the 2017 data vintage -- confirm against the census package docs)
c = Census(census_api_key, year=2017)

### Collect the data

In [2]:
# Collect state-level income, population and employment data with a single Census API call.
# The extracted data comes from U.S. Census Bureau, 2013-2017 American Community Survey 5-Year Estimates.

# Variables to request; the trailing labels are the column names these codes
# are given in the cleaning step.
state_fields = (
    "NAME",         # State
    "B19013_001E",  # Median Household Income
    "B19301_001E",  # Per Capita Income
    "B01003_001E",  # Total Population
    "B23025_002E",  # Employment labor force
    "B23025_007E",  # Employment not labor force
)

# Geography filter: wildcard over every state-level geography.
state_geography = {"for": "state:*"}

income_and_pop_data_states = c.acs5.get(state_fields, state_geography)



### Clean the data

In [3]:
# Turn the raw API records into a tidy table indexed by state.

# Census variable code -> human-readable column name.
column_labels = {
    "B19013_001E": "Median Household Income",
    "B19301_001E": "Per Capita Income",
    "B01003_001E": "Total Population",
    "B23025_002E": "Employment labor force",
    "B23025_007E": "Employment not labor force",
    "NAME": "State",
}

# Columns to keep, in display order (anything else returned by the API is dropped).
column_order = [
    "State",
    "Total Population",
    "Median Household Income",
    "Per Capita Income",
    "Employment labor force",
    "Employment not labor force",
]

# Head-count columns are stored as whole numbers.
count_dtypes = {
    "Total Population": int,
    "Employment labor force": int,
    "Employment not labor force": int,
}

income_and_pop_data_states = (
    pd.DataFrame(income_and_pop_data_states)
    .rename(columns=column_labels)
    .loc[:, column_order]
    .set_index("State")
    .astype(count_dtypes)
)

# Render the income columns as dollar strings, e.g. "$46,472.00".
# After this step those two columns hold text, not numbers.
as_dollars = "${:,.2f}".format
for income_column in ("Median Household Income", "Per Capita Income"):
    income_and_pop_data_states[income_column] = (
        income_and_pop_data_states[income_column].map(as_dollars)
    )

income_and_pop_data_states.head()


Unnamed: 0_level_0,Total Population,Median Household Income,Per Capita Income,Employment labor force,Employment not labor force
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Puerto Rico,3468963,"$19,775.00","$12,081.00",1262220,1574064
Alabama,4850771,"$46,472.00","$25,746.00",2233209,1642927
Alaska,738565,"$76,114.00","$35,065.00",400622,170831
Arizona,6809946,"$53,510.00","$27,964.00",3197116,2174225
Arkansas,2977944,"$43,813.00","$24,426.00",1366102,984959


### Create a dictionary from the dataframe

In [4]:
# Convert the table to a nested dictionary keyed by the index (the state name):
# {state: {column name: value, ...}, ...}
data_dictionary = income_and_pop_data_states.to_dict(orient='index')

### Load the data into a MongoDB database

In [5]:
import os

import pymongo

# Establish connection with Mongo.
# The connection URI carries a username and password, so it is read from the
# MONGODB_URI environment variable instead of being written into the notebook
# (where it would be committed and shared along with the outputs).
# NOTE: the credentials that used to be hardcoded here should be rotated.
conn = os.environ.get("MONGODB_URI")
if not conn:
    raise RuntimeError(
        "MONGODB_URI is not set; export the full mongodb:// connection string "
        "(including the database name) before running this cell."
    )
client = pymongo.MongoClient(conn)

In [6]:
# Select the database that will hold the census data
db = client["heroku_gl1v7mxq"]

In [7]:
# Insert the whole state dictionary as a single document into the
# 'state_census' collection (one document, keyed by state name).
db["state_census"].insert_one(data_dictionary)

<pymongo.results.InsertOneResult at 0x116bbafc8>