In [1]:
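# One-time setup: uncomment and run once to install the Census API wrapper
# into the kernel's environment (version pinned to the one installed below).
# %pip install census==0.8.15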

Collecting census
  Downloading census-0.8.15-py2.py3-none-any.whl (10 kB)
Collecting future
  Downloading future-0.18.2.tar.gz (829 kB)
Building wheels for collected packages: future
  Building wheel for future (setup.py): started
  Building wheel for future (setup.py): finished with status 'done'
  Created wheel for future: filename=future-0.18.2-py3-none-any.whl size=491062 sha256=4e13ee24d092d94ef9ea3f2102f31bb04d3a87383934d7bf65a514899256b133
  Stored in directory: c:\users\greg\appdata\local\pip\cache\wheels\6e\9c\ed\4499c9865ac1002697793e0ae05ba6be33553d098f3347fb94
Successfully built future
Installing collected packages: future, census
Successfully installed census-0.8.15 future-0.18.2


In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

# Census API key is read from a local `config` module rather than written
# into the notebook itself.
from config import api_key

# Shared Census client used by the query cell below; pinned to the 2013 data.
ACS_YEAR = 2013
c = Census(api_key, year=ACS_YEAR)

In [2]:
# Build a per-zip-code table of population, age, income and poverty figures
# from the 2013 ACS5 Census.
#
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame (one row per record returned by the query)
census_pd = pd.DataFrame(census_data)

# Column Renaming: swap the Census variable codes for readable labels
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Add in Poverty Rate (Poverty Count / Population), expressed as a percentage.
# pd.to_numeric(errors="coerce") turns missing or non-numeric values into NaN
# instead of raising the way .astype(int) does, so one bad row cannot abort
# the whole cell.
poverty_count = pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
population = pd.to_numeric(census_pd["Population"], errors="coerce")

# Mask non-positive populations so the rate is NaN (undefined) rather than
# inf when an area reports no residents.
census_pd["Poverty Rate"] = 100 * poverty_count / population.where(population > 0)

# Final DataFrame
# Re-ordering the columns (this also drops "Name" and any other column that
# is not listed here):
census_pd = census_pd[["Zipcode", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate"]]

# Visualize: row count, then a preview of the first rows
print(len(census_pd))
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,8518,5217.0,41.5,74286.0,33963.0,170.0,3.258578
1,8520,27468.0,37.4,90293.0,37175.0,1834.0,6.67686
2,8525,4782.0,47.1,118656.0,59848.0,43.0,0.899205
3,8527,54867.0,42.2,88588.0,37021.0,2191.0,3.993293
4,8528,245.0,48.5,58676.0,49117.0,0.0,0.0


In [3]:
# List the column names of the DataFrame to confirm the labels that the
# rename and column selection produced.

census_pd.columns

Index(['Zipcode', 'Population', 'Median Age', 'Household Income',
       'Per Capita Income', 'Poverty Count', 'Poverty Rate'],
      dtype='object')

In [3]:
# Write the table out as a CSV file.
#   - encoding="utf-8" is passed explicitly to avoid encoding issues later.
#   - index=False keeps the DataFrame's row index out of the saved file.
# NOTE(review): the preview output above shows Zipcode values without leading
# zeros (e.g. 8518); if zip codes need to stay as text when this file is read
# back, pass dtype={"Zipcode": str} to pd.read_csv — confirm with the reader.
output_csv = "census_data.csv"
census_pd.to_csv(output_csv, index=False, encoding="utf-8")