# Compiling census data 

This notebook downloads tract-level data from the 2016 ACS 5-year estimates for every county in the New York metropolitan area and saves it to a `.csv` file.


In [1]:
import pandas as pd
import requests
from census import Census
from us import states

In [2]:
# To reproduce the data below, you'll need to save your
# Census API key to `../data/census-api-key.txt`.
# You can obtain a key here: https://api.census.gov/data/key_signup.html
#
# The key is read inside a `with` block so the file handle is closed as soon
# as the contents have been read, instead of being left open until it is
# garbage-collected.
with open("../data/census-api-key.txt") as key_file:
    # Strip surrounding whitespace (e.g. a trailing newline) from the key.
    api_key = key_file.read().strip()

# Client used for every Census API request made in this notebook.
c = Census(api_key)

The counties of the New York-Newark-Jersey City, NY-NJ-PA Metropolitan Statistical Area, sourced [from here](https://www.bea.gov/regional/docs/msalist.cfm?mlist=45):

* 34003 — "Bergen, NJ"
* 34013 — "Essex, NJ"
* 34017 — "Hudson, NJ"
* 34019 — "Hunterdon, NJ"
* 34023 — "Middlesex, NJ"
* 34025 — "Monmouth, NJ"
* 34027 — "Morris, NJ"
* 34029 — "Ocean, NJ"
* 34031 — "Passaic, NJ"
* 34035 — "Somerset, NJ"
* 34037 — "Sussex, NJ"
* 34039 — "Union, NJ"
* 36005 — "Bronx, NY"
* 36027 — "Dutchess, NY"
* 36047 — "Kings, NY"
* 36059 — "Nassau, NY"
* 36061 — "New York, NY"
* 36071 — "Orange, NY"
* 36079 — "Putnam, NY"
* 36081 — "Queens, NY"
* 36085 — "Richmond, NY"
* 36087 — "Rockland, NY"
* 36103 — "Suffolk, NY"
* 36119 — "Westchester, NY"
* 42103 — "Pike, PA"


In [3]:
# One record per county of the metropolitan area listed above. Each record
# carries the state code, the county code and a readable county label, kept
# as strings so that leading zeros in the codes are preserved.
nyc_met_area = [
    {
        "state_code": state_code,
        "county_code": county_code,
        "county_name": county_name,
    }
    for state_code, county_code, county_name in [
        # New Jersey
        ("34", "003", "Bergen, NJ"),
        ("34", "013", "Essex, NJ"),
        ("34", "017", "Hudson, NJ"),
        ("34", "019", "Hunterdon, NJ"),
        ("34", "023", "Middlesex, NJ"),
        ("34", "025", "Monmouth, NJ"),
        ("34", "027", "Morris, NJ"),
        ("34", "029", "Ocean, NJ"),
        ("34", "031", "Passaic, NJ"),
        ("34", "035", "Somerset, NJ"),
        ("34", "037", "Sussex, NJ"),
        ("34", "039", "Union, NJ"),
        # New York
        ("36", "005", "Bronx, NY"),
        ("36", "027", "Dutchess, NY"),
        ("36", "047", "Kings, NY"),
        ("36", "059", "Nassau, NY"),
        ("36", "061", "New York, NY"),
        ("36", "071", "Orange, NY"),
        ("36", "079", "Putnam, NY"),
        ("36", "081", "Queens, NY"),
        ("36", "085", "Richmond, NY"),
        ("36", "087", "Rockland, NY"),
        ("36", "103", "Suffolk, NY"),
        ("36", "119", "Westchester, NY"),
        # Pennsylvania
        ("42", "103", "Pike, PA"),
    ]
]

In [4]:
# ACS variables requested for every tract.
# Full API variable list available here https://api.census.gov/data/2016/acs/acs5/variables/
categories = [
    # Geography label; for the tract-level queries made in this notebook this
    # is the tract name, e.g. "Census Tract 10, Bergen County, New Jersey".
    "NAME",

    # Headline socio-economic figures
    "B01001_001E",  # Total population
    "B19013_001E",  # Median income
    "B25077_001E",  # Median home value
    "B15011_001E",  # Total population age 25+ years with a bachelor's degree or higher

    # Race / ethnicity breakdown
    "B03002_003E",  # Not Hispanic or Latino!!White alone
    "B03002_004E",  # Not Hispanic or Latino!!Black or African American alone
    # NOTE(review): this is the only race field taken from table B02001 rather
    # than B03002, so it does not appear to be limited to non-Hispanic
    # residents like its neighbours. B03002_005E may be the intended
    # variable -- confirm before changing, since get_acs_data reads this key.
    "B02001_004E",  # American Indian and Alaska Native Alone
    "B03002_006E",  # Not Hispanic or Latino!!Asian alone
    "B03002_007E",  # Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone
    "B03002_008E",  # Not Hispanic or Latino!!Some other race alone
    "B03002_009E",  # Not Hispanic or Latino!!Two or more races
    "B03002_012E",  # Hispanic or Latino
]

In [5]:
def get_acs_data(state_code, county_code):
    """Fetch the 2016 ACS 5-year variables for all tracts of one county.

    Requests the variables listed in the notebook-level `categories` through
    the notebook-level client `c`, passing `Census.ALL` as the tract selector.

    Parameters
    ----------
    state_code : str
        State code of the county, e.g. "34".
    county_code : str
        County code within the state, e.g. "003".

    Returns
    -------
    list of dict
        One dict per row returned by the API. `geoid` is the concatenation of
        the row's `state`, `county` and `tract` values; every other key is a
        readable alias for one of the requested variables.
    """
    # Output column name -> API variable it is copied from.
    column_to_variable = {
        'name': 'NAME',
        'total_population': 'B01001_001E',
        'median_income': 'B19013_001E',
        'median_home_value': 'B25077_001E',
        'educational_attainment': 'B15011_001E',
        'white_alone': 'B03002_003E',
        'black_alone': 'B03002_004E',
        'native': 'B02001_004E',
        'asian': 'B03002_006E',
        'native_hawaiian_pacific_islander': 'B03002_007E',
        'some_other_race_alone': 'B03002_008E',
        'two_or_more': 'B03002_009E',
        'hispanic_or_latino': 'B03002_012E',
    }

    tract_rows = c.acs5.state_county_tract(
        categories,
        state_code,
        county_code,
        Census.ALL,
        year=2016,
    )

    records = []
    for row in tract_rows:
        # Build the identifier first so it stays the leading key of the record.
        record = {'geoid': row['state'] + row['county'] + row['tract']}
        for column, variable in column_to_variable.items():
            record[column] = row[variable]
        records.append(record)
    return records

In [6]:
# Download the tract records county by county. The raw records are gathered
# under their own name so that `census_data` only ever refers to the final
# DataFrame, rather than being a list first and a DataFrame afterwards.
census_records = []
for county in nyc_met_area:
    # Print the county label as a simple progress indicator; each iteration
    # makes one API request.
    print(county["county_name"])
    census_records.extend(
        get_acs_data(county["state_code"], county["county_code"])
    )

# Build the frame once from all records and fix the column order that is
# later written to the CSV file.
census_data = pd.DataFrame(census_records)[[
    'geoid',
    'name',
    'total_population',
    'median_income',
    'median_home_value',
    'educational_attainment',
    'white_alone',
    'black_alone',
    'native',
    'asian',
    'native_hawaiian_pacific_islander',
    'some_other_race_alone',
    'two_or_more',
    'hispanic_or_latino',
]]

census_data.head()

Bergen, NJ
Essex, NJ
Hudson, NJ
Hunterdon, NJ
Middlesex, NJ
Monmouth, NJ
Morris, NJ
Ocean, NJ
Passaic, NJ
Somerset, NJ
Sussex, NJ
Union, NJ
Bronx, NY
Dutchess, NY
Kings, NY
Nassau, NY
New York, NY
Orange, NY
Putnam, NY
Queens, NY
Richmond, NY
Rockland, NY
Suffolk, NY
Westchester, NY
Pike, PA


Unnamed: 0,geoid,name,total_population,median_income,median_home_value,educational_attainment,white_alone,black_alone,native,asian,native_hawaiian_pacific_islander,some_other_race_alone,two_or_more,hispanic_or_latino
0,34003001000,"Census Tract 10, Bergen County, New Jersey",6767,151641.0,680000.0,3045.0,5667.0,75.0,0.0,759.0,0.0,0.0,132.0,134.0
1,34003002100,"Census Tract 21, Bergen County, New Jersey",1522,114545.0,2000001.0,836.0,788.0,141.0,0.0,444.0,0.0,0.0,27.0,122.0
2,34003002200,"Census Tract 22, Bergen County, New Jersey",5389,90647.0,453800.0,1791.0,3481.0,99.0,9.0,1247.0,0.0,36.0,19.0,504.0
3,34003002300,"Census Tract 23, Bergen County, New Jersey",5828,112031.0,610000.0,2363.0,3595.0,89.0,37.0,1627.0,0.0,0.0,32.0,448.0
4,34003003100,"Census Tract 31, Bergen County, New Jersey",4946,76906.0,301900.0,1588.0,1803.0,306.0,0.0,1435.0,0.0,13.0,24.0,1365.0


In [7]:
# Number of rows (one per tract record) collected across all counties.
census_data.shape[0]

4700

In [8]:
# Write the compiled table to disk; the DataFrame index is left out so the
# file contains only the selected data columns.
output_path = "../output/2016_census_data.csv"
census_data.to_csv(output_path, index=False)

Tract counts by county:

In [9]:
# Sanity check: count the tracts retrieved for each county and attach the
# county labels. The outer merge keeps every county from `nyc_met_area`, so a
# county for which no tracts came back would still be listed (with a missing
# count) instead of silently disappearing.
(
    census_data
    .assign(
        # The geoid is state + county + tract, so the first two characters
        # are the state code and the next three the county code.
        state_code=census_data["geoid"].str[0:2],
        county_code=census_data["geoid"].str[2:5],
    )
    .groupby(["state_code", "county_code"])
    .size()
    .reset_index(name="tracts")
    .merge(
        pd.DataFrame(nyc_met_area),
        how="outer",
        on=["state_code", "county_code"],
    )
    .sort_values("tracts", ascending=False)
)

Unnamed: 0,state_code,county_code,tracts,county_name
14,36,47,761,"Kings, NY"
19,36,81,669,"Queens, NY"
12,36,5,339,"Bronx, NY"
22,36,103,323,"Suffolk, NY"
16,36,61,288,"New York, NY"
15,36,59,284,"Nassau, NY"
23,36,119,223,"Westchester, NY"
1,34,13,210,"Essex, NJ"
0,34,3,179,"Bergen, NJ"
4,34,23,175,"Middlesex, NJ"


---

---

---