# Download Census population data
* **TODO:** Move this logic into a script when ready

In [1]:
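# The imports below need `census` and `us`; `censusdata` is only used by the retired snippet further down.
#%pip install census us censusdata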

In [2]:
import numpy as np
import pandas as pd
import geopandas as gpd
import os
import boto3
import census
from us import states

In [3]:
# Read the Census API key from the environment rather than hardcoding it:
# a key committed in a notebook is visible to anyone who can read the file.
# Set CENSUS_API_KEY before starting the kernel (e.g. `export CENSUS_API_KEY=...`).
census_api_key = os.environ.get('CENSUS_API_KEY')
if not census_api_key:
    raise RuntimeError('CENSUS_API_KEY environment variable is not set')

# Census API client; queries made through it default to the 2017 vintage
c = census.Census(census_api_key, year = 2017)

In [4]:
# S3 client, used at the end of the notebook to upload the exported tract file
s3 = boto3.client(service_name = 's3')

## Import population data from Census API

In [5]:
# Pull variable B01003_001E from the ACS 5-year endpoint for every tract
# in county 037 of California.
raw = c.acs5.state_county_tract(
    'B01003_001E',     # ACS variable to request
    states.CA.fips,    # state FIPS code for California
    '037',             # county FIPS code
    census.ALL,        # all tracts within that county
)

# Load the API response into a DataFrame for further shaping
df = pd.DataFrame(data = raw)

In [6]:
# Build the tract GEOID by concatenating the state, county and tract codes,
# keep only the GEOID and the B01003_001E column, and order rows by GEOID.
# The frame is rebuilt from `raw` instead of being modified in place, so the
# cell can be re-run without failing on the already-dropped
# state/county/tract columns.
df = (
    pd.DataFrame(raw)
    .assign(GEOID = lambda d: d.state + d.county + d.tract)
    .loc[:, ['GEOID', 'B01003_001E']]
    .sort_values('GEOID', ascending = True)
)

In [7]:
# Retired alternative kept for reference only: the whole cell is a bare string
# literal, so none of the code inside it executes (the cell just echoes the text).
# It sketches the same download using the `censusdata` package.
# NOTE(review): if this is ever revived, `pop = pop.append(data)` sits outside the
# `for` loop, so only the last iteration's `data` would be appended.
"""
# 2017 5-year ACS pop data is the most recent data
# 1-year ACS does not do tract-level estimates
# Cleaning GEOID from censusdata package is difficult

pop = pd.DataFrame()

for y in range(2017, 2018):
    data = censusdata.download('acs5', y, 
                               censusdata.censusgeo([('state', '06'), ('county', '037'), ('tract', '*')]), 
                               ['B01003_001E'])
    data['year'] = y

pop = pop.append(data)
"""

"\n# 2017 5-year ACS pop data is the most recent data\n# 1-year ACS does not do tract-level estimates\n# Cleaning GEOID from censusdata package is difficult\n\npop = pd.DataFrame()\n\nfor y in range(2017, 2018):\n    data = censusdata.download('acs5', y, \n                               censusdata.censusgeo([('state', '06'), ('county', '037'), ('tract', '*')]), \n                               ['B01003_001E'])\n    data['year'] = y\n\npop = pop.append(data)\n"

## Import census tract boundary file and write to S3

In [8]:
# Read the tract boundary file straight from the S3 data lake
# (the `tl_2019_06_tract` directory; presumably the 2019 TIGER/Line tracts for state 06 — confirm)
tract = gpd.read_file(
    's3://city-of-los-angeles-data-lake/public-health-dashboard/gis/source/tl_2019_06_tract/'
)

In [9]:
# Columns retained for the boundary layer: the tract identifier and its geometry
keep_me = ['GEOID', 'geometry']

# Keep only rows whose county FIPS code is 037 (LA County), then drop every other column
in_la_county = tract['COUNTYFP'] == '037'
tract = tract[in_la_county][keep_me]

In [10]:
# Reproject the tracts to EPSG:2229, write them to a local GeoJSON file,
# then upload that file to the S3 data lake.
# `epsg = 2229` replaces the {'init': 'epsg:2229'} dict: the '+init=' CRS syntax
# is deprecated and raises a FutureWarning in current geopandas/pyproj.
local_geojson = '../data/census_tracts.geojson'

tract.to_crs(epsg = 2229).to_file(local_geojson, driver = 'GeoJSON')
s3.upload_file(local_geojson, 'city-of-los-angeles-data-lake',
               'public-health-dashboard/gis/raw/census_tracts.geojson')