# 2010 Census and LODES data

Prepared for a validation of BAUS contingency plan runs, 2023-10. See [the Asana task](https://app.asana.com/0/385259290425521/1205437224589265/f) and [the Tableau workbook](https://10ay.online.tableau.com/#/site/metropolitantransportationcommission/workbooks/1346304/views).

In [1]:
import pandas as pd
import requests

In [2]:
# Census 2010 SF1 variable codes to request, mapped to the column names used
# in the rest of this notebook.
# NOTE(review): presumably H003001 / H003002 are total and occupied housing
# units — confirm against the Census API variable list.
variables = {
    'H003001': 'RES_UNITS',
    'H003002': 'TOTHH',
}

# County FIPS codes (within state 06) to query; names are kept for reference
counties = {
    '001': 'Alameda',
    '013': 'Contra Costa',
    '041': 'Marin',
    '055': 'Napa',
    '075': 'San Francisco',
    '081': 'San Mateo',
    '085': 'Santa Clara',
    '095': 'Solano',
    '097': 'Sonoma',
}

# Retrieve 2010 Census SF1 data via Census API, one row per block in the
# listed counties
census_url = (
    "https://api.census.gov/data/2010/dec/sf1"
    f"?get={','.join(variables)}"
    "&for=block:*"
    f"&in=state:06%20county:{','.join(counties)}"
)
# Without a timeout, requests waits indefinitely on a stalled connection
response = requests.get(census_url, timeout=600)
# Fail here with the HTTP status instead of a confusing JSON/shape error below
response.raise_for_status()
resp = response.json()

# The first row of the payload holds the column names; the rest are data rows
census = pd.DataFrame.from_records(resp[1:], columns=resp[0])

# Cast variables to int (they arrive as strings) and rename them
census = census.astype({code: int for code in variables}).rename(columns=variables)

# Create full FIPS code column and set it as the index
census['GEOID10'] = census['state'] + census['county'] + census['tract'] + census['block']
# Select with an explicit list of labels rather than a dict view
census = census.set_index('GEOID10')[list(variables.values())]

census

Unnamed: 0_level_0,RES_UNITS,TOTHH
GEOID10,Unnamed: 1_level_1,Unnamed: 2_level_1
060014001001055,8,8
060014001001056,46,40
060014001001058,71,64
060014001001059,24,23
060014001001064,0,0
...,...,...
060979901000006,0,0
060979901000007,0,0
060979901000008,0,0
060979901000009,0,0


In [3]:
# Block-to-zone lookup: read only the block code and its TAZ1454 value,
# index by block code (kept as a string so leading zeros survive), and
# expose the zone under the column name ZONE.
block_to_taz_mapping = (
    pd.read_csv(
        r'M:\Data\GIS layers\TM1_taz_census2010\2010block_to_TAZ1454.csv',
        index_col='GEOID10',
        usecols=['GEOID10', 'TAZ1454'],
        dtype={'GEOID10': 'string'},
    )
    .rename(columns={'TAZ1454': 'ZONE'})
)
block_to_taz_mapping

Unnamed: 0_level_0,ZONE
GEOID10,Unnamed: 1_level_1
060014001001000,1005
060014001001001,1005
060014001001002,1005
060014001001003,1005
060014001001004,1005
...,...
060971543043138,1403
060971543043139,1403
060971543043140,1403
060971543043141,1403


In [4]:
# Share of residential units and households that sit in census blocks
# missing from the block-to-TAZ mapping (these blocks drop out of the
# zone-level aggregation).
is_mapped = census.index.isin(block_to_taz_mapping.index)
unmapped_blocks = census.loc[~is_mapped]
unmapped_blocks.sum() / census.sum()

RES_UNITS    0.000027
TOTHH        0.000025
dtype: float64

In [5]:
# LODES version 8 (the latest) is published on 2020 census blocks, so read
# version 7 instead. Only the block code and the C000 column are kept; the
# block code is read as a string and used as the index, and C000 is exposed
# as TOTEMP.
lodes = (
    pd.read_csv(
        'https://lehd.ces.census.gov/data/lodes/LODES7/ca/wac/ca_wac_S000_JT00_2010.csv.gz',
        index_col='w_geocode',
        usecols=['w_geocode', 'C000'],
        dtype={'w_geocode': 'string'},
    )
    .rename(columns={'C000': 'TOTEMP'})
)
lodes

Unnamed: 0_level_0,TOTEMP
w_geocode,Unnamed: 1_level_1
060014001001000,2267
060014001001005,9
060014001001006,16
060014001001007,7474
060014001001008,78
...,...
061150411004025,1
061150411004027,1
061150411004034,2
061150411004035,1


In [6]:
# Build the zone-level table in three steps.
# 1. Left-join employment onto the census blocks: every census block is kept,
#    and blocks without a LODES row get NaN for TOTEMP.
blocks_with_jobs = census.join(lodes)
# 2. Inner-join the zone lookup: blocks absent from the mapping are dropped.
blocks_in_zones = blocks_with_jobs.join(block_to_taz_mapping, how='inner')
# 3. Total each column per zone (NaN is skipped by sum) and coerce to int.
df = blocks_in_zones.groupby('ZONE').sum().astype(int)
df

Unnamed: 0_level_0,RES_UNITS,TOTHH,TOTEMP
ZONE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,61,25,14412
2,152,135,23181
3,294,270,2095
4,174,58,16507
5,735,524,16588
...,...,...,...
1450,2966,2813,745
1451,2051,1925,431
1452,2159,1988,1707
1453,315,264,440


In [8]:
# Column totals across all zones
df.sum(axis=0)

RES_UNITS    2785872
TOTHH        2607958
TOTEMP       3159661
dtype: int64

In [7]:
# Write the zone-level table to the working directory; the ZONE index is
# written as the first CSV column.
output_path = 'TAZ1454 2010 Land Use.csv'
df.to_csv(output_path)