In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os

#### Read parcel/MAZ lookup files and BAUS run output

In [2]:
# Lookup table from p10 parcel ID to TM2 MAZ
p_maz_lookup_file = 'M:\\Data\\GIS layers\\p10_TM2_maz\\p10_maz\\p10_maz_lookup_compare_20210809.csv'
p_maz_lookup = pd.read_csv(p_maz_lookup_file, usecols=['PARCEL_ID', 'maz_new'])

# Report the table size and key cardinality; dropna=False counts NaN as a
# distinct value, the same way len(Series.unique()) does
n_lookup_rows = len(p_maz_lookup)
n_lookup_parcels = p_maz_lookup['PARCEL_ID'].nunique(dropna=False)
n_lookup_mazs = p_maz_lookup['maz_new'].nunique(dropna=False)
print('Read {} rows of parcel_id/maz lookup table with {} unique parcels and {} unique MAZs'.format(
    n_lookup_rows, n_lookup_parcels, n_lookup_mazs))

# The rest of the notebook refers to the MAZ id column as 'MAZ'
p_maz_lookup = p_maz_lookup.rename(columns={'maz_new': 'MAZ'})
print(p_maz_lookup.dtypes)
display(p_maz_lookup.head())

Read 1956208 rows of parcel_id/maz lookup table with 1956208 unique parcels and 39129 unique MAZs
PARCEL_ID      int64
MAZ          float64
dtype: object


Unnamed: 0,PARCEL_ID,MAZ
0,229116,324291.0
1,244166,331415.0
2,202378,323229.0
3,2004420,718260.0
4,340332,318182.0


In [3]:
# Load the FBP 2015 UrbanSim parcel-level output (parcel id plus household counts)
# from the current Windows user's Box folder
box_user = os.getenv('USERNAME')
fbp_2015_file = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50\\Final Blueprint runs\\Final Blueprint (s24)\\BAUS v2.25 - FINAL VERSION\\run182_parcel_data_2015.csv'.format(box_user)
fbp_2015 = pd.read_csv(
    fbp_2015_file,
    usecols=['parcel_id', 'hhq1', 'hhq2', 'hhq3', 'hhq4', 'tothh'],
)
print('Read {} rows of FBP 2015 UrbanSim output data'.format(len(fbp_2015)))

# Match the parcel key name used by the parcel/MAZ lookup so the two can be merged
fbp_2015 = fbp_2015.rename(columns={'parcel_id': 'PARCEL_ID'})
print(fbp_2015.dtypes)
display(fbp_2015.head())

Read 1956212 rows of FBP 2015 UrbanSim output data
PARCEL_ID      int64
hhq1         float64
hhq2         float64
hhq3         float64
hhq4         float64
tothh        float64
dtype: object


Unnamed: 0,PARCEL_ID,hhq1,hhq2,hhq3,hhq4,tothh
0,229116,,,,,
1,244166,,,,,
2,202378,2.0,7.0,7.0,14.0,30.0
3,2004420,,,,,
4,340332,,,,,


In [4]:
# Merge BAUS output with the parcel/MAZ lookup.
# The left join keeps every BAUS parcel; parcels absent from the lookup get MAZ = NaN.
# validate='many_to_one' makes pandas raise a MergeError if PARCEL_ID is duplicated
# in the lookup, which would otherwise silently duplicate parcel rows and inflate
# the household totals summed by MAZ later on.
fbp_2015_maz = fbp_2015.merge(
    p_maz_lookup,
    on='PARCEL_ID',
    how='left',
    validate='many_to_one',
)
fbp_2015_maz.head()

Unnamed: 0,PARCEL_ID,hhq1,hhq2,hhq3,hhq4,tothh,MAZ
0,229116,,,,,,324291.0
1,244166,,,,,,331415.0
2,202378,2.0,7.0,7.0,14.0,30.0,323229.0
3,2004420,,,,,,718260.0
4,340332,,,,,,318182.0


In [5]:
# Sanity check: no parcel mapped to MAZ 999999 should carry households (tothh > 0).
# Build the filtered frame once, then show its row count and its column sums.
hh_in_maz_999999 = fbp_2015_maz[(fbp_2015_maz['MAZ'] == 999999) & (fbp_2015_maz['tothh'] > 0)]
print(len(hh_in_maz_999999))
display(hh_in_maz_999999.sum())

0


PARCEL_ID    0.0
hhq1         0.0
hhq2         0.0
hhq3         0.0
hhq4         0.0
tothh        0.0
MAZ          0.0
dtype: float64

In [6]:
# Flag the parcels that did not receive a MAZ from the lookup, and show them
synthetic_idx = fbp_2015_maz['MAZ'].isna()
display(fbp_2015_maz[synthetic_idx])

# Assign them to MAZ 217456, TAZ 201085, Santa Clara
fbp_2015_maz['MAZ'] = fbp_2015_maz['MAZ'].mask(synthetic_idx, 217456)

# Double check - no parcel should be left without a MAZ, so this should be empty
display(fbp_2015_maz[fbp_2015_maz['MAZ'].isna()])

Unnamed: 0,PARCEL_ID,hhq1,hhq2,hhq3,hhq4,tothh,MAZ
1956208,2054504,,,,,,
1956209,2054505,1119.0,409.0,157.0,83.0,1768.0,
1956210,2054506,,,,,,
1956211,572927,,,,,,


Unnamed: 0,PARCEL_ID,hhq1,hhq2,hhq3,hhq4,tothh,MAZ


In [7]:
# Household count columns that are summed to MAZ level
hh_cols = ['hhq1', 'hhq2', 'hhq3', 'hhq4', 'tothh']

# Fill missing household counts with 0.
# Assign the result back instead of calling fillna(inplace=True) on a column
# selection: that is chained assignment, which under pandas Copy-on-Write acts
# on a temporary and leaves the frame unchanged.
fbp_2015_maz[hh_cols] = fbp_2015_maz[hh_cols].fillna(0)

# Summarize by MAZ.
# MAZ is float after the left merge; cast it to an integer id (vectorized astype
# rather than an element-wise apply). This requires that no MAZ is NaN.
fbp_2015_maz['MAZ'] = fbp_2015_maz['MAZ'].astype(np.int64)
# Select the columns with a list: indexing a groupby with a bare tuple of keys
# is deprecated and raises in pandas 2.0.
fbp_2015_by_maz = fbp_2015_maz.groupby('MAZ')[hh_cols].sum().reset_index()
print('Created {} rows of HH data summary at TM2 maz level'.format(fbp_2015_by_maz.shape[0]))
fbp_2015_by_maz.sort_values(by='MAZ')

  import sys


Created 39129 rows of HH data summary at TM2 maz level


Unnamed: 0,MAZ,hhq1,hhq2,hhq3,hhq4,tothh
0,10001,8.0,17.0,12.0,21.0,58.0
1,10002,13.0,18.0,9.0,21.0,61.0
2,10003,13.0,27.0,14.0,17.0,71.0
3,10004,8.0,25.0,15.0,23.0,71.0
4,10005,15.0,21.0,14.0,26.0,76.0
...,...,...,...,...,...,...
39124,814495,16.0,18.0,24.0,36.0,94.0
39125,814497,8.0,18.0,37.0,66.0,129.0
39126,814500,51.0,70.0,63.0,90.0,274.0
39127,814506,8.0,10.0,12.0,28.0,58.0


#### Add other MAZ attributes

In [8]:
# Lookup from TM2 MAZ to TM2 TAZ and county, read from the current Windows
# user's travel-model-two GitHub folder
windows_user = os.getenv('USERNAME')
maz_taz_lookup_file = 'C:\\Users\\{}\\Documents\\GitHub\\travel-model-two\\maz_taz\\mazs_tazs_county_v2.2.csv'.format(windows_user)
maz_taz_lookup = pd.read_csv(maz_taz_lookup_file)

# Report the row count and column types, then preview the first rows
print('Read {} rows of maz/taz lookup table'.format(len(maz_taz_lookup)))
print(maz_taz_lookup.dtypes)
display(maz_taz_lookup.head())

Read 39726 rows of maz/taz lookup table
MAZ             int64
TAZ             int64
COUNTY          int64
county_name    object
dtype: object


Unnamed: 0,MAZ,TAZ,COUNTY,county_name
0,10001,56,1,San Francisco
1,10002,56,1,San Francisco
2,10003,10,1,San Francisco
3,10004,53,1,San Francisco
4,10005,48,1,San Francisco


In [9]:
# TM2 maz areas, read from the MAZ shapefile.
# Backslashes are escaped like the other Windows paths in this notebook: the
# unescaped '\D', '\G', '\T' and '\m' were invalid escape sequences, which
# Python warns about and plans to reject. The resulting path string is unchanged.
maz_area_file = 'M:\\Data\\GIS layers\\TM2_maz_taz_v2.2\\mazs_TM2_v2_2.shp'
maz_area = gpd.read_file(maz_area_file)
print('Read {} rows of maz spatial data'.format(maz_area.shape[0]))

# Keep only the 'maz' and 'acres' columns. Rename 'maz' to 'MAZ' by name (rather
# than by assigning .columns positionally) so it matches the key of the other tables.
maz_area = maz_area[['maz', 'acres']].rename(columns={'maz': 'MAZ'})
display(maz_area.head())

Read 39726 rows of maz spatial data


Unnamed: 0,MAZ,acres
0,10001,4.190409
1,10002,4.15705
2,10003,4.20573
3,10004,4.056231
4,10005,4.330024


In [10]:
# Combine the per-MAZ attributes: TAZ/county from the lookup and acres from the
# MAZ area table. The outer join keeps MAZs that appear in only one of the two.
maz_att = maz_taz_lookup.merge(maz_area, on='MAZ', how='outer')
print(maz_att.shape)

# Attach the attributes to the household summary. The outer join also keeps
# MAZs that have no household record.
# NOTE: this reassigns fbp_2015_by_maz, so re-running this cell without first
# re-running the MAZ summary cell merges the attributes a second time.
fbp_2015_by_maz = fbp_2015_by_maz.merge(maz_att, on='MAZ', how='outer')
print(fbp_2015_by_maz.shape)

# MAZs that came only from maz_att have NaN household counts; set them to 0.
# Assign the result back instead of calling fillna(inplace=True) on a column
# selection: that is chained assignment, which under pandas Copy-on-Write acts
# on a temporary and leaves the frame unchanged.
hh_cols = ['hhq1', 'hhq2', 'hhq3', 'hhq4', 'tothh']
fbp_2015_by_maz[hh_cols] = fbp_2015_by_maz[hh_cols].fillna(0)

fbp_2015_by_maz = fbp_2015_by_maz.sort_values(by=['COUNTY', 'MAZ'])

# double check total HH counts against BAUS
display(fbp_2015_by_maz[hh_cols].sum())

(39726, 5)
(39727, 10)


hhq1      706436.0
hhq2      649102.0
hhq3      577085.0
hhq4      743953.0
tothh    2676576.0
dtype: float64

In [11]:
# Write the MAZ-level household summary as CSV (without the row index) to the
# current Windows user's Box folder
output_user = os.getenv('USERNAME')
output_file = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\Travel Model 2\\FBP2015_by_TM2_maz_20210809.csv'.format(output_user)
fbp_2015_by_maz.to_csv(output_file, index=False)