In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os

#### Read lookup files and BAUS run output

In [2]:
# Lookup table: p10 parcel ID -> TM2 maz
p_maz_lookup_file = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50\\Current PBA50 Large General Input Data\\2020_08_17_parcel_to_maz22.csv'.format(os.getenv('USERNAME'))

# Only the parcel ID and maz columns are read from the lookup file.
p_maz_lookup = pd.read_csv(p_maz_lookup_file, usecols=['PARCEL_ID', 'maz'])
print('Read {} rows of parcel_id/maz lookup table'.format(len(p_maz_lookup)))

# Upper-case the maz column so it lines up with the 'MAZ' key of the maz/taz lookup.
p_maz_lookup = p_maz_lookup.rename(columns={'maz': 'MAZ'})
print(p_maz_lookup.dtypes)
display(p_maz_lookup.head())

Read 1956208 rows of parcel_id/maz lookup table
PARCEL_ID    int64
MAZ          int64
dtype: object


Unnamed: 0,PARCEL_ID,MAZ
0,229116,310596
1,244166,331415
2,202378,310099
3,2004420,710778
4,340332,318182


In [3]:
# Lookup table: TM2 maz -> TM2 taz / county
maz_taz_lookup_file = 'C:\\Users\\{}\\Documents\\GitHub\\travel-model-two\\maz_taz\\mazs_tazs_county_v2.2.csv'.format(os.getenv('USERNAME'))
maz_taz_lookup = pd.read_csv(maz_taz_lookup_file)

# Report how many rows were loaded, then show the schema and a preview.
n_maz_taz_rows = len(maz_taz_lookup)
print('Read {} rows of maz/taz lookup table'.format(n_maz_taz_rows))
print(maz_taz_lookup.dtypes)
display(maz_taz_lookup.head())

Read 39726 rows of maz/taz lookup table
MAZ             int64
TAZ             int64
COUNTY          int64
county_name    object
dtype: object


Unnamed: 0,MAZ,TAZ,COUNTY,county_name
0,10001,56,1,San Francisco
1,10002,56,1,San Francisco
2,10003,10,1,San Francisco
3,10004,53,1,San Francisco
4,10005,48,1,San Francisco


In [4]:
# TM2 maz areas, read from the TM2 v2.2 maz shapefile.
# Backslashes are escaped (as in the other path literals of this notebook) so the
# Windows path contains no invalid escape sequences such as '\D' or '\G'.
maz_area_file = 'M:\\Data\\GIS layers\\TM2_maz_taz_v2.2\\mazs_TM2_v2_2.shp'
maz_area = gpd.read_file(maz_area_file)
print('Read {} rows of maz spatial data'.format(maz_area.shape[0]))

# Keep only the maz ID and its acreage; rename the ID to 'MAZ' so it matches
# the key used by the other lookup tables.
maz_area = maz_area[['maz', 'acres']].rename(columns={'maz': 'MAZ'})
maz_area

Read 39726 rows of maz spatial data


Unnamed: 0,MAZ,acres
0,10001,4.190409
1,10002,4.157050
2,10003,4.205730
3,10004,4.056231
4,10005,4.330024
...,...,...
39721,814481,8.806087
39722,814495,68.479018
39723,814497,56.609816
39724,814500,76.224530


In [5]:
# Build the parcel -> TM2 geography lookup by attaching TAZ/county to each parcel's MAZ.
# Left join: every parcel is kept even if its MAZ is absent from the maz/taz table.
p_tm2_geo = pd.merge(p_maz_lookup, maz_taz_lookup, how='left', on='MAZ')

n_lookup_parcels = p_tm2_geo.shape[0]
n_lookup_maz = p_tm2_geo['MAZ'].unique().size
n_lookup_taz = p_tm2_geo['TAZ'].unique().size  # a missing TAZ counts as one distinct value here
print('Final p10-TM2 geos lookup has {} parcels, {} maz, {} taz'.format(n_lookup_parcels,
                                                                        n_lookup_maz,
                                                                        n_lookup_taz))

# Check for missing data: parcels without a MAZ, and the MAZ values that came
# back from the join without a TAZ or county name (maz 999999 has neither).
display(p_tm2_geo[p_tm2_geo['MAZ'].isna()])
display(p_tm2_geo.loc[p_tm2_geo['TAZ'].isna(), 'MAZ'].unique())
display(p_tm2_geo.loc[p_tm2_geo['county_name'].isna(), 'MAZ'].unique())

# Fill the gaps with placeholders: 0 for the numeric IDs, 'NA' for the county name.
missing_geo_fill = {'TAZ': 0, 'county_name': 'NA', 'COUNTY': 0}
for geo_col, fill_value in missing_geo_fill.items():
    p_tm2_geo.loc[p_tm2_geo[geo_col].isna(), geo_col] = fill_value

Final p10-TM2 geos lookup has 1956208 parcels, 38677 maz, 4727 taz


Unnamed: 0,PARCEL_ID,MAZ,TAZ,COUNTY,county_name


array([999999], dtype=int64)

array([999999], dtype=int64)

In [6]:
# Load the FBP 2015 UrbanSim parcel-level output (household columns only)
fbp_2015_file = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50\\Final Blueprint runs\\Final Blueprint (s24)\\BAUS v2.25 - FINAL VERSION\\run182_parcel_data_2015.csv'.format(os.getenv('USERNAME'))
fbp_2015 = pd.read_csv(fbp_2015_file, usecols=['parcel_id', 'hhq1', 'hhq2', 'hhq3', 'hhq4', 'tothh'])
print('Read {} rows of FBP 2015 UrbanSim output data'.format(len(fbp_2015)))

# Rename the key so it matches the 'PARCEL_ID' column of the parcel/maz lookup.
fbp_2015 = fbp_2015.rename(columns={'parcel_id': 'PARCEL_ID'})
print(fbp_2015.dtypes)
display(fbp_2015.head())

Read 1956212 rows of FBP 2015 UrbanSim output data
PARCEL_ID      int64
hhq1         float64
hhq2         float64
hhq3         float64
hhq4         float64
tothh        float64
dtype: object


Unnamed: 0,PARCEL_ID,hhq1,hhq2,hhq3,hhq4,tothh
0,229116,,,,,
1,244166,,,,,
2,202378,2.0,7.0,7.0,14.0,30.0
3,2004420,,,,,
4,340332,,,,,


#### Merge BAUS output with TM2 lookup and summarize by TM2 maz

In [7]:
# Attach the TM2 geographies (MAZ / TAZ / county) to each parcel of the UrbanSim output.
# Left join: parcels with no entry in the lookup are kept, with missing geography values.
fbp_2015_tm2_geo = pd.merge(fbp_2015, p_tm2_geo, how='left', on='PARCEL_ID')
fbp_2015_tm2_geo.head()

Unnamed: 0,PARCEL_ID,hhq1,hhq2,hhq3,hhq4,tothh,MAZ,TAZ,COUNTY,county_name
0,229116,,,,,,310596.0,300436.0,4.0,Alameda
1,244166,,,,,,331415.0,300430.0,4.0,Alameda
2,202378,2.0,7.0,7.0,14.0,30.0,310099.0,300217.0,4.0,Alameda
3,2004420,,,,,,710778.0,700296.0,8.0,Sonoma
4,340332,,,,,,318182.0,300547.0,4.0,Alameda


In [8]:
# Flag the parcels that came out of the join without a MAZ, and show them.
synthetic_idx = fbp_2015_tm2_geo['MAZ'].isna()
display(fbp_2015_tm2_geo[synthetic_idx])

# Assign those parcels to MAZ 217456, TAZ 201085, Santa Clara (county code 3).
synthetic_geo = {
    'MAZ': 217456,
    'TAZ': 201085,
    'county_name': 'Santa Clara',
    'COUNTY': 3,
}
for geo_col, geo_value in synthetic_geo.items():
    fbp_2015_tm2_geo.loc[synthetic_idx, geo_col] = geo_value

Unnamed: 0,PARCEL_ID,hhq1,hhq2,hhq3,hhq4,tothh,MAZ,TAZ,COUNTY,county_name
1956208,2054504,,,,,,,,,
1956209,2054505,1119.0,409.0,157.0,83.0,1768.0,,,,
1956210,2054506,,,,,,,,,
1956211,572927,,,,,,,,,


In [9]:
# Household columns: missing values are replaced with 0.
# Geography ID columns: cast to int64 (they hold floats after the left joins).
hh_cols = ['hhq1', 'hhq2', 'hhq3', 'hhq4', 'tothh']
geo_id_cols = ['MAZ', 'TAZ', 'COUNTY']

# Reassign the frame instead of calling fillna(inplace=True) on a column
# selection: the chained in-place form is not guaranteed to write back to the
# parent DataFrame. astype() is the vectorised equivalent of .apply(np.int64).
fbp_2015_tm2_geo = (
    fbp_2015_tm2_geo
    .fillna({col: 0 for col in hh_cols})
    .astype({col: np.int64 for col in geo_id_cols})
)

In [10]:
# Sum household counts per TM2 maz, carrying the county and TAZ of each maz.
# The value columns are selected with a list: indexing a groupby with a bare
# tuple of names ("['hhq1','hhq2',...]") is deprecated and rejected by newer pandas.
hh_cols = ['hhq1', 'hhq2', 'hhq3', 'hhq4', 'tothh']
fbp_2015_maz = (
    fbp_2015_tm2_geo
    .groupby(['COUNTY', 'county_name', 'TAZ', 'MAZ'])[hh_cols]
    .sum()
    .reset_index()
)
print('Created {} rows of HH data summary at TM2 maz level'.format(fbp_2015_maz.shape[0]))

# Display only: the sorted view is not assigned back.
fbp_2015_maz.sort_values(by='MAZ')

  """Entry point for launching an IPython kernel.


Created 38677 rows of HH data summary at TM2 maz level


Unnamed: 0,COUNTY,county_name,TAZ,MAZ,hhq1,hhq2,hhq3,hhq4,tothh
301,1,San Francisco,56,10001,8.0,17.0,12.0,21.0,58.0
302,1,San Francisco,56,10002,13.0,18.0,9.0,21.0,61.0
47,1,San Francisco,10,10003,13.0,27.0,14.0,17.0,71.0
280,1,San Francisco,53,10004,8.0,25.0,15.0,23.0,71.0
250,1,San Francisco,48,10005,15.0,21.0,14.0,26.0,76.0
...,...,...,...,...,...,...,...,...,...
37995,9,Marin,800090,814495,13.0,16.0,22.0,32.0,83.0
37716,9,Marin,800055,814497,8.0,17.0,36.0,65.0,126.0
37373,9,Marin,800007,814500,12.0,29.0,15.0,23.0,79.0
37380,9,Marin,800008,814506,7.0,5.0,9.0,24.0,45.0


In [11]:
# Append the acreage of each maz to the household summary.
# Outer join: a MAZ present in only one of the two tables is kept.
# NOTE(review): such rows will carry missing values in the columns coming from
# the other table (household counts / TAZ / county, or acres) - confirm this is intended.
fbp_2015_maz = pd.merge(fbp_2015_maz, maz_area, how='outer', on='MAZ')

In [12]:
# Export the maz-level summary as CSV (row index omitted) under the current
# Windows user's Box folder.
box_user = os.getenv('USERNAME')
output_file = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\Travel Model 2\\FBP2015_by_TM2_maz.csv'.format(box_user)
fbp_2015_maz.to_csv(output_file, index=False)