In [45]:
import pandas as pd
import geopandas as gpd
import os, time

In [46]:
# date stamp (YYYY_MM_DD, local time) appended to the names of the output files
today = time.strftime('%Y_%m_%d', time.localtime())

In [66]:
## file locations

# crosswalk inputs / outputs
#   interim_dir: overlay crosswalks read by the cells below
#   final_dir:   cleaned, date-stamped crosswalks written by this notebook
#   qaqc_dir:    comparison tables written for QAQC
interim_dir = 'M:/Application/PBA50Plus_Data_Processing/crosswalks/interim'
final_dir = 'M:/Application/PBA50Plus_Data_Processing/crosswalks/draft_blueprint'
qaqc_dir = 'M:/Application/PBA50Plus_Data_Processing/crosswalks/QAQC'

# travel model input folder (the parking_strategy table is written under it)
tm_data_dir = 'M:/Application/Model One/RTP2025/INPUT_DEVELOPMENT'

#### Tract - HRA crosswalk
* The crosswalk is keyed on CensusTract2020 geography; note that the HRA layer (PBA50+ Draft Blueprint) is itself based on CensusTract2010 geography.
* Interim file created with 
*python TAZ_Census_otherGeographies_overlay_crosswalk.py "M:/Data/Census/Geography/tl_2020_06_tract/tl_2020_06_tract_bayarea.shp" "tract2020" "GEOID" "C:/Users/ywang/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/p10 Datasets for PBA2050plus/raw_data_to_build_parcels_geography/pba50plus_GrowthGeographies_p10tagging/HRA_final_2023_shapefile/final_2023_public.shp" "hraPBA50plus" "oppcat" "M:/Application/PBA50Plus_Data_Processing/crosswalks/interim" --scenario DBP*
* Minor field modifications here.

In [54]:
# Tract2020 -> HRA: read the interim overlay crosswalk
hra_interim_csv = os.path.join(interim_dir, 'tract2020_hraPBA50plus_crosswalk_DBP.csv')
tract20_hraPBA50plus = pd.read_csv(hra_interim_csv)
print('{} rows, {} unique GEOID'.format(len(tract20_hraPBA50plus), tract20_hraPBA50plus['GEOID'].nunique()))
display(tract20_hraPBA50plus.head(3))

# taz_hra: 1 for tracts categorized 'Highest Resource' or 'High Resource', 0 for everything else
high_resource_categories = ['Highest Resource', 'High Resource']
tract20_hraPBA50plus['taz_hra'] = (
    tract20_hraPBA50plus['oppcat_hraPBA50plus'].isin(high_resource_categories).astype('int64')
)
tract20_hraPBA50plus = tract20_hraPBA50plus.rename(columns={'oppcat_hraPBA50plus': 'hra_category'})
# tract ID -> string, left-padded with zeros to 11 characters
tract20_hraPBA50plus['GEOID'] = tract20_hraPBA50plus['GEOID'].apply('{0:0>11}'.format)
display(tract20_hraPBA50plus.head(3))

## write out (row count and unique-tract count are printed so they can be compared)
print(len(tract20_hraPBA50plus))
print(tract20_hraPBA50plus['GEOID'].nunique())
hra_final_csv = os.path.join(final_dir, 'tract20_hraPBA50plus_{}.csv'.format(today))
tract20_hraPBA50plus[['GEOID', 'hra_category', 'taz_hra', 'scen']].to_csv(hra_final_csv, index=False)

1772 rows, 1772 unique GEOID


Unnamed: 0,GEOID,base_sq_m,oppcat_hraPBA50plus,intersection_sq_m,area_share,scen
0,6085501402,522333.0,Low Resource,516718.3,0.989251,DBP
1,6013392000,1050404.0,Highest Resource,1009085.0,0.960664,DBP
2,6013351200,9048823.0,Highest Resource,8869840.0,0.98022,DBP


Unnamed: 0,GEOID,base_sq_m,hra_category,intersection_sq_m,area_share,scen,taz_hra
0,6085501402,522333.0,Low Resource,516718.3,0.989251,DBP,0
1,6013392000,1050404.0,Highest Resource,1009085.0,0.960664,DBP,1
2,6013351200,9048823.0,Highest Resource,8869840.0,0.98022,DBP,1


1772
1772


#### TAZ - HRA crosswalk
* Based on Tract2020_HRA crosswalk and TAZ_Tract2020 crosswalk. 
* Tract2020_HRA crosswalk is created in the previous step.
* TAZ_Tract2020 crosswalk is created with
*python TAZ_Census_otherGeographies_overlay_crosswalk.py "M:/Data/GIS layers/TM1_taz/bayarea_rtaz1454_rev1_WGS84.shp" "TAZ1454"  "TAZ1454" "M:/Data/Census/Geography/tl_2020_06_tract/tl_2020_06_tract_bayarea.shp" "tract2020" "GEOID" "M:/Data/GIS layers/TM1_taz_census2020"*
* Merge and clean up fields.

In [56]:
# --- TAZ1454 -> Tract2020 crosswalk ---
taz_tract20 = pd.read_csv(r'M:\Data\GIS layers\TM1_taz_census2020\TAZ1454_tract2020_crosswalk.csv')
print('{} rows, {} unique TAZ1454'.format(len(taz_tract20), taz_tract20['TAZ1454'].nunique()))
display(taz_tract20.head(3))
# tract ID -> string, left-padded with zeros to 11 characters (same format as the HRA key below)
taz_tract20['GEOID_tract2020'] = taz_tract20['GEOID_tract2020'].apply('{0:0>11}'.format)
display(taz_tract20.head(3))

# --- Tract2020 -> HRA crosswalk (re-read from the interim file so this cell is self-contained) ---
hra_interim_csv = os.path.join(interim_dir, 'tract2020_hraPBA50plus_crosswalk_DBP.csv')
tract20_hraPBA50plus_df = pd.read_csv(hra_interim_csv)
print('{} rows, {} unique GEOID'.format(len(tract20_hraPBA50plus_df), tract20_hraPBA50plus_df['GEOID'].nunique()))
display(tract20_hraPBA50plus_df.head(3))

# taz_hra: 1 for 'Highest Resource' / 'High Resource' tracts, 0 otherwise
high_resource_categories = ['Highest Resource', 'High Resource']
tract20_hraPBA50plus_df['taz_hra'] = (
    tract20_hraPBA50plus_df['oppcat_hraPBA50plus'].isin(high_resource_categories).astype('int64')
)
tract20_hraPBA50plus_df = tract20_hraPBA50plus_df.rename(columns={'oppcat_hraPBA50plus': 'hra_category'})
tract20_hraPBA50plus_df['GEOID'] = tract20_hraPBA50plus_df['GEOID'].apply('{0:0>11}'.format)
display(tract20_hraPBA50plus_df.head(3))

# --- attach the HRA fields to each TAZ through its tract (left join keeps every TAZ row) ---
taz_hraPBA50plus = pd.merge(
    taz_tract20[['TAZ1454', 'GEOID_tract2020']],
    tract20_hraPBA50plus_df[['GEOID', 'hra_category', 'taz_hra', 'scen']],
    how='left',
    left_on='GEOID_tract2020',
    right_on='GEOID',
)
print('{} rows after merge'.format(len(taz_hraPBA50plus)))
display(taz_hraPBA50plus.head(3))

# --- write out ---
taz_hra_final_csv = os.path.join(final_dir, 'taz1454_hraPBA50plus_{}.csv'.format(today))
taz_hraPBA50plus[['TAZ1454', 'hra_category', 'taz_hra', 'scen']].to_csv(taz_hra_final_csv, index=False)

1454 rows, 1454 unique TAZ1454


Unnamed: 0,TAZ1454,base_sq_m,GEOID_tract2020,intersection_sq_m,area_share
0,1005,6794573.0,6001400100,6645546.0,0.978067
1,999,659175.4,6001400200,574075.7,0.8709
2,998,1073933.0,6001400300,1034456.0,0.963241


Unnamed: 0,TAZ1454,base_sq_m,GEOID_tract2020,intersection_sq_m,area_share
0,1005,6794573.0,6001400100,6645546.0,0.978067
1,999,659175.4,6001400200,574075.7,0.8709
2,998,1073933.0,6001400300,1034456.0,0.963241


1772 rows, 1772 unique GEOID


Unnamed: 0,GEOID,base_sq_m,oppcat_hraPBA50plus,intersection_sq_m,area_share,scen
0,6085501402,522333.0,Low Resource,516718.3,0.989251,DBP
1,6013392000,1050404.0,Highest Resource,1009085.0,0.960664,DBP
2,6013351200,9048823.0,Highest Resource,8869840.0,0.98022,DBP


Unnamed: 0,GEOID,base_sq_m,hra_category,intersection_sq_m,area_share,scen,taz_hra
0,6085501402,522333.0,Low Resource,516718.3,0.989251,DBP,0
1,6013392000,1050404.0,Highest Resource,1009085.0,0.960664,DBP,1
2,6013351200,9048823.0,Highest Resource,8869840.0,0.98022,DBP,1


1454 rows after merge


Unnamed: 0,TAZ1454,GEOID_tract2020,GEOID,hra_category,taz_hra,scen
0,1005,6001400100,6001400100,Highest Resource,1,DBP
1,999,6001400200,6001400200,Highest Resource,1,DBP
2,998,6001400300,6001400300,High Resource,1,DBP


#### Tract - EPC crosswalk
* TBD

#### TAZ - EPC (former "COC") crosswalk
* PBA50+ EPC data is created by DataViz based on CensusTract2020 geography.
* Join it with TAZ_Tract2020_crosswalk, which was created with
*python TAZ_Census_otherGeographies_overlay_crosswalk.py "M:/Data/GIS layers/TM1_taz/bayarea_rtaz1454_rev1_WGS84.shp" "TAZ1454"  "TAZ1454" "M:/Data/Census/Geography/tl_2020_06_tract/tl_2020_06_tract_bayarea.shp" "tract2020" "GEOID" "M:/Data/GIS layers/TM1_taz_census2020"*
* Minor field modifications here.

In [57]:
# --- TAZ1454 -> Tract2020 crosswalk ---
taz_tract20 = pd.read_csv(r'M:\Data\GIS layers\TM1_taz_census2020\TAZ1454_tract2020_crosswalk.csv')
print('{} rows, {} unique TAZ1454'.format(len(taz_tract20), taz_tract20['TAZ1454'].nunique()))
display(taz_tract20.head(3))
# tract ID -> string, left-padded with zeros to 11 characters
taz_tract20['GEOID_tract2020'] = taz_tract20['GEOID_tract2020'].apply('{0:0>11}'.format)
display(taz_tract20.head(3))

# --- PBA50+ EPC tracts (GeoJSON) ---
epcPBA50plus = gpd.read_file('M:/Application/RTP2025_Equity_Performances/EquityPriorityCommunities/DRAFT_Equity_Priority_Communities_-_Plan_Bay_Area_2050_Plus_(ACS_2022).geojson')
print('{} rows, {} unique tract_geoid'.format(len(epcPBA50plus), epcPBA50plus['tract_geoid'].nunique()))
display(epcPBA50plus.head(3))

# --- attach the EPC flag to each TAZ through its tract ---
# left join keeps every TAZ row; a TAZ whose tract is absent from the EPC table gets NaN
taz_epcPBA50plus = pd.merge(
    taz_tract20[['TAZ1454', 'GEOID_tract2020']],
    epcPBA50plus[['tract_geoid', 'epc_2050p']],
    how='left',
    left_on='GEOID_tract2020',
    right_on='tract_geoid',
)
print('{} rows after merge'.format(len(taz_epcPBA50plus)))
display(taz_epcPBA50plus.head(3))

# the EPC flag is exported under the name 'taz_epc'
taz_epcPBA50plus = taz_epcPBA50plus.rename(columns={'epc_2050p': 'taz_epc'})

# --- write out ---
taz_epc_final_csv = os.path.join(final_dir, 'taz1454_epcPBA50plus_{}.csv'.format(today))
taz_epcPBA50plus[['TAZ1454', 'taz_epc']].to_csv(taz_epc_final_csv, index=False)

1454 rows, 1454 unique TAZ1454


Unnamed: 0,TAZ1454,base_sq_m,GEOID_tract2020,intersection_sq_m,area_share
0,1005,6794573.0,6001400100,6645546.0,0.978067
1,999,659175.4,6001400200,574075.7,0.8709
2,998,1073933.0,6001400300,1034456.0,0.963241


Unnamed: 0,TAZ1454,base_sq_m,GEOID_tract2020,intersection_sq_m,area_share
0,1005,6794573.0,6001400100,6645546.0,0.978067
1,999,659175.4,6001400200,574075.7,0.8709
2,998,1073933.0,6001400300,1034456.0,0.963241


1765 rows, 1765 unique tract_geoid


Unnamed: 0,tract_geoid,county_fip,tot_pop_poc,tot_pop_se,tot_pop_po,tot_pop_ci,tot_pop_ov,tot_hh,tot_fam,pop_poc,...,spfam_1_2,lep_1_2,disab_1_2,below2_1_2,hus_re_1_2,zvhh_1_2,epc_2050p,epc_class,ObjectId,geometry
0,6095252604,95,4004,4004,3842,3840,3757,1136,909,3168,...,1,0,0,1,0,0,1,High,1,"POLYGON ((-122.06227 38.25832, -122.04962 38.2..."
1,6095252605,95,5914,5914,5914,5914,5314,2001,1437,4976,...,1,1,1,1,1,0,1,Higher,2,"POLYGON ((-122.04526 38.27290, -122.04279 38.2..."
2,6075016802,75,3807,3807,3785,3785,3698,1955,471,1898,...,0,0,0,0,0,1,0,,3,"POLYGON ((-122.42918 37.77418, -122.42589 37.7..."


1454 rows after merge


Unnamed: 0,TAZ1454,GEOID_tract2020,tract_geoid,epc_2050p
0,1005,6001400100,6001400100,0
1,999,6001400200,6001400200,0
2,998,6001400300,6001400300,0


#### Tract - Growth Geographies crosswalk
* Interim file created with *python TAZ_Census_otherGeographies_overlay_crosswalk.py "M:/Data/Census/Geography/tl_2020_06_tract/tl_2020_06_tract_bayarea.shp" "tract2020" "GEOID" "C:/Users/ywang/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/p10 Datasets for PBA2050plus/raw_data_to_build_parcels_geography/pba50plus_GrowthGeographies_p10tagging/PBA50Plus_Growth_Geographies_120823.shp" "ggPBA50plus" "gg_id" "M:/Application/PBA50Plus_Data_Processing/crosswalks/interim" --scenario DBP*.
* Minor field cleaning.

In [60]:
# Tract2020 -> Growth Geography: read the interim overlay crosswalk
gg_interim_csv = os.path.join(interim_dir, 'tract2020_ggPBA50plus_crosswalk_DBP.csv')
tract20_ggPBA50plus = pd.read_csv(gg_interim_csv)
print('{} rows, {} unique GEOID'.format(len(tract20_ggPBA50plus), tract20_ggPBA50plus['GEOID'].nunique()))
display(tract20_ggPBA50plus.head(3))

# growth_geo: 1 where the overlay tagged the tract with 'ggPBA50plus', 0 otherwise
# (the flag looks only at the tag; area_share is not used as a threshold)
tract20_ggPBA50plus['growth_geo'] = (
    tract20_ggPBA50plus['gg_id_ggPBA50plus'] == 'ggPBA50plus'
).astype('int64')
# tract ID -> string, left-padded with zeros to 11 characters
tract20_ggPBA50plus['GEOID'] = tract20_ggPBA50plus['GEOID'].apply('{0:0>11}'.format)
display(tract20_ggPBA50plus.head(3))

## write out (row count and unique-tract count are printed so they can be compared)
print(len(tract20_ggPBA50plus))
print(tract20_ggPBA50plus['GEOID'].nunique())
gg_final_csv = os.path.join(final_dir, 'tract20_ggPBA50plus_{}.csv'.format(today))
tract20_ggPBA50plus[['GEOID', 'growth_geo', 'scen']].to_csv(gg_final_csv, index=False)

1772 rows, 1772 unique GEOID


Unnamed: 0,GEOID,base_sq_m,gg_id_ggPBA50plus,intersection_sq_m,area_share,scen
0,6085501402,522333.0,ggPBA50plus,520302.738583,0.996113,DBP
1,6013392000,1050404.0,ggPBA50plus,5306.680685,0.005052,DBP
2,6013351200,9048823.0,ggPBA50plus,823894.308418,0.09105,DBP


Unnamed: 0,GEOID,base_sq_m,gg_id_ggPBA50plus,intersection_sq_m,area_share,scen,growth_geo
0,6085501402,522333.0,ggPBA50plus,520302.738583,0.996113,DBP,1
1,6013392000,1050404.0,ggPBA50plus,5306.680685,0.005052,DBP,1
2,6013351200,9048823.0,ggPBA50plus,823894.308418,0.09105,DBP,1


1772
1772


#### Tract - TRA crosswalk
* Interim file created with *python TAZ_Census_otherGeographies_overlay_crosswalk.py "M:/Data/Census/Geography/tl_2020_06_tract/tl_2020_06_tract_bayarea.shp" "tract2020" "GEOID" "M:/Application/PBA50Plus_Data_Processing/Draft_Blueprint_Growth_Geographies/QAQC/gg_tra_pba50plus.shp" "ggtraPBA50plus" "ggtra" "M:/Application/PBA50Plus_Data_Processing/crosswalks/interim" --scenario DBP*.
* Minor field cleaning.

In [59]:
# Tract2020 -> TRA (within Growth Geography): read the interim overlay crosswalk
ggtra_interim_csv = os.path.join(interim_dir, 'tract2020_ggtraPBA50plus_crosswalk_DBP.csv')
tract20_ggtraPBA50plus = pd.read_csv(ggtra_interim_csv)
print('{} rows, {} unique GEOID'.format(len(tract20_ggtraPBA50plus), tract20_ggtraPBA50plus['GEOID'].nunique()))
display(tract20_ggtraPBA50plus.head(3))

# gg_tra: 1 where the overlay tagged the tract with 'ggtraPBA50plus', 0 otherwise
# (rows with a missing tag compare unequal and therefore get 0)
tract20_ggtraPBA50plus['gg_tra'] = (
    tract20_ggtraPBA50plus['ggtra_ggtraPBA50plus'] == 'ggtraPBA50plus'
).astype('int64')
# tract ID -> string, left-padded with zeros to 11 characters
tract20_ggtraPBA50plus['GEOID'] = tract20_ggtraPBA50plus['GEOID'].apply('{0:0>11}'.format)
display(tract20_ggtraPBA50plus.head(3))

## write out (row count and unique-tract count are printed so they can be compared)
print(len(tract20_ggtraPBA50plus))
print(tract20_ggtraPBA50plus['GEOID'].nunique())
ggtra_final_csv = os.path.join(final_dir, 'tract20_ggtraPBA50plus_{}.csv'.format(today))
tract20_ggtraPBA50plus[['GEOID', 'gg_tra', 'scen']].to_csv(ggtra_final_csv, index=False)

1772 rows, 1772 unique GEOID


Unnamed: 0,GEOID,base_sq_m,ggtra_ggtraPBA50plus,intersection_sq_m,area_share,scen
0,6085501402,522333.0,ggtraPBA50plus,520263.047889,0.996037,DBP
1,6013392000,1050404.0,,,,DBP
2,6013351200,9048823.0,,,,DBP


Unnamed: 0,GEOID,base_sq_m,ggtra_ggtraPBA50plus,intersection_sq_m,area_share,scen,gg_tra
0,6085501402,522333.0,ggtraPBA50plus,520263.047889,0.996037,DBP,1
1,6013392000,1050404.0,,,,DBP,0
2,6013351200,9048823.0,,,,DBP,0


1772
1772


#### TAZ-Growth Geography and TAZ-TRA(within GG)
* TAZ-Growth Geography crosswalk: created with *python TAZ_Census_otherGeographies_overlay_crosswalk.py "M:/Data/GIS layers/TM1_taz/bayarea_rtaz1454_rev1_WGS84.shp" "TAZ1454"  "TAZ1454" "C:/Users/ywang/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/p10 Datasets for PBA2050plus/raw_data_to_build_parcels_geography/pba50plus_GrowthGeographies_p10tagging/PBA50Plus_Growth_Geographies_120823.shp" "ggPBA50plus" "gg_id" "M:/Application/PBA50Plus_Data_Processing/crosswalks/interim" --scenario DBP*
* TAZ-TRA(within GG) crosswalk: created with *python TAZ_Census_otherGeographies_overlay_crosswalk.py "M:/Data/GIS layers/TM1_taz/bayarea_rtaz1454_rev1_WGS84.shp" "TAZ1454"  "TAZ1454" "M:/Application/PBA50Plus_Data_Processing/Draft_Blueprint_Growth_Geographies/QAQC/gg_tra_pba50plus.shp" "ggtraPBA50plus" "ggtra" "M:/Application/PBA50Plus_Data_Processing/crosswalks/interim" --scenario DBP*
* Why the Tract20_GG and Tract20_GGTRA crosswalks are not reused here: the Travel Model's Parking Pricing strategy needs the share of each TAZ's area that falls within GG and within GG+TRA, which requires a direct TAZ-level spatial overlay calculation.

In [61]:
## TAZ - Growth Geography
# load interim file (direct TAZ1454 x Growth Geography overlay)
taz_ggPBA50plus = pd.read_csv(os.path.join(interim_dir, 'TAZ1454_ggPBA50plus_crosswalk_DBP.csv'))
# this table is keyed by TAZ1454, so the uniqueness check is labeled as such (not GEOID)
print('{} rows, {} unique TAZ1454'.format(taz_ggPBA50plus.shape[0], taz_ggPBA50plus['TAZ1454'].nunique()))
display(taz_ggPBA50plus.head(3))

# create additional fields
# growth_geo: 1 where the overlay tagged the TAZ with 'ggPBA50plus', 0 otherwise
# (rows with a missing tag stay 0)
taz_ggPBA50plus['growth_geo'] = 0
taz_ggPBA50plus.loc[taz_ggPBA50plus['gg_id_ggPBA50plus'] == 'ggPBA50plus', 'growth_geo'] = 1
display(taz_ggPBA50plus.head(3))

## write out (row count and unique-TAZ count are printed so they can be compared)
print(taz_ggPBA50plus.shape[0])
print(taz_ggPBA50plus['TAZ1454'].nunique())
taz_ggPBA50plus[['TAZ1454', 'growth_geo', 'scen']].to_csv(os.path.join(final_dir, 'taz1454_ggPBA50plus_{}.csv'.format(today)), index=False)

1454 rows, 1454 unique GEOID


Unnamed: 0,TAZ1454,base_sq_m,gg_id_ggPBA50plus,intersection_sq_m,area_share,scen
0,1005,6794573.0,,,,DBP
1,999,659175.4,ggPBA50plus,654381.5,0.992727,DBP
2,998,1073933.0,ggPBA50plus,1073906.0,0.999975,DBP


Unnamed: 0,TAZ1454,base_sq_m,gg_id_ggPBA50plus,intersection_sq_m,area_share,scen,growth_geo
0,1005,6794573.0,,,,DBP,0
1,999,659175.4,ggPBA50plus,654381.5,0.992727,DBP,1
2,998,1073933.0,ggPBA50plus,1073906.0,0.999975,DBP,1


1454
1454


In [62]:
## TAZ - TRA (within GG)
# load interim file (direct TAZ1454 x TRA-within-GG overlay)
taz_ggtraPBA50plus = pd.read_csv(os.path.join(interim_dir, 'TAZ1454_ggtraPBA50plus_crosswalk_DBP.csv'))
# this table is keyed by TAZ1454, so the uniqueness check is labeled as such (not GEOID)
print('{} rows, {} unique TAZ1454'.format(taz_ggtraPBA50plus.shape[0], taz_ggtraPBA50plus['TAZ1454'].nunique()))
display(taz_ggtraPBA50plus.head(3))

# create additional fields
# gg_tra: 1 where the overlay tagged the TAZ with 'ggtraPBA50plus', 0 otherwise
# (rows with a missing tag stay 0)
taz_ggtraPBA50plus['gg_tra'] = 0
taz_ggtraPBA50plus.loc[taz_ggtraPBA50plus['ggtra_ggtraPBA50plus'] == 'ggtraPBA50plus', 'gg_tra'] = 1
display(taz_ggtraPBA50plus.head(3))

## write out (row count and unique-TAZ count are printed so they can be compared)
print(taz_ggtraPBA50plus.shape[0])
print(taz_ggtraPBA50plus['TAZ1454'].nunique())
taz_ggtraPBA50plus[['TAZ1454', 'gg_tra', 'scen']].to_csv(os.path.join(final_dir, 'taz1454_ggtraPBA50plus_{}.csv'.format(today)), index=False)

1454 rows, 1454 unique GEOID


Unnamed: 0,TAZ1454,base_sq_m,ggtra_ggtraPBA50plus,intersection_sq_m,area_share,scen
0,1005,6794573.0,,,,DBP
1,999,659175.4,ggtraPBA50plus,580953.8,0.881334,DBP
2,998,1073933.0,ggtraPBA50plus,1035041.0,0.963785,DBP


Unnamed: 0,TAZ1454,base_sq_m,ggtra_ggtraPBA50plus,intersection_sq_m,area_share,scen,gg_tra
0,1005,6794573.0,,,,DBP,0
1,999,659175.4,ggtraPBA50plus,580953.8,0.881334,DBP,1
2,998,1073933.0,ggtraPBA50plus,1035041.0,0.963785,DBP,1


1454
1454


In [64]:
# create data needed for travel model Parking Pricing strategy

# Put the TAZ-GG and TAZ-TRA(within GG) overlays side by side, one row per TAZ.
# The overlay area/share columns are renamed first so the two sets stay distinguishable.
taz_gg_ggtraPBA50plus = pd.merge(
    taz_ggPBA50plus.rename(columns={'intersection_sq_m': 'area_within_GG_sq_m',
                                    'area_share': 'pct_area_within_GG'}),
    taz_ggtraPBA50plus.rename(columns={'intersection_sq_m': 'area_within_GG_TRA_sq_m',
                                       'area_share': 'pct_area_within_GG_TRA'}),
    on=['TAZ1454', 'base_sq_m', 'scen'],
    how='outer'
)
print('check merge: {} rows, {} unique TAZ id'.format(
    taz_gg_ggtraPBA50plus.shape[0], taz_gg_ggtraPBA50plus['TAZ1454'].nunique()
))
display(taz_gg_ggtraPBA50plus.head(3))

# fillna: TAZs with no overlap carry NaN area/share from the overlay; treat them as 0.
# The filled columns are assigned back to the frame. Calling fillna(inplace=True) on a
# column selection is chained assignment: pandas 2.2 warns about it, and with
# Copy-on-Write it only modifies a temporary, leaving the NaNs in the frame.
overlay_cols = ['area_within_GG_sq_m', 'pct_area_within_GG', 'area_within_GG_TRA_sq_m', 'pct_area_within_GG_TRA']
taz_gg_ggtraPBA50plus[overlay_cols] = taz_gg_ggtraPBA50plus[overlay_cols].fillna(0)

# bring in 'area_sqmi' from PBA50 data
taz_gg_ggtraPBA50 = pd.read_excel(
    r'M:\Application\Model One\RTP2021\Blueprint\INPUT_DEVELOPMENT\parking_strategy\TAZ_intersect_GG_TRA.xlsx',
    sheet_name = 'bayarea_rtaz1454_rev1_WGS84')
display(taz_gg_ggtraPBA50.head(3))

# outer join so a TAZ present in only one of the two tables would show up in the check below
taz_gg_ggtraPBA50plus = taz_gg_ggtraPBA50plus.merge(taz_gg_ggtraPBA50[['TAZ1454', 'area_sqmi']], on='TAZ1454', how='outer')
print('check merge: {} rows, {} unique TAZ id'.format(
    taz_gg_ggtraPBA50plus.shape[0], taz_gg_ggtraPBA50plus['TAZ1454'].nunique()
))

# write out the table for travel model
taz_gg_ggtraPBA50plus[['TAZ1454', 'area_sqmi', 'pct_area_within_GG', 'pct_area_within_GG_TRA', 'scen']].to_csv(
        os.path.join(tm_data_dir, 'parking_strategy', 'taz1454_GG_TRA_crosswalk.csv'), index=False)


check merge: 1454 rows, 1454 unique TAZ id


Unnamed: 0,TAZ1454,base_sq_m,gg_id_ggPBA50plus,area_within_GG_sq_m,pct_area_within_GG,scen,growth_geo,ggtra_ggtraPBA50plus,area_within_GG_TRA_sq_m,pct_area_within_GG_TRA,gg_tra
0,1005,6794573.0,,,,DBP,0,,,,0
1,999,659175.4,ggPBA50plus,654381.5,0.992727,DBP,1,ggtraPBA50plus,580953.8,0.881334,1
2,998,1073933.0,ggPBA50plus,1073906.0,0.999975,DBP,1,ggtraPBA50plus,1035041.0,0.963785,1


Unnamed: 0,TAZ1454,area_sqmi,area_within_GG,pct_area_within_GG,area_within_GG_TRA123,pct_area_within_GG_TRA123
0,1,0.03169,0.03169,1.0,0.03169,1.0
1,2,0.04862,0.04862,1.0,0.04862,1.0
2,3,0.02298,0.02298,1.0,0.02298,1.0


check merge: 1454 rows, 1454 unique TAZ id


In [67]:
# QAQC table: stack the PBA50 Final Blueprint figures and the PBA50+ Draft Blueprint figures
# (one 'version' label per source) so the two can be compared
SQMETER_TO_SQMILE = 3.861e-7  # square meters -> square miles

# PBA50 side: label it and drop the '123' suffix so its column names match the PBA50+ ones
taz_gg_ggtraPBA50['version'] = 'PBA50'
taz_gg_ggtraPBA50 = taz_gg_ggtraPBA50.rename(columns={'area_within_GG_TRA123': 'area_within_GG_TRA',
                                                      'pct_area_within_GG_TRA123': 'pct_area_within_GG_TRA'})

# PBA50+ side: label it and express the overlay areas in square miles like the PBA50 table
taz_gg_ggtraPBA50plus['version'] = 'PBA50+ Draft BP'
sq_m_to_sq_mi_cols = {'area_within_GG_sq_m': 'area_within_GG',
                      'area_within_GG_TRA_sq_m': 'area_within_GG_TRA'}
for sq_m_col, sq_mi_col in sq_m_to_sq_mi_cols.items():
    taz_gg_ggtraPBA50plus[sq_mi_col] = taz_gg_ggtraPBA50plus[sq_m_col] * SQMETER_TO_SQMILE

# stack PBA50 rows first, then the matching PBA50+ columns
comparison_cols = ['TAZ1454', 'area_sqmi', 'area_within_GG', 'pct_area_within_GG',
                   'area_within_GG_TRA', 'pct_area_within_GG_TRA', 'version']
taz_gg_ggtra_crosswalk_comp = pd.concat([taz_gg_ggtraPBA50, taz_gg_ggtraPBA50plus[comparison_cols]])
display(taz_gg_ggtra_crosswalk_comp.head(3))

comparison_csv = os.path.join(qaqc_dir, 'taz1454_gg_ggtra_crosswalk_comp.csv')
taz_gg_ggtra_crosswalk_comp.to_csv(comparison_csv, index=False)

Unnamed: 0,TAZ1454,area_sqmi,area_within_GG,pct_area_within_GG,area_within_GG_TRA,pct_area_within_GG_TRA,version
0,1,0.03169,0.03169,1.0,0.03169,1.0,PBA50
1,2,0.04862,0.04862,1.0,0.04862,1.0,PBA50
2,3,0.02298,0.02298,1.0,0.02298,1.0,PBA50
