Goal: get the count of HHs, HHs/acre, and Jobs in TRAs, HRAs, and PDAs for
- Base Year 2015
- FBP Year 2035
- FBP Year 2050
- No Project Year 2050 

In [1]:
import pandas as pd
import os

### Set up inputs and output templates

In [2]:
# Inputs

BAUS_output_dir = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME'))
FBP_run_dir = os.path.join(BAUS_output_dir, 'Final Blueprint runs', 'Final Blueprint (s24)', 'BAUS v2.25 - FINAL VERSION')
NP_run_dir = os.path.join(BAUS_output_dir, 'EIR runs', 'Baseline Large (s25) runs', 'NP_v8_FINAL')

Lookup_dir = 'M:\\Data\\GIS layers\\Blueprint Land Use Strategies\\ID_idx\\Final Blueprint'
parcel_geos_crosswalk_file = os.path.join(Lookup_dir, 'parcel_tra_hra_pda_fbp_20210816.csv')
geos_acreage_file = os.path.join(Lookup_dir, 'land_acreage_by_geographies.csv')


In [3]:
# load and modify parcel_geo crosswalk

parcel_geos_crosswalk = pd.read_csv(parcel_geos_crosswalk_file)
print('read {} rows of parcel-geos crosswalk, including the following columns: \n{}'.format(
    parcel_geos_crosswalk.shape[0],
    list(parcel_geos_crosswalk)))

# CARB doesn't request data on individual PDAs, so create a new column for PDA/non-PDA
parcel_geos_crosswalk['fbp_pda_cat_id'] = 'non-PDA'
parcel_geos_crosswalk.loc[parcel_geos_crosswalk.pda_id_pba50_fb.notnull(), 'fbp_pda_cat_id'] = 'PDA'

# some statistics
print(parcel_geos_crosswalk.fbp_tra_cat_id.value_counts())
print(parcel_geos_crosswalk.fbp_hra_id.value_counts())
print(parcel_geos_crosswalk.fbp_pda_cat_id.value_counts())

# rename 'PARCEL_ID'
parcel_geos_crosswalk.rename(columns={'PARCEL_ID': 'parcel_id'}, inplace=True)

display(parcel_geos_crosswalk)

read 1956212 rows of parcel-geos crosswalk, including the following columns: 
['PARCEL_ID', 'juris', 'fbp_tra_id', 'fbp_tra_cat_id', 'fbp_hra_id', 'pda_id_pba50_fb', 'pda_name']
non-tra    1189682
tra3        512800
tra2        213208
tra1         40522
Name: fbp_tra_cat_id, dtype: int64
non-HRA    1113026
HRA         843186
Name: fbp_hra_id, dtype: int64
non-PDA    1637682
PDA         318530
Name: fbp_pda_cat_id, dtype: int64


Unnamed: 0,parcel_id,juris,fbp_tra_id,fbp_tra_cat_id,fbp_hra_id,pda_id_pba50_fb,pda_name,fbp_pda_cat_id
0,229116,livermore,non-tra,non-tra,HRA,,,non-PDA
1,244166,livermore,tra3,tra3,non-HRA,,,non-PDA
2,202378,hayward,non-tra,non-tra,non-HRA,,,non-PDA
3,2004420,unincorporated_sonoma,non-tra,non-tra,HRA,,,non-PDA
4,340332,fremont,non-tra,non-tra,HRA,,,non-PDA
...,...,...,...,...,...,...,...,...
1956207,782909,unincorporated_marin,tra3,tra3,HRA,,,non-PDA
1956208,2054504,unincorporated_marin,non-tra,non-tra,non-HRA,,,non-PDA
1956209,2054505,unincorporated_santa_clara,non-tra,non-tra,non-HRA,,,non-PDA
1956210,2054506,unincorporated_marin,non-tra,non-tra,non-HRA,,,non-PDA


In [4]:
# create dataframe to store the summary value

tra_summary = pd.DataFrame(index=parcel_geos_crosswalk.fbp_tra_cat_id.unique())
display(tra_summary)

hra_summary = pd.DataFrame(index=parcel_geos_crosswalk.fbp_hra_id.unique())
display(hra_summary)

pda_summary = pd.DataFrame(index=parcel_geos_crosswalk.fbp_pda_cat_id.unique())
display(pda_summary)

non-tra
tra3
tra2
tra1


HRA
non-HRA


non-PDA
PDA


### Read parcel data and summarize tothh and totemp

In [5]:
def load_parcel_data(file_name):
    """
    load parcel data with the needed columns
    """
    
    df = pd.read_csv(file_name,
                     usecols = ['parcel_id', 'tothh', 'totemp'])
    print('load {} rows of parcel data'.format(df.shape[0]))
    return df

In [6]:
def tag_parcel(parcel_df, crosswalk):
    """
    add geographies tagging to parcels
    """
   
    parcel_tagged = parcel_df.merge(crosswalk, on='parcel_id', how='left')
    return parcel_tagged

In [7]:
def summarize_by_geography(parcel_df, metrics_cols, geo_col):
    """
    summarize BAUS parcel-level output by certain geography, e.g., hra_id, tra_id
    
    Inputs:
        - parcel_df: parcel-level BAUS output
        - metrics_cols: a list of columns specifying which metrics to summarize, e.g. tothh (total households)
        - geo_col: a column that tags parcels by geography designation,
                    e.g. column 'hra_id' tags parcels by 'hra' and 'non-hra'

    """
    
    parcel_summary = parcel_df.groupby(geo_col)[metrics_cols].sum()
    return parcel_summary

In [8]:
# list geographies to summarize by and metrics to summarize
geo_cols = ['fbp_tra_cat_id', 'fbp_hra_id', 'fbp_pda_cat_id']
metrics_cols = ['tothh', 'totemp']

In [9]:
# all parcel files to summarize by year+scenario

parcel_output_files = {'2015'    : os.path.join(FBP_run_dir, 'run182_parcel_data_2015.csv'),      # Base Year 20
                       '2035_FBP': os.path.join(FBP_run_dir, 'run182_parcel_data_2035_UBI.csv'),  # FBP Year 2035
                       '2050_FBP': os.path.join(FBP_run_dir, 'run182_parcel_data_2050_UBI.csv'),  # FBP Year 2050
                       '2050_NP' : os.path.join(NP_run_dir, 'run314_parcel_data_2050.csv')}       # No Project Year 2050 

In [10]:
# loop through parcel files, summarize, and add to the pre-set dataframes

for runkey in parcel_output_files:
    print(runkey)
    filename = parcel_output_files[runkey]
    print(filename)
        
    # load parcel-level data
    parcel_df = load_parcel_data(filename)
    display(parcel_df.head(3))
    
    # add geo tagging
    parcel_tagged = tag_parcel(parcel_df, parcel_geos_crosswalk[['parcel_id'] + geo_cols])
    display(parcel_tagged.head(3))
    
    # summarize by each geography
    for geo in geo_cols:
        parcel_summary = summarize_by_geography(parcel_tagged, metrics_cols, geo)
        display(parcel_summary)
        
        # rename the columns to add 'year' info
#         parcel_summary.columns = [geo] + [x+'_'+year for x in metrics_cols]
        parcel_summary.columns = [runkey+'_'+x for x in metrics_cols]
        display(parcel_summary)
        
        if geo == 'fbp_tra_cat_id':
#             tra_summary = tra_summary.merge(parcel_summary, on=geo, how='outer')
            tra_summary = tra_summary.merge(parcel_summary, left_index=True, right_index=True, how='outer')
        elif geo == 'fbp_hra_id':
            hra_summary = hra_summary.merge(parcel_summary, left_index=True, right_index=True, how='outer')
        elif geo == 'fbp_pda_cat_id':
            pda_summary = pda_summary.merge(parcel_summary, left_index=True, right_index=True, how='outer')

2015
C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\Final Blueprint runs\Final Blueprint (s24)\BAUS v2.25 - FINAL VERSION\run182_parcel_data_2015.csv
load 1956212 rows of parcel data


Unnamed: 0,parcel_id,tothh,totemp
0,229116,,
1,244166,,
2,202378,30.0,


Unnamed: 0,parcel_id,tothh,totemp,fbp_tra_cat_id,fbp_hra_id,fbp_pda_cat_id
0,229116,,,non-tra,HRA,non-PDA
1,244166,,,tra3,non-HRA,non-PDA
2,202378,30.0,,non-tra,non-HRA,non-PDA


Unnamed: 0_level_0,tothh,totemp
fbp_tra_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
non-tra,1333334.0,1501815.0
tra1,126128.0,524460.0
tra2,444482.0,861022.0
tra3,772632.0,1118021.0


Unnamed: 0_level_0,2015_tothh,2015_totemp
fbp_tra_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
non-tra,1333334.0,1501815.0
tra1,126128.0,524460.0
tra2,444482.0,861022.0
tra3,772632.0,1118021.0


Unnamed: 0_level_0,tothh,totemp
fbp_hra_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HRA,1143799.0,1331924.0
non-HRA,1532777.0,2673394.0


Unnamed: 0_level_0,2015_tothh,2015_totemp
fbp_hra_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HRA,1143799.0,1331924.0
non-HRA,1532777.0,2673394.0


Unnamed: 0_level_0,tothh,totemp
fbp_pda_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
PDA,686790.0,1882352.0
non-PDA,1989786.0,2122966.0


Unnamed: 0_level_0,2015_tothh,2015_totemp
fbp_pda_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
PDA,686790.0,1882352.0
non-PDA,1989786.0,2122966.0


2035_FBP
C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\Final Blueprint runs\Final Blueprint (s24)\BAUS v2.25 - FINAL VERSION\run182_parcel_data_2035_UBI.csv
load 1956212 rows of parcel data


Unnamed: 0,parcel_id,tothh,totemp
0,229116,,
1,244166,,
2,202378,31.0,


Unnamed: 0,parcel_id,tothh,totemp,fbp_tra_cat_id,fbp_hra_id,fbp_pda_cat_id
0,229116,,,non-tra,HRA,non-PDA
1,244166,,,tra3,non-HRA,non-PDA
2,202378,31.0,,non-tra,non-HRA,non-PDA


Unnamed: 0_level_0,tothh,totemp
fbp_tra_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
non-tra,1492300.0,1766335.0
tra1,231319.0,663279.0
tra2,773801.0,1145292.0
tra3,997792.0,1259607.0


Unnamed: 0_level_0,2035_FBP_tothh,2035_FBP_totemp
fbp_tra_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
non-tra,1492300.0,1766335.0
tra1,231319.0,663279.0
tra2,773801.0,1145292.0
tra3,997792.0,1259607.0


Unnamed: 0_level_0,tothh,totemp
fbp_hra_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HRA,1389026.0,1426572.0
non-HRA,2106186.0,3407941.0


Unnamed: 0_level_0,2035_FBP_tothh,2035_FBP_totemp
fbp_hra_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HRA,1389026.0,1426572.0
non-HRA,2106186.0,3407941.0


Unnamed: 0_level_0,tothh,totemp
fbp_pda_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
PDA,1275248.0,2309896.0
non-PDA,2219964.0,2524617.0


Unnamed: 0_level_0,2035_FBP_tothh,2035_FBP_totemp
fbp_pda_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
PDA,1275248.0,2309896.0
non-PDA,2219964.0,2524617.0


2050_FBP
C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\Final Blueprint runs\Final Blueprint (s24)\BAUS v2.25 - FINAL VERSION\run182_parcel_data_2050_UBI.csv
load 1956212 rows of parcel data


Unnamed: 0,parcel_id,tothh,totemp
0,229116,,
1,244166,,
2,202378,33.0,


Unnamed: 0,parcel_id,tothh,totemp,fbp_tra_cat_id,fbp_hra_id,fbp_pda_cat_id
0,229116,,,non-tra,HRA,non-PDA
1,244166,,,tra3,non-HRA,non-PDA
2,202378,33.0,,non-tra,non-HRA,non-PDA


Unnamed: 0_level_0,tothh,totemp
fbp_tra_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
non-tra,1582938.0,2022766.0
tra1,309888.0,740673.0
tra2,1034305.0,1253232.0
tra3,1116181.0,1391789.0


Unnamed: 0_level_0,2050_FBP_tothh,2050_FBP_totemp
fbp_tra_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
non-tra,1582938.0,2022766.0
tra1,309888.0,740673.0
tra2,1034305.0,1253232.0
tra3,1116181.0,1391789.0


Unnamed: 0_level_0,tothh,totemp
fbp_hra_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HRA,1526366.0,1530293.0
non-HRA,2516946.0,3878167.0


Unnamed: 0_level_0,2050_FBP_tothh,2050_FBP_totemp
fbp_hra_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HRA,1526366.0,1530293.0
non-HRA,2516946.0,3878167.0


Unnamed: 0_level_0,tothh,totemp
fbp_pda_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
PDA,1671995.0,2560604.0
non-PDA,2371317.0,2847856.0


Unnamed: 0_level_0,2050_FBP_tothh,2050_FBP_totemp
fbp_pda_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
PDA,1671995.0,2560604.0
non-PDA,2371317.0,2847856.0


2050_NP
C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\EIR runs\Baseline Large (s25) runs\NP_v8_FINAL\run314_parcel_data_2050.csv
load 1956212 rows of parcel data


Unnamed: 0,parcel_id,tothh,totemp
0,229116,,
1,244166,,
2,202378,31.0,


Unnamed: 0,parcel_id,tothh,totemp,fbp_tra_cat_id,fbp_hra_id,fbp_pda_cat_id
0,229116,,,non-tra,HRA,non-PDA
1,244166,,,tra3,non-HRA,non-PDA
2,202378,31.0,,non-tra,non-HRA,non-PDA


Unnamed: 0_level_0,tothh,totemp
fbp_tra_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
non-tra,1845179.0,1988704.0
tra1,245015.0,698770.0
tra2,829348.0,1327419.0
tra3,1123770.0,1393567.0


Unnamed: 0_level_0,2050_NP_tothh,2050_NP_totemp
fbp_tra_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
non-tra,1845179.0,1988704.0
tra1,245015.0,698770.0
tra2,829348.0,1327419.0
tra3,1123770.0,1393567.0


Unnamed: 0_level_0,tothh,totemp
fbp_hra_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HRA,1478364.0,1584626.0
non-HRA,2564948.0,3823834.0


Unnamed: 0_level_0,2050_NP_tothh,2050_NP_totemp
fbp_hra_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HRA,1478364.0,1584626.0
non-HRA,2564948.0,3823834.0


Unnamed: 0_level_0,tothh,totemp
fbp_pda_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
PDA,1389301.0,2606046.0
non-PDA,2654011.0,2802414.0


Unnamed: 0_level_0,2050_NP_tothh,2050_NP_totemp
fbp_pda_cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1
PDA,1389301.0,2606046.0
non-PDA,2654011.0,2802414.0


In [11]:
display(tra_summary)
display(hra_summary)
display(pda_summary)

Unnamed: 0,2015_tothh,2015_totemp,2035_FBP_tothh,2035_FBP_totemp,2050_FBP_tothh,2050_FBP_totemp,2050_NP_tothh,2050_NP_totemp
non-tra,1333334.0,1501815.0,1492300.0,1766335.0,1582938.0,2022766.0,1845179.0,1988704.0
tra1,126128.0,524460.0,231319.0,663279.0,309888.0,740673.0,245015.0,698770.0
tra2,444482.0,861022.0,773801.0,1145292.0,1034305.0,1253232.0,829348.0,1327419.0
tra3,772632.0,1118021.0,997792.0,1259607.0,1116181.0,1391789.0,1123770.0,1393567.0


Unnamed: 0,2015_tothh,2015_totemp,2035_FBP_tothh,2035_FBP_totemp,2050_FBP_tothh,2050_FBP_totemp,2050_NP_tothh,2050_NP_totemp
HRA,1143799.0,1331924.0,1389026.0,1426572.0,1526366.0,1530293.0,1478364.0,1584626.0
non-HRA,1532777.0,2673394.0,2106186.0,3407941.0,2516946.0,3878167.0,2564948.0,3823834.0


Unnamed: 0,2015_tothh,2015_totemp,2035_FBP_tothh,2035_FBP_totemp,2050_FBP_tothh,2050_FBP_totemp,2050_NP_tothh,2050_NP_totemp
PDA,686790.0,1882352.0,1275248.0,2309896.0,1671995.0,2560604.0,1389301.0,2606046.0
non-PDA,1989786.0,2122966.0,2219964.0,2524617.0,2371317.0,2847856.0,2654011.0,2802414.0


### calculate hh/acre numbers

In [12]:
# read acreage data
geos_acreage = pd.read_csv(geos_acreage_file)
display(geos_acreage.head())

# create a new column for PDA/non-PDA
geos_acreage['fbp_pda_cat_id'] = 'non-PDA'
geos_acreage.loc[parcel_geos_crosswalk.pda_id_pba50_fb.notnull(), 'fbp_pda_cat_id'] = 'PDA'

# summarize acreage by the same geographies as for tothh and totemp, and add to the above summary

for geo in geo_cols:
    acreage_geo = summarize_by_geography(geos_acreage, ['ACRES'], geo)
    display(acreage_geo)
    
    if geo == 'fbp_tra_cat_id':
        tra_summary = tra_summary.merge(acreage_geo, left_index=True, right_index=True, how='outer')
    elif geo == 'fbp_hra_id':
        hra_summary = hra_summary.merge(acreage_geo, left_index=True, right_index=True, how='outer')
    elif geo == 'fbp_pda_cat_id':
        pda_summary = pda_summary.merge(acreage_geo, left_index=True, right_index=True, how='outer')

Unnamed: 0,juris,fbp_tra_id,fbp_tra_cat_id,fbp_hra_id,pda_id_pba50_fb,pda_name,ACRES
0,alameda,non-tra,non-tra,HRA,non-PDA,non-PDA,546.133813
1,alameda,non-tra,non-tra,non-HRA,alameda1,Naval Air Station,342.596916
2,alameda,non-tra,non-tra,non-HRA,alameda2,Northern Waterfront,41.088476
3,alameda,non-tra,non-tra,non-HRA,non-PDA,non-PDA,2138.351389
4,alameda,tra2a,tra2,non-HRA,alameda1,Naval Air Station,7.914514


Unnamed: 0_level_0,ACRES
fbp_tra_cat_id,Unnamed: 1_level_1
non-tra,4266355.0
tra1,8093.278
tra2,49858.36
tra3,163796.2


Unnamed: 0_level_0,ACRES
fbp_hra_id,Unnamed: 1_level_1
HRA,1334778.0
non-HRA,3153325.0


Unnamed: 0_level_0,ACRES
fbp_pda_cat_id,Unnamed: 1_level_1
PDA,60830.97
non-PDA,4427272.0


In [13]:
# consolidate all summaries
geo_summary = pd.concat([tra_summary, hra_summary, pda_summary])
display(geo_summary)

Unnamed: 0,2015_tothh,2015_totemp,2035_FBP_tothh,2035_FBP_totemp,2050_FBP_tothh,2050_FBP_totemp,2050_NP_tothh,2050_NP_totemp,ACRES
non-tra,1333334.0,1501815.0,1492300.0,1766335.0,1582938.0,2022766.0,1845179.0,1988704.0,4266355.0
tra1,126128.0,524460.0,231319.0,663279.0,309888.0,740673.0,245015.0,698770.0,8093.278
tra2,444482.0,861022.0,773801.0,1145292.0,1034305.0,1253232.0,829348.0,1327419.0,49858.36
tra3,772632.0,1118021.0,997792.0,1259607.0,1116181.0,1391789.0,1123770.0,1393567.0,163796.2
HRA,1143799.0,1331924.0,1389026.0,1426572.0,1526366.0,1530293.0,1478364.0,1584626.0,1334778.0
non-HRA,1532777.0,2673394.0,2106186.0,3407941.0,2516946.0,3878167.0,2564948.0,3823834.0,3153325.0
PDA,686790.0,1882352.0,1275248.0,2309896.0,1671995.0,2560604.0,1389301.0,2606046.0,60830.97
non-PDA,1989786.0,2122966.0,2219964.0,2524617.0,2371317.0,2847856.0,2654011.0,2802414.0,4427272.0


In [14]:
# calculate hh per acre
for col_name in [x for x in list(geo_summary) if 'tothh' in x]:
    print(col_name)
    
    new_col_name = col_name.replace('tothh', 'hh_per_acre')
    print(new_col_name)
    
    geo_summary[new_col_name] = geo_summary[col_name] / geo_summary['ACRES']

2015_tothh
2015_hh_per_acre
2035_FBP_tothh
2035_FBP_hh_per_acre
2050_FBP_tothh
2050_FBP_hh_per_acre
2050_NP_tothh
2050_NP_hh_per_acre


In [17]:
# sort columns by year+scen so copying into CARB table is easier
geo_summary.sort_index(axis=1, inplace=True)

In [18]:
# export to Carb submittal folder
geo_summary.to_csv(r'C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\CARB report\CARB_Submittal_FollowUp_PBA50_growth_pattern_by_geographies.csv')