In [2]:
import matplotlib.pyplot as plt
import os
import yaml
import re
import pathlib
import pandas as pd
from pathlib import Path

import geopandas as gpd
# Local account name, used below to build the per-user Box paths (/Users/<user>/Box/...).
# os.getlogin() raises OSError when the process has no controlling terminal (common for
# kernels started by a service), so fall back to the name of the home directory.
try:
    user = os.getlogin()
except OSError:
    user = Path.home().name

# Helpers 

In [3]:
# County FIPS code (state + county digits) -> Bay Area county name.
bayareafips_full = dict([
    ('06001', 'Alameda'),
    ('06013', 'Contra Costa'),
    ('06041', 'Marin'),
    ('06055', 'Napa'),
    ('06075', 'San Francisco'),
    ('06081', 'San Mateo'),
    ('06085', 'Santa Clara'),
    ('06097', 'Sonoma'),
    ('06095', 'Solano'),
])

# Superdistrict number -> short display name. The list is ordered by
# superdistrict number, so enumerate(start=1) yields keys 1..34.
superdist_names = dict(enumerate([
    'SF NE',
    'SF NW',
    'SF S SE',
    'SF SW',
    'Daly City Millbrae',
    'San Mateo',
    'Redwood City',
    'Palo Alto',
    'Golden Triangle',
    'West San Jose',
    'San Jose CBD',
    'East San Jose',
    'South San Jose',
    'SE Snta Clara Cnty',
    'Tri Valley',
    'Fremont',
    'S Leandro Hayward',
    'Oakland Alameda',
    'Berkeley Eville',
    'Richmond Pinole',
    'Martinez Concord',
    'Lamorinda WC',
    'S Ramon Danville',
    'East Contra Costa',
    'Vallejo Benicia',
    'Solano Remainder',
    'Napa City and S',
    'Napa Remainder',
    'Southern Sonoma',
    'Santa Rosa Area',
    'Northern Sonoma',
    'Northern Marin',
    'Central Marin',
    'Southern Marin',
], start=1))

In [29]:


def get_pba_summary(pba_path=None, sumlevel='county', runid='run_182', run_desc='PBA50 Final Blueprint'):
    """Load the 2015 / 2035 / 2050 PBA50 summary CSVs of one run into a single frame.

    Parameters
    ----------
    pba_path : str or path-like, optional
        Folder holding the run's summary CSVs. When omitted, the module-level
        ``pba_path`` variable is looked up at call time.
    sumlevel : str
        Substring a file name must contain to be loaded (e.g. 'county').
    runid : str
        Run identifier; written to the ``run_id`` / ``run_name2`` columns and
        used as an index level.
    run_desc : str
        Descriptive run label used as an index level.

    Returns
    -------
    pandas.DataFrame
        All matching files stacked, indexed by (run_desc, run_id, year), with
        lower-cased column names.

    Raises
    ------
    ValueError
        If no folder is given or available, or if no file in the folder matches.
    """
    if pba_path is None:
        # Resolve the default lazily: a `pba_path=pba_path` default is evaluated when the
        # function is defined, which fails if that global has not been assigned yet.
        pba_path = globals().get('pba_path')
        if pba_path is None:
            raise ValueError(
                'pba_path was not passed and no module-level pba_path is defined')

    pattern_year = re.compile(r'.*(\d{4}).*\.csv$', re.IGNORECASE)
    wanted_years = ('2015', '2050', '2035')
    summaries = {}

    # sorted() makes the load (and therefore row) order independent of the file system
    for fn in sorted(os.listdir(pba_path)):
        if sumlevel not in fn or 'UBI' in fn:
            continue
        if not any(yr in fn for yr in wanted_years):
            continue
        year_match = pattern_year.search(fn)
        if year_match is None:
            # passes the name filters but is not a CSV with a 4-digit year: skip it
            continue

        print(fn)
        # the leading greedy `.*` means the LAST 4-digit run in the name is taken as the year
        run_year = int(year_match.group(1))
        data = pd.read_csv(os.path.join(pba_path, fn)).rename(
            columns={'COUNTY_NAME': 'COUNTY'})
        data['run_name2'] = runid
        data['run_id'] = runid

        data.columns = data.columns.str.lower()

        summaries[(run_desc, runid, run_year)] = data

    if not summaries:
        raise ValueError(
            f'No {sumlevel!r} summary files for 2015/2035/2050 found in {pba_path}')

    # level 3 ('oid') is each file's original row index; it is dropped
    return pd.concat(summaries, names=['run_desc', 'run_id', 'year', 'oid']).reset_index(3, drop=True)

In [7]:
# Shapefile of the TAZ1454 zone system, read from the current user's Box folder.
zones_path = f'/Users/{user}/Box/Modeling and Surveys/Urban Modeling/Spatial/Zones/TAZ1454/zones1454.shp'

# Load the zones and reproject them to EPSG:26910.
zones = gpd.read_file(zones_path).to_crs('EPSG:26910')
# Extra column holding one representative point per zone geometry.
zones['geom_pt'] = zones.representative_point()
# County name looked up from each zone's county FIPS code.
zones['county_name'] = zones.fipsstco.map(bayareafips_full)
# Two lookup Series built from the (county, superdistrict) pairs present in the zones:
#   county_x_superdist: indexed by county_name, values are superdistr
#   superdist_x_county: indexed by superdistr, values are county_name
county_x_superdist = zones.groupby(
    ['county_name', 'superdistr']).size().reset_index(1).superdistr
superdist_x_county = zones.groupby(
    ['superdistr', 'county_name']).size().reset_index(1).county_name

# Dissolve the zones into one geometry per (superdistrict, county) pair and keep only the
# first three columns (presumably superdistr, county_name and geometry - TODO confirm order).
superdistricts = zones.dissolve(
    ['superdistr', 'county_name'], as_index=False).iloc[:, :3]
# Attach the short display name of each superdistrict.
superdistricts['name'] = superdistricts.superdistr.map(superdist_names)

In [8]:
# Parcel -> jurisdiction lookup: only the two needed columns are read, the first of
# them becomes the index, and the result is reduced to the 'juris' Series.
parcels_path = f'/Users/{user}/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/PBA50 Final Blueprint Large General Input Data/2021_01_12_parcels_geography.csv'
parcel_cols = ['PARCEL_ID', 'juris']
parcels = pd.read_csv(parcels_path, index_col=0, usecols=parcel_cols)['juris']

In [9]:
# File geodatabase with the TIGER 2020 jurisdiction boundaries, in the current user's Box folder.
gdb_path = f'/Users/{user}/Box/DataViz Projects/Data Services/BASIS/Data Processing/Administrative Boundaries/Jurisdiction Boundaries/TIGER 2020/Jurisdiction_Boundary_Database.gdb'

# Read the places/counties layer, normalise the two key column names and reproject to EPSG:3857.
jurisdictions_2020 = gpd.read_file(gdb_path,
                                   layer='Bay_Area_Incorp_Places_Counties_TIGER_2020', driver='FileGDB').rename(columns={'COUNTY': 'county', 'JURISDICTION': 'juris'}).to_crs('EPSG:3857')

# Every str.replace below states `regex=` explicitly: the pandas default changed from
# True to False in 2.0, which would silently turn the ' town| city' alternation into a
# literal (never matching) string.
jurisdictions_2020.county = jurisdictions_2020.county.str.replace(
    ' County', '', regex=False).str.strip()
# strip the ' town' / ' city' suffix from the jurisdiction name (regex alternation)
jurisdictions_2020['juris'] = jurisdictions_2020['jurisdicti'] = jurisdictions_2020.juris.str.replace(
    ' town| city', '', regex=True).str.strip()

# recode Uninc to Uninc + County
jurisdictions_2020.loc[jurisdictions_2020.juris.str.contains(
    'Uninc'), 'GEOID_PLAC'] = jurisdictions_2020.loc[jurisdictions_2020.juris.str.contains('Uninc'), 'COUNTY_GEOID']+'UNINC'
jurisdictions_2020.columns = jurisdictions_2020.columns.str.lower()

# juris_lower: lower-case, underscore-separated key; unincorporated areas become '<county>_county'
jurisdictions_2020['juris_lower'] = jurisdictions_2020.juris
uninc_mask = jurisdictions_2020.juris.str.contains('Uninc')
jurisdictions_2020.loc[uninc_mask, 'juris_lower'] = jurisdictions_2020.loc[uninc_mask,
                                                                           'juris'].str.replace('Unincorporated ', '', regex=False).add(' County')
# '.' must be removed literally, not treated as the regex wildcard
jurisdictions_2020.juris_lower = jurisdictions_2020.juris_lower.str.lower(
).str.replace('.', '', regex=False).str.replace(' ', '_', regex=False)

  jurisdictions_2020['juris'] = jurisdictions_2020['jurisdicti'] = jurisdictions_2020.juris.str.replace(
  ).str.replace('.', '').str.replace(' ', '_')


# Mappings

In [11]:
# Reference only (not used by the loaders): run id -> long description.
# The "SD" runs follow the same rule as the others: a 0p8 run folder is "Down 20%" and a
# 0p9 run folder is "Down 10%", in agreement with the run folders and mnemonics listed
# in run_identifier_map (Run_42 is the 0p8 run, Run_45 the 0p9 run).
run_map_desc = {'Run_00': 'Run 00 Defaults (Reloc, Agglom, Dens)',
                # 'Run_02': 'Run 02 Decreased Agglomeration FPS',
                # 'Run_03': 'Run 03 Increased Agglomeration FPS',
                # 'Run_19': 'Run 19 Decreased++ Agglomeration FPS',
                # 'Run_20': 'Run 20 Increased-- Agglomeration FPS',
                # 'Run_04': 'Run 04 Space Consumption Plus 50%',
                # 'Run_07': 'Run 07 Space Consumption Plus 25%',
                # 'Run_10': 'Run 10 Space Consumption Minus 50%',
                # 'Run_13': 'Run 13 Space Consumption Minus 25%',
                # 'Run_30': 'Future Trend - Base 2010 - Down 20%',
                # 'Run_31': 'Future Trend - Base 2023 - Down 20%',
                # 'Run_32': 'Future Trend - Base 2010 - Down 10%',
                # 'Run_33': 'Future Trend - Base 2023 - Down 10%',
                # 'Run_34': 'Future Trend - Base 2010 - Maintained',
                # 'Run_35': 'Future Trend - Base 2023 - Maintained',
                'Run_40': 'Future Trend - Base 2010 - Down 20%',
                'Run_41': 'Future Trend - Base 2023 - Down 20%',
                'Run_42': 'Future Trend - Base 2015 - Down 20% (SD)',
                'Run_43': 'Future Trend - Base 2010 - Down 10%',
                'Run_44': 'Future Trend - Base 2023 - Down 10%',
                'Run_45': 'Future Trend - Base 2015 - Down 10% (SD)',

                # 'Run_16': 'Run 16 Space Consumption Adj (POW to occ off space) 2019-2022',
                # 'Run_76': 'Trend B: Continued Reduction Over Time',
                # 'Run_77': 'Trend A: Partial Reversal Over Time',
                # 'Run_78': 'Trend C: Constant / No Trend',
                # 'Run_96': 'Run 96 Empirical - Further Reduction Over Time',
                # 'Run_97': 'Run 97 Empirical - Partial Reversal Over Time',
                # 'Run_98': 'Run 98 Empirical - Constant',
                'run_182': 'PBA50 Final Blueprint',
                # 'PBA50Plus_DBP_InitialRun': 'DBP Initial Run'
                }

# Components

We need:
* a list of runs
* a path to each summary level of interest (buildings, counties, superdist etc)
* a descriptive mnemonic

In [32]:
# Root folder of the PBA50+ exogenous-run outputs (mounted network volume).
exogenous_path = '/Volumes/Data/Models/urban_modeling/baus/PBA50Plus_Development/Exogenous/outputs'

# The Box paths are built from `user`, like the other Box paths in this notebook,
# instead of being tied to a single account.

# run id 182 / PBA50 Final Blueprint
pba_path = f'/Users/{user}/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/Final Blueprint runs/Final Blueprint (s24)/BAUS v2.25 - FINAL VERSION'

# run id 314 / PBA50 No Project
no_project_path = f'/Users/{user}/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/EIR runs/Baseline Large (s25) runs/NP_v8_FINAL'

# interim_template = 'core_summaries/{run_name}_interim_zone_output_allyears.csv'
# new_building_template = 'core_summaries/{run_name}_new_buildings_summary.csv'
# county_template = 'geographic_summaries/{run_name}_county_summary_growth.csv'
# superdist_template = 'geographic_summaries/{run_name}_superdistrict_summary_growth.csv'

In [33]:
# For each template, the years (file-name suffixes) for which a file exists and should be loaded.

interim_years = [2015, 2020, 2025, 2030, 2035, 2040, 2045, 2050, 'allyears']
# Single empty suffix: there is one new-buildings file per run, not one per year.
new_building_years = ['']
new_building_years_pba50 = [2015, 2035, 2050]
county_years = [2015, 2035, 2050, 'growth']
superdist_years = [2015, 2035, 2050, 'growth']


# Each template pairs a relative path pattern (filled in with str.format using
# `run_name` and `yr`) with the list of `yr` values to load.

# current versions
interim_template = {
    'path': 'core_summaries/{run_name}_interim_zone_output_{yr}.csv', 'years': interim_years}
new_building_template = {
    'path': 'core_summaries/{run_name}_new_buildings_summary{yr}.csv', 'years': new_building_years}
county_template = {
    'path': 'geographic_summaries/{run_name}_county_summary_{yr}.csv', 'years': county_years}
superdist_template = {
    'path': 'geographic_summaries/{run_name}_superdistrict_summary_{yr}.csv', 'years': superdist_years}

# pba50 vintage versions

# interim_template = {
#     'path': '{run_name}_interim_zone_output_{yr}.csv', 'years': interim_years}
# The opening brace of {run_name} is required: without it str.format raises
# "Single '}' encountered in format string".
building_template_pba50 = {
    'path': '{run_name}_building_data_{yr}.csv', 'years': new_building_years_pba50}
county_template_pba50 = {
    'path': '{run_name}_county_summaries_{yr}.csv', 'years': county_years}
superdist_template_pba50 = {
    'path': '{run_name}_superdistrict_summary_{yr}.csv', 'years': superdist_years}

In [34]:
# Registry of the exogenous runs to load: run id -> folder name of the run ('run_path'),
# short label ('mnemonic') and the directory containing the run folder ('root_path').
# The PBA50 Final Blueprint run is not registered here.
run_identifier_map = {
    run_key: {'run_path': run_folder,
              'mnemonic': label,
              'root_path': exogenous_path}
    for run_key, run_folder, label in (
        ('Run_34', 'Run_34_sqft_per_job_adjusters_costar_qcew_timevarying_base_2010_stasis',
         'PBA50+: Base2010- 0'),
        ('Run_35', 'Run_35_sqft_per_job_adjusters_costar_qcew_timevarying_base_2023_stasis',
         'PBA50+: Base2023- 0'),
        ('Run_40', 'Run_40_sqft_per_job_adjusters_costar_qcew_timevarying_base_2010_0p8_reduction',
         'PBA50+: Base2010-20%'),
        ('Run_41', 'Run_41_sqft_per_job_adjusters_costar_qcew_timevarying_base_2023_0p8_reduction',
         'PBA50+: Base2023-20%'),
        ('Run_42', 'Run_42_sqft_per_job_adjusters_costar_qcew_timevarying_base_sd_0p8_reduction',
         'PBA50+: Base2015-20%-SD'),
        ('Run_43', 'Run_43_sqft_per_job_adjusters_costar_qcew_timevarying_base_2010_0p9_reduction',
         'PBA50+: Base2010-10%'),
        ('Run_44', 'Run_44_sqft_per_job_adjusters_costar_qcew_timevarying_base_2023_0p9_reduction',
         'PBA50+: Base2023-10%'),
        ('Run_45', 'Run_45_sqft_per_job_adjusters_costar_qcew_timevarying_base_sd_0p9_reduction',
         'PBA50+: Base2015-10%-SD'),
    )
}

In [35]:
def template_file_loader(run_id_map, summary_template):
    """Read one CSV per (run, year) combination and stack them into one frame.

    Parameters
    ----------
    run_id_map : dict
        run id -> dict with 'root_path', 'run_path' and 'mnemonic' entries.
    summary_template : dict
        'path' is a format pattern taking `run_name` and `yr`; 'years' lists
        the `yr` values to load.

    Returns
    -------
    pandas.DataFrame
        The concatenated files with 'run_id', 'year', 'run_desc' and 'oid'
        (the row position within each source file) as leading columns.
    """
    frames = {
        (run_key, period, spec['mnemonic']): pd.read_csv(
            Path(
                spec['root_path'],
                spec['run_path'],
                # the run folder name doubles as the file-name prefix
                summary_template['path'].format(run_name=spec['run_path'], yr=period),
            )
        )
        for run_key, spec in run_id_map.items()
        for period in summary_template['years']
    }
    stacked = pd.concat(frames, names=['run_id', 'year', 'run_desc', 'oid'])
    return stacked.reset_index()

# Load the runs

Call the basic loaders here; the conversion to long format happens later. The PBA50 runs are handled separately because their file names are slightly different.

In [39]:
# Summary level to load; also the name of the column appended to the index.
geo_string = 'county'
pba_county_summary = (
    get_pba_summary(
        pba_path=pba_path,
        sumlevel=geo_string,
        runid='run182',
        run_desc='PBA50 Final Blueprint',
    )
    .set_index(geo_string, append=True)
)
pba_county_summary.head()

run182_county_summaries_2015.csv
run182_county_summaries_2035.csv
run182_county_summaries_2050.csv


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,agrempn,fpsempn,herempn,retempn,mwtempn,othempn,totemp,hhincq1,hhincq2,hhincq3,...,ciacre,resacre,empres,age0004,age0519,age2044,age4564,age65p,run_name2,run_id
run_desc,run_id,year,county,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
PBA50 Final Blueprint,run182,2015,San Francisco,630.0,227573.0,228478.0,46327.0,35881.0,143099.0,681988.0,123664.0,89634.0,63932.0,...,0.0,0.0,509975.0,39082.0,96757.0,374028.0,236202.0,137586.0,run182,run182
PBA50 Final Blueprint,run182,2015,San Mateo,2495.0,118394.0,106557.0,36722.0,64320.0,64594.0,393082.0,47206.0,57571.0,61713.0,...,0.0,0.0,400481.0,42210.0,133108.0,265899.0,215235.0,116976.0,run182,run182
PBA50 Final Blueprint,run182,2015,Santa Clara,5363.0,273619.0,294503.0,88220.0,227070.0,210722.0,1099497.0,163132.0,146412.0,136351.0,...,0.0,0.0,956289.0,117624.0,364153.0,684409.0,506419.0,242729.0,run182,run182
PBA50 Final Blueprint,run182,2015,Alameda,1712.0,190998.0,255222.0,71885.0,139047.0,207929.0,866793.0,167710.0,135152.0,111450.0,...,0.0,0.0,850478.0,99452.0,281466.0,616810.0,402541.0,196735.0,run182,run182
PBA50 Final Blueprint,run182,2015,Contra Costa,1517.0,93759.0,125365.0,48377.0,40712.0,93772.0,403502.0,86139.0,92872.0,89514.0,...,0.0,0.0,593398.0,67450.0,213284.0,379567.0,303629.0,159712.0,run182,run182


In [41]:
# Same load as for the Final Blueprint, pointed at the No Project run folder.
pba_np_county_summary = (
    get_pba_summary(
        pba_path=no_project_path,
        sumlevel=geo_string,
        runid='run314',
        run_desc='PBA50 No Project',
    )
    .set_index(geo_string, append=True)
)
pba_np_county_summary.head()

run314_county_summaries_2015.csv
run314_county_summaries_2035.csv
run314_county_summaries_2050.csv


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,agrempn,fpsempn,herempn,retempn,mwtempn,othempn,totemp,hhincq1,hhincq2,hhincq3,...,ciacre,resacre,empres,age0004,age0519,age2044,age4564,age65p,run_name2,run_id
run_desc,run_id,year,county,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
PBA50 No Project,run314,2015,San Francisco,600.0,227560.0,228184.0,46255.0,35636.0,143664.0,681899.0,120812.0,89348.0,64026.0,...,0.0,0.0,508295.0,39011.0,96564.0,373449.0,235723.0,137342.0,run314,run314
PBA50 No Project,run314,2015,San Mateo,2477.0,118153.0,106627.0,36659.0,64614.0,64512.0,393042.0,46868.0,58208.0,62245.0,...,0.0,0.0,400601.0,42126.0,132851.0,265387.0,214887.0,116816.0,run314,run314
PBA50 No Project,run314,2015,Santa Clara,5429.0,273129.0,293997.0,88148.0,226912.0,210887.0,1098502.0,163449.0,146829.0,136754.0,...,0.0,0.0,957610.0,117730.0,364432.0,685070.0,506982.0,243005.0,run314,run314
PBA50 No Project,run314,2015,Alameda,1654.0,191592.0,254835.0,71890.0,139026.0,207645.0,866642.0,167952.0,134602.0,111400.0,...,0.0,0.0,850145.0,99404.0,281351.0,616740.0,402613.0,196802.0,run314,run314
PBA50 No Project,run314,2015,Contra Costa,1494.0,93580.0,125519.0,48369.0,40837.0,92983.0,402782.0,87798.0,92376.0,89184.0,...,0.0,0.0,593610.0,67450.0,213285.0,379580.0,303696.0,159846.0,run314,run314


## interim files

In [42]:
# Load the interim zone outputs of every registered run, tag each zone with its county
# name and superdistrict (looked up by TAZ id), then index by run / year / geography.
interim_runs = (
    template_file_loader(run_identifier_map, interim_template)
    .assign(
        county=lambda df: df['zone_id'].map(
            zones.set_index('taz1454')['fipsstco']).map(bayareafips_full),
        SD=lambda df: df['zone_id'].map(
            zones.set_index('taz1454')['superdistr']),
    )
    .set_index(['run_desc', 'run_id', 'year', 'county', 'zone_id'])
)
interim_runs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,oid,non_residential_sqft,job_spaces,residential_units,deed_restricted_units,preserved_units,inclusionary_units,subsidized_units,zoned_du,zoned_du_underbuild,...,subsidized_units_2050,zoned_du_2050,zoned_du_underbuild_2050,zoned_du_underbuild_ratio_2050,residential_vacancy_2050,non_residential_vacancy_2050,residential_price_2050,residential_rent_2050,non_residential_rent_2050,SD
run_desc,run_id,year,county,zone_id,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
PBA50+: Base2010- 0,Run_34,2015,San Francisco,1.0,0,8100537.0,19204.0,61.0,8.0,8.0,0.0,0.0,1692.0,1631.0,...,,,,,,,,,,1.0
PBA50+: Base2010- 0,Run_34,2015,San Francisco,2.0,1,12102951.0,28243.0,152.0,7.0,7.0,0.0,0.0,6909.0,6845.0,...,,,,,,,,,,1.0
PBA50+: Base2010- 0,Run_34,2015,San Francisco,3.0,2,2029891.0,3987.0,294.0,22.0,22.0,0.0,0.0,1431.0,1259.0,...,,,,,,,,,,1.0
PBA50+: Base2010- 0,Run_34,2015,San Francisco,4.0,3,9313538.0,21806.0,216.0,14.0,14.0,0.0,0.0,4115.0,3949.0,...,,,,,,,,,,1.0
PBA50+: Base2010- 0,Run_34,2015,San Francisco,5.0,4,9818849.0,21933.0,693.0,51.0,51.0,0.0,0.0,6938.0,6644.0,...,,,,,,,,,,1.0


## county files

In [43]:
# County summaries of every registered run, indexed by run, year and county.
county_runs = (
    template_file_loader(run_identifier_map, county_template)
    .set_index(['run_desc', 'run_id', 'year', 'county'])
)
county_runs

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,oid,tothh,hhincq1,hhincq2,hhincq3,hhincq4,residential_units,deed_restricted_units,sfdu,mfdu,...,deed_restricted_units_pct_of_regional_growth,deed_restricted_units_2015_regional_share,deed_restricted_units_2050_regional_share,deed_restricted_units_regional_share_change,non_residential_sqft_growth,non_residential_sqft_pct_change,non_residential_sqft_pct_of_regional_growth,non_residential_sqft_2015_regional_share,non_residential_sqft_2050_regional_share,non_residential_sqft_regional_share_change
run_desc,run_id,year,county,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
PBA50+: Base2010- 0,Run_34,2015,Alameda,0,553024.0,168456.0,136470.0,111724.0,136374.0,593002.0,42358.0,350833.0,212154.0,...,,,,,,,,,,
PBA50+: Base2010- 0,Run_34,2015,Contra Costa,1,383546.0,87718.0,93183.0,89867.0,112778.0,410219.0,18624.0,307898.0,100983.0,...,,,,,,,,,,
PBA50+: Base2010- 0,Run_34,2015,Marin,2,108636.0,20347.0,23578.0,23384.0,41327.0,112666.0,6503.0,77066.0,35600.0,...,,,,,,,,,,
PBA50+: Base2010- 0,Run_34,2015,Napa,3,50653.0,12911.0,14143.0,11785.0,11814.0,55970.0,1989.0,43083.0,12703.0,...,,,,,,,,,,
PBA50+: Base2010- 0,Run_34,2015,San Francisco,4,367114.0,124562.0,90362.0,64831.0,87359.0,390059.0,48143.0,114602.0,275283.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PBA50+: Base2015-10%-SD,Run_45,growth,San Francisco,4,,,,,,,,,,...,21.68,0.26,0.22,-0.04,12512645.0,3.13,3.81,0.18,0.16,-0.02
PBA50+: Base2015-10%-SD,Run_45,growth,San Mateo,5,,,,,,,,,,...,10.22,0.07,0.10,0.03,36635478.0,18.83,11.16,0.09,0.09,0.00
PBA50+: Base2015-10%-SD,Run_45,growth,Santa Clara,6,,,,,,,,,,...,26.63,0.22,0.26,0.04,175690574.0,30.68,53.52,0.25,0.29,0.04
PBA50+: Base2015-10%-SD,Run_45,growth,Solano,7,,,,,,,,,,...,1.24,0.03,0.02,-0.01,33671986.0,38.47,10.26,0.04,0.05,0.01


In [44]:
# Append the two PBA50 county summaries (Final Blueprint and No Project) to the exogenous runs.
# NOTE(review): county_runs is both input and output here, so re-running this cell without
# re-running the loader cell before it appends the PBA50 rows a second time.
county_runs = pd.concat(
    [county_runs, pba_county_summary, pba_np_county_summary], axis=0)

## superdistrict files

In [45]:
# Superdistrict summaries of every registered run, with the county of each
# superdistrict added before indexing by run / year / geography.
sd_runs = (
    template_file_loader(run_identifier_map, superdist_template)
    .assign(county=lambda df: df['superdistrict'].map(superdist_x_county))
    .set_index(['run_desc', 'run_id', 'year', 'county', 'superdistrict'])
)
sd_runs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,oid,name,tothh,hhincq1,hhincq2,hhincq3,hhincq4,residential_units,deed_restricted_units,sfdu,...,deed_restricted_units_pct_of_regional_growth,deed_restricted_units_2015_regional_share,deed_restricted_units_2050_regional_share,deed_restricted_units_regional_share_change,non_residential_sqft_growth,non_residential_sqft_pct_change,non_residential_sqft_pct_of_regional_growth,non_residential_sqft_2015_regional_share,non_residential_sqft_2050_regional_share,non_residential_sqft_regional_share_change
run_desc,run_id,year,county,superdistrict,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
PBA50+: Base2010- 0,Run_34,2015,San Francisco,1,0,SF NE,85550.0,44131.0,20294.0,9807.0,11318.0,91655.0,15964.0,1731.0,...,,,,,,,,,,
PBA50+: Base2010- 0,Run_34,2015,San Francisco,2,1,SF NW,107875.0,33209.0,27396.0,20285.0,26985.0,111997.0,13326.0,16949.0,...,,,,,,,,,,
PBA50+: Base2010- 0,Run_34,2015,San Francisco,3,2,SF S SE,122414.0,35646.0,30286.0,24074.0,32408.0,131391.0,15750.0,60391.0,...,,,,,,,,,,
PBA50+: Base2010- 0,Run_34,2015,San Francisco,4,3,SF SW,51309.0,11588.0,12399.0,10671.0,16651.0,55050.0,3124.0,35568.0,...,,,,,,,,,,
PBA50+: Base2010- 0,Run_34,2015,San Mateo,5,4,Daly City Millbrae,97718.0,19083.0,22998.0,24130.0,31507.0,103958.0,6354.0,72473.0,...,,,,,,,,,,


## building files

In [46]:
# New-buildings summary of every registered run; the template lists a single empty
# year suffix, so one file is read per run and the 'year' column holds ''.
building_runs = template_file_loader(run_identifier_map, new_building_template)

In [47]:
# add juris, county identifiers
# juris is looked up from the parcel_id -> juris Series; county is then looked up by
# matching that juris value against jurisdictions_2020.juris_lower.
# NOTE(review): assumes the parcel juris values use the same lower-case / underscore
# spelling as juris_lower, and that juris_lower is unique - confirm.
building_runs['juris'] = building_runs.parcel_id.map(parcels)
building_runs['county'] = building_runs.juris.map(
    jurisdictions_2020.set_index('juris_lower').county)

# Transform the dataframes to long format

In [49]:
# Output folder on the M drive for the combined CSVs written below.
# The mount point may be different on other platforms / machines.
out_path = '/Volumes/Data/Models/urban_modeling/baus/PBA50Plus_Development/Exogenous/review/combined_data/updated'

## county data

In [50]:
# Long format: one row per run / year / county / variable for the four headline measures.
# After stacking, the former column labels sit in the unnamed fifth index level
# ('level_4' once reset), which is renamed to 'variable'.
county_runs_long = (
    county_runs
    .loc[:, ['tothh', 'totemp', 'residential_units', 'non_residential_sqft']]
    .stack()
    .reset_index(name='value')
    .rename(columns={'level_4': 'variable'})
)
county_runs_long

Unnamed: 0,run_desc,run_id,year,county,variable,value
0,PBA50+: Base2010- 0,Run_34,2015,Alameda,tothh,553024.0
1,PBA50+: Base2010- 0,Run_34,2015,Alameda,totemp,867617.0
2,PBA50+: Base2010- 0,Run_34,2015,Alameda,residential_units,593002.0
3,PBA50+: Base2010- 0,Run_34,2015,Alameda,non_residential_sqft,554287699.0
4,PBA50+: Base2010- 0,Run_34,2015,Contra Costa,tothh,383546.0
...,...,...,...,...,...,...
967,PBA50 No Project,run314,2050,Napa,totemp,91548.0
968,PBA50 No Project,run314,2050,Sonoma,tothh,235428.0
969,PBA50 No Project,run314,2050,Sonoma,totemp,284645.0
970,PBA50 No Project,run314,2050,Marin,tothh,129965.0


In [52]:
# Write the long-format county table to the shared output folder.
county_runs_long.to_csv(Path(out_path, 'county_runs_long.csv'))

In [53]:
# County growth, 2015-2050
# For each (run_desc, run_id, county): the 2050 value minus the 2015 value of the kept columns.
# .loc(0)[:, :, <year>] selects the rows of one year (third index level); that level is then
# dropped so the two slices align on the remaining index levels when subtracted.

county_growth_cols_keep = ['tothh', 'totemp',
                           'non_residential_sqft', 'residential_units']
county_runs_growth = (county_runs.loc(0)[:, :, 2050].reset_index(2, drop=True)[
    county_growth_cols_keep] -
    county_runs.loc(0)[:, :, 2015].reset_index(2, drop=True)[
    county_growth_cols_keep])

In [54]:
# Write the 2015-2050 county growth table to the shared output folder.
county_runs_growth.to_csv(Path(out_path, 'baus_2023_run_comparison_county_growth.csv'))

## interim data



In [55]:
# Long format of the interim data: keep only the columns whose name carries a 4-digit
# year, stack them, and split each stacked label into a year and a variable name.
# Patterns are raw strings ('\d' in a plain string is an invalid escape sequence).
interim_runs_long = interim_runs.filter(
    regex=r'\d{4}').stack().reset_index(name='value')
# The 'year' column coming from the index is overwritten with the year parsed from the label.
interim_runs_long['year'] = interim_runs_long.level_5.str.extract(
    r'(\d{4})', expand=False)
# regex=True is required: since pandas 2.0 str.replace treats the pattern literally by
# default, which would leave the '_YYYY' suffix on every variable name.
interim_runs_long['variable'] = interim_runs_long['level_5'].str.replace(
    r'_\d{4}', '', regex=True)
# County totals per run / year / variable.
# NOTE(review): this sums every variable, including rate-like ones (vacancy, price, rent),
# for which a plain sum is presumably not meaningful - confirm before using those rows.
interim_runs_long_county = interim_runs_long.groupby(
    ['run_desc', 'run_id', 'county', 'year', 'variable']).value.sum().reset_index()
interim_runs_long_county.sample(7)

  interim_runs_long['variable'] = interim_runs_long['level_5'].str.replace(


Unnamed: 0,run_desc,run_id,county,year,variable,value
5404,PBA50+: Base2023- 0,Run_35,Alameda,2015,non_residential_sqft,554351900.0
1691,PBA50+: Base2010-10%,Run_43,San Mateo,2015,subsidized_units,0.0
8348,PBA50+: Base2023-20%,Run_41,Santa Clara,2035,residential_rent,880.041
725,PBA50+: Base2010- 0,Run_34,Santa Clara,2015,non_residential_vacancy,-321.6411
7866,PBA50+: Base2023-20%,Run_41,Marin,2035,preserved_units,17445.0
8379,PBA50+: Base2023-20%,Run_41,Santa Clara,2045,residential_units,1041673.0
5732,PBA50+: Base2023- 0,Run_35,Marin,2045,job_spaces,151885.0


In [56]:
# Write the county-level interim roll-up to the shared output folder.
# (A local copy used to go to /Users/aolsen/Downloads/interim_data_exog_long_county.csv.)
interim_runs_long_county.to_csv(Path(out_path, 'interim_data_exog_long_county.csv'))

In [57]:
# Sanity check: list the run descriptions present in the county roll-up.
interim_runs_long_county.run_desc.unique()

array(['PBA50+: Base2010- 0', 'PBA50+: Base2010-10%',
       'PBA50+: Base2010-20%', 'PBA50+: Base2015-10%-SD',
       'PBA50+: Base2015-20%-SD', 'PBA50+: Base2023- 0',
       'PBA50+: Base2023-10%', 'PBA50+: Base2023-20%'], dtype=object)

### Interim data - roll zone level vacancy up to county level
We use non_residential_sqft as the weight when aggregating TAZ-level vacancy rates to county-level vacancy rates.

In [59]:
def col_year_extract(x):
    """Return the first 4-digit run found in the string `x` as an int.

    Raises AttributeError when `x` contains no 4-digit run.
    """
    # raw string: '\d' in a plain string literal is an invalid escape sequence
    return int(re.search(r'\d{4}', x).group(0))


# Column groups used as index keys in the vacancy weighting below.
geo_groups = ['zone_id', 'county']
run_identifier = ['run_desc', 'run_id']
year_col = ['year']

In [61]:
# Zone-level non-residential floor space (the weight) and vacancy (the value to average).
interim_runs_long_nonres = interim_runs_long.query(
    'variable=="non_residential_sqft"')
interim_runs_long_vacancy = interim_runs_long.query(
    'variable=="non_residential_vacancy"')

# Put both side by side, aligned on run / zone / county / year.
interim_nonres_vacancy_wt = pd.concat([
    interim_runs_long_nonres.set_index(
        run_identifier+geo_groups+year_col).value,
    interim_runs_long_vacancy.set_index(run_identifier+geo_groups+year_col).value],
    keys=['weight', 'vacancy'], axis=1)

# weight vacancy to county level using non-res sqft
# (geo_groups[1:] is the county level only; negative results are clipped to 0)
interim_nonres_vacancy_county_wt = interim_nonres_vacancy_wt.groupby(
    level=run_identifier+year_col+geo_groups[1:]).apply(lambda x: (x.vacancy * x.weight).sum()/x.weight.sum()).clip(0)
interim_nonres_vacancy_county_wt = interim_nonres_vacancy_county_wt.reset_index(
    name='value')

# NOTE(review): the file name says "superdist" but the table is at county level;
# the name is kept unchanged in case something downstream reads it - confirm.
# The local copy goes to the current user's Downloads folder, built from `user`
# like the other per-user paths in this notebook.
interim_nonres_vacancy_county_wt.to_csv(
    f'/Users/{user}/Downloads/interim_nonres_vacancy_superdist_wt.csv')
interim_nonres_vacancy_county_wt.to_csv(
    Path(*[out_path, 'interim_nonres_vacancy_superdist_wt.csv'])
)

In [62]:
# Sanity check: list the run descriptions present in the weighted vacancy table.
interim_nonres_vacancy_county_wt.run_desc.unique()

array(['PBA50+: Base2010- 0', 'PBA50+: Base2010-10%',
       'PBA50+: Base2010-20%', 'PBA50+: Base2015-10%-SD',
       'PBA50+: Base2015-20%-SD', 'PBA50+: Base2023- 0',
       'PBA50+: Base2023-10%', 'PBA50+: Base2023-20%'], dtype=object)