In [1]:
import pandas as pd
import numpy as np
import os

This notebook compares the BAUS logsum calculation (including interim data and the parcel-level logsum output) with a separate calculation that replicates the BAUS methodology.

- BAUS test runs: run 306, run 307, run 308 (based on carb_test_P, using "2035_TM152_SEN_Project_01" logsum input from Travel Model)
    - V1 run 306: using "2018_10_17_parcel_to_taz1454sub.csv", but with PARCEL_ID rounded and converted to integer
    - V2 run 307: using "2018_10_17_parcel_to_taz1454sub.csv", keeping the float PARCEL_ID - this is the approach of the previous Sensitivity Test runs
    - V3 run 308: using "2020_08_17_parcel_to_taz1454sub.csv", which has integer PARCEL_ID
- Notebook calculation: using "2035_TM152_SEN_Project_01" logsum input from Travel Model

High-level summary:
- A large number of parcels (441,734 out of 1,956,212) failed to be assigned a subzone in BAUS despite having a subzone in the parcel_subzone lookup input table; another 1,514 parcels are missing a subzone in the parcel_subzone lookup itself; and another 4 synthetic parcels are not included in the parcel_subzone lookup but are added to BAUS later, and therefore have no subzone. As a result, these parcels (443,252 in total, or 23% of all parcels) are assigned a combo_logsum value of 1, so there is no difference between NP and P for them.

In [2]:
# Share of all parcels (1,956,212) that failed to be assigned a subzone in BAUS (441,734).
441734/1956212

0.22581090393065784

## 1. replicate BAUS calculation

In [73]:
# Travel Model logsum inputs for the No Project (NP) and Project (P) scenarios
logsum_input_dir = 'M:\\Application\\Model One\\RTP2021\\SensitivityTests_CARB'
NP_logsum_dir = os.path.join(logsum_input_dir, r'2035_TM152_SEN_NoProject_01b\OUTPUT\logsums\ForUrbanSim')
P_logsum_dir = os.path.join(logsum_input_dir, r'2035_TM152_SEN_Project_01\OUTPUT\logsums\ForUrbanSim')

# NP: segment sizes, mandatory and non-mandatory accessibilities (file names end in "_s28")
accessMarket_NP_raw, mandatory_NP_raw, nonMandatory_NP_raw = (
    pd.read_csv(os.path.join(NP_logsum_dir, file_name))
    for file_name in ('AccessibilityMarkets_2035_s28.csv',
                      'mandatoryAccessibilities_2035_s28.csv',
                      'nonMandatoryAccessibilities_2035_s28.csv')
)

# P: same three tables (file names end in "_")
accessMarket_P_raw, mandatory_P_raw, nonMandatory_P_raw = (
    pd.read_csv(os.path.join(P_logsum_dir, file_name))
    for file_name in ('AccessibilityMarkets_2035_.csv',
                      'mandatoryAccessibilities_2035_.csv',
                      'nonMandatoryAccessibilities_2035_.csv')
)

# preview the first rows of two of the NP tables
display(accessMarket_NP_raw.head(), mandatory_NP_raw.head())

Unnamed: 0,taz,walk_subzone,walk_subzone_label,incQ,incQ_label,autoSuff,autoSuff_label,hasAV,num_persons,num_workers,num_workers_students
0,1,1,Short-walk to transit,1,lowInc,0,0_autos,0,24,2,6
1,1,1,Short-walk to transit,1,lowInc,2,autos_ge_workers,0,8,0,0
2,1,1,Short-walk to transit,2,medInc,0,0_autos,0,8,6,6
3,1,1,Short-walk to transit,2,medInc,2,autos_ge_workers,1,4,0,0
4,1,1,Short-walk to transit,3,highInc,0,0_autos,0,4,2,2


Unnamed: 0,lowInc_0_autos_noAV,lowInc_autos_lt_workers_noAV,lowInc_autos_lt_workers_AV,lowInc_autos_ge_workers_noAV,lowInc_autos_ge_workers_AV,medInc_0_autos_noAV,medInc_autos_lt_workers_noAV,medInc_autos_lt_workers_AV,medInc_autos_ge_workers_noAV,medInc_autos_ge_workers_AV,...,highInc_autos_ge_workers_noAV,highInc_autos_ge_workers_AV,veryHighInc_0_autos_noAV,veryHighInc_autos_lt_workers_noAV,veryHighInc_autos_lt_workers_AV,veryHighInc_autos_ge_workers_noAV,veryHighInc_autos_ge_workers_AV,destChoiceAlt,taz,subzone
0,10.874994,9.982268,9.967941,9.400159,9.366022,11.528498,10.580236,10.573812,9.996845,9.979691,...,10.322719,10.300961,12.051315,11.066195,11.059474,10.547177,10.532476,0,1,0
1,11.679318,10.184146,10.1771,9.454869,9.426397,12.399662,10.837828,10.834093,10.07474,10.074739,...,10.402349,10.402373,12.90962,11.336144,11.332035,10.623651,10.623706,1,1,1
2,11.500637,10.102709,10.093733,9.42763,9.427582,12.212716,10.736712,10.732121,10.036407,10.036328,...,10.363376,10.343111,12.72068,11.230438,11.225023,10.586024,10.572062,2,1,2
3,10.859219,9.966069,9.951698,9.381974,9.347182,11.506579,10.560399,10.553224,9.975087,9.957614,...,10.310187,10.288282,12.033848,11.049353,11.042236,10.529038,10.514119,3,2,0
4,11.656095,10.160516,10.153257,9.433957,9.404593,12.362082,10.807288,10.803363,10.0484,10.048391,...,10.384099,10.384094,12.86622,11.3054,11.301104,10.59974,10.599775,4,2,1


In [75]:
# Column-wise minimum of the No Project mandatory accessibilities
# (cml() subtracts each column's minimum before rescaling).
mandatory_NP_raw.min()

lowInc_0_autos_noAV                  0.839873
lowInc_autos_lt_workers_noAV         0.744721
lowInc_autos_lt_workers_AV           0.514718
lowInc_autos_ge_workers_noAV         0.625858
lowInc_autos_ge_workers_AV           0.369072
medInc_0_autos_noAV                 -0.233800
medInc_autos_lt_workers_noAV        -0.336749
medInc_autos_lt_workers_AV          -0.536905
medInc_autos_ge_workers_noAV        -0.424708
medInc_autos_ge_workers_AV          -0.629956
highInc_0_autos_noAV                 1.880329
highInc_autos_lt_workers_noAV        1.588722
highInc_autos_lt_workers_AV          1.464506
highInc_autos_ge_workers_noAV        1.497787
highInc_autos_ge_workers_AV          1.369884
veryHighInc_0_autos_noAV             4.467948
veryHighInc_autos_lt_workers_noAV    3.796549
veryHighInc_autos_lt_workers_AV      3.735116
veryHighInc_autos_ge_workers_noAV    3.697180
veryHighInc_autos_ge_workers_AV      3.639635
destChoiceAlt                        0.000000
taz                               

In [76]:
# Column-wise minimum of the Project mandatory accessibilities
# (cml() subtracts each column's minimum before rescaling).
mandatory_P_raw.min()

lowInc_0_autos_noAV                  0.888640
lowInc_autos_lt_workers_noAV         0.802453
lowInc_autos_lt_workers_AV           0.586713
lowInc_autos_ge_workers_noAV         0.681189
lowInc_autos_ge_workers_AV           0.438689
medInc_0_autos_noAV                 -0.252585
medInc_autos_lt_workers_noAV        -0.354968
medInc_autos_lt_workers_AV          -0.555539
medInc_autos_ge_workers_noAV        -0.442886
medInc_autos_ge_workers_AV          -0.648560
highInc_0_autos_noAV                 1.921737
highInc_autos_lt_workers_noAV        1.610245
highInc_autos_lt_workers_AV          1.488418
highInc_autos_ge_workers_noAV        1.518270
highInc_autos_ge_workers_AV          1.392674
veryHighInc_0_autos_noAV             4.432247
veryHighInc_autos_lt_workers_noAV    3.775303
veryHighInc_autos_lt_workers_AV      3.713051
veryHighInc_autos_ge_workers_noAV    3.677540
veryHighInc_autos_ge_workers_AV      3.618856
destChoiceAlt                        0.000000
taz                               

In [25]:
# parcel_subzone lookup
parcel_subzone_lookup_raw = pd.read_csv(r'X:\bayarea_urbansim\data\2018_10_17_parcel_to_taz1454sub.csv',
                                    usecols=['taz_sub', 'ZONE_ID', 'PARCEL_ID', 'county'])
print(parcel_subzone_lookup_raw.dtypes)

# PARCEL_ID is read as float64 from this file; round it to the nearest integer id.
# Vectorised round + cast replaces a row-by-row Python lambda; int64 is requested
# explicitly so the dtype does not depend on the platform's default integer size.
parcel_subzone_lookup = parcel_subzone_lookup_raw.assign(
    PARCEL_ID=parcel_subzone_lookup_raw['PARCEL_ID'].round().astype('int64'))

# some parcels have no taz_sub
parcel_lookup_nosubzone = parcel_subzone_lookup.loc[parcel_subzone_lookup.taz_sub.isnull()]
print('parcel_subzone_lookup has {} parcels, of which {} are missing subzone'.format(
    parcel_subzone_lookup.shape[0],
    parcel_lookup_nosubzone.shape[0]))

PARCEL_ID    float64
ZONE_ID      float64
county        object
taz_sub       object
dtype: object
parcel_subzone_lookup has 1956208 parcels, of which 1514 are missing subzone


## calculate cml, cnml, and comb_logsum 

In [26]:
# modify logsum files

def create_subzone(logsum_input_df_raw):
    """Return a copy of a logsum table with letter subzones and a ``taz_sub`` key.

    The numeric ``subzone`` codes are replaced by letters
    (0 -> 'c' no walk, 1 -> 'a' short walk, 2 -> 'b' long walk) and a
    ``taz_sub`` column is added by concatenating ``taz`` (as text) with the
    subzone letter, e.g. taz 1 / subzone 1 -> '1a'.

    Parameters
    ----------
    logsum_input_df_raw : pandas.DataFrame
        Table with at least ``taz`` and ``subzone`` columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of the input with ``subzone`` recoded and ``taz_sub`` added.
    """
    subzone_codes = {0: 'c',   # no walk
                     1: 'a',   # short walk
                     2: 'b'}   # long walk
    logsum_input_df = logsum_input_df_raw.copy()
    # Build the whole label column in one pass instead of writing strings into
    # the integer column through .loc, which pandas flags as an
    # incompatible-dtype assignment. Values that are not a known code are left
    # unchanged, exactly as before.
    logsum_input_df['subzone'] = logsum_input_df['subzone'].map(
        lambda code: subzone_codes.get(code, code))
    logsum_input_df['taz_sub'] = logsum_input_df.taz.astype('str') + logsum_input_df.subzone
    return logsum_input_df

def modify_accessMarket(accessMarket_df_raw):
    """Collapse an AccessibilityMarkets table into per-segment population shares.

    Each row is labelled ``<incQ_label>_<autoSuff_label>_<AV|noAV>`` (``AV``
    when ``hasAV`` equals 1, ``noAV`` otherwise), ``num_persons`` is summed per
    label and divided by the overall total.

    Parameters
    ----------
    accessMarket_df_raw : pandas.DataFrame
        Table with ``incQ_label``, ``autoSuff_label``, ``hasAV`` and
        ``num_persons`` columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with one column per segment label holding that
        segment's share of ``num_persons``.
    """
    av_flag = accessMarket_df_raw['hasAV'].eq(1).map({True: 'AV', False: 'noAV'})
    segment_label = (accessMarket_df_raw['incQ_label'] + '_'
                     + accessMarket_df_raw['autoSuff_label'] + '_'
                     + av_flag).rename('label')
    # Only num_persons is needed, so aggregate just that column rather than
    # summing every column of the table (including the text label columns).
    persons_by_label = accessMarket_df_raw['num_persons'].groupby(segment_label).sum()
    prop = (persons_by_label / persons_by_label.sum()).rename('prop')
    return prop.to_frame().transpose().reset_index(drop=True)

# segment population shares for each scenario
accessMarket_NP, accessMarket_P = (
    modify_accessMarket(raw_table) for raw_table in (accessMarket_NP_raw, accessMarket_P_raw)
)

# mandatory accessibilities keyed by taz_sub
mandatory_NP, mandatory_P = (
    create_subzone(raw_table) for raw_table in (mandatory_NP_raw, mandatory_P_raw)
)

# non-mandatory accessibilities keyed by taz_sub
nonMandatory_NP, nonMandatory_P = (
    create_subzone(raw_table) for raw_table in (nonMandatory_NP_raw, nonMandatory_P_raw)
)

In [43]:
def cml(parcels, mandatory_accessibility_raw,
        accessibilities_segmentation_raw):
    """Attach a combined mandatory logsum (``cml``) to every parcel.

    Every accessibility column that also appears in the segmentation table is
    shifted by its own minimum, divided by 0.0134 and weighted by that
    segment's share; the weighted columns are summed per row and joined to the
    parcels on ``taz_sub``.

    Parameters
    ----------
    parcels : pandas.DataFrame
        Must contain ``PARCEL_ID`` and ``taz_sub``.
    mandatory_accessibility_raw : pandas.DataFrame
        Mandatory accessibilities with a ``taz_sub`` column and one column per
        segment. It is not modified.
    accessibilities_segmentation_raw : pandas.DataFrame
        Segment shares; the row with index label 0 is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``PARCEL_ID``, ``taz_sub`` and ``cml``. Parcels whose
        ``taz_sub`` has no match get ``cml = -1``; their count is printed.
    """
    mandatory_accessibility = mandatory_accessibility_raw.copy()
    accessibilities_segmentation = accessibilities_segmentation_raw
    non_segment_cols = ['destChoiceAlt', 'taz', 'subzone', 'weighted_sum']
    cols_to_sum = [col for col in mandatory_accessibility.columns
                   if col not in non_segment_cols
                   and col in accessibilities_segmentation.columns]
    for col in cols_to_sum:
        shifted = mandatory_accessibility[col] - mandatory_accessibility[col].min()
        # 0.0134 is the scaling divisor used for mandatory logsums here;
        # presumably taken from BAUS, which this notebook replicates — TODO confirm.
        mandatory_accessibility[col] = (shifted / 0.0134) * accessibilities_segmentation.loc[0, col]
    mandatory_accessibility['weighted_sum'] = mandatory_accessibility[cols_to_sum].sum(axis=1)

    parcels_cml = parcels[['PARCEL_ID', 'taz_sub']].merge(
        mandatory_accessibility[['taz_sub', 'weighted_sum']], on='taz_sub', how='left'
    ).rename(columns={'weighted_sum': 'cml'})
    # check if any na
    print('{} parcels have no logsum_cml'.format(parcels_cml['cml'].isnull().sum()))
    # Assign the filled column back: an in-place fillna on a selected column is
    # chained assignment and does not update the frame under Copy-on-Write.
    parcels_cml['cml'] = parcels_cml['cml'].fillna(-1)
    return parcels_cml

In [44]:
def cnml(parcels, nonMandatory_accessibility_raw,
        accessibilities_segmentation_raw):
    """Attach a combined non-mandatory logsum (``cnml``) to every parcel.

    Every accessibility column that also appears in the segmentation table is
    shifted by its own minimum, divided by 0.0175 and weighted by that
    segment's share; the weighted columns are summed per row and joined to the
    parcels on ``taz_sub``.

    Parameters
    ----------
    parcels : pandas.DataFrame
        Must contain ``PARCEL_ID`` and ``taz_sub``.
    nonMandatory_accessibility_raw : pandas.DataFrame
        Non-mandatory accessibilities with a ``taz_sub`` column and one column
        per segment. It is not modified.
    accessibilities_segmentation_raw : pandas.DataFrame
        Segment shares; the row with index label 0 is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``PARCEL_ID``, ``taz_sub`` and ``cnml``. Parcels whose
        ``taz_sub`` has no match get ``cnml = -1``; their count is printed.
    """
    nonMandatory_accessibility = nonMandatory_accessibility_raw.copy()
    accessibilities_segmentation = accessibilities_segmentation_raw
    non_segment_cols = ['destChoiceAlt', 'taz', 'subzone', 'weighted_sum']
    cols_to_sum = [col for col in nonMandatory_accessibility.columns
                   if col not in non_segment_cols
                   and col in accessibilities_segmentation.columns]
    for col in cols_to_sum:
        shifted = nonMandatory_accessibility[col] - nonMandatory_accessibility[col].min()
        # 0.0175 is the scaling divisor used for non-mandatory logsums here;
        # presumably taken from BAUS, which this notebook replicates — TODO confirm.
        nonMandatory_accessibility[col] = (shifted / 0.0175) * accessibilities_segmentation.loc[0, col]
    nonMandatory_accessibility['weighted_sum'] = nonMandatory_accessibility[cols_to_sum].sum(axis=1)

    parcels_cnml = parcels[['PARCEL_ID', 'taz_sub']].merge(
        nonMandatory_accessibility[['taz_sub', 'weighted_sum']], on='taz_sub', how='left'
    ).rename(columns={'weighted_sum': 'cnml'})
    # check if any na
    print('{} parcels have no logsum_cnml'.format(parcels_cnml['cnml'].isnull().sum()))
    # Assign the filled column back: an in-place fillna on a selected column is
    # chained assignment and does not update the frame under Copy-on-Write.
    parcels_cnml['cnml'] = parcels_cnml['cnml'].fillna(-1)
    return parcels_cnml

### calculation using parcel_subzone lookup with integer parcel_id

In [8]:
def _combine_logsums(parcel_cml, parcel_cnml, scenario):
    """Merge parcel cml and cnml tables and derive the combo logsum columns.

    ``combo_logsum`` is ``cml + cnml - 170``; ``combo_logsum_final`` is the same
    value with every 0-or-negative entry replaced by 1. The counts of positive
    and non-positive ``combo_logsum`` values are printed, prefixed by
    ``scenario``.
    """
    parcel_logsums = parcel_cml.merge(parcel_cnml, on=['PARCEL_ID', 'taz_sub'], how='outer')
    # The -170 offset matches the BAUS interim output, where parcels without a
    # logsum (cml = cnml = -1) show up as -172 before adjustment.
    parcel_logsums['combo_logsum'] = parcel_logsums['cml'] + parcel_logsums['cnml'] - 170
    # check positive/negative values
    print('{}: before adjustment, {} parcels have positive combo_logsum value, {} parcels have 0 or negative values'.format(
        scenario,
        parcel_logsums.loc[parcel_logsums.combo_logsum > 0].shape[0],
        parcel_logsums.loc[parcel_logsums.combo_logsum <= 0].shape[0]))
    # non-positive values are set to 1, as in the BAUS adjustment step
    parcel_logsums['combo_logsum_final'] = parcel_logsums['combo_logsum'].mask(
        parcel_logsums['combo_logsum'] <= 0, 1)
    return parcel_logsums


## NP
parcel_cml_NP = cml(parcel_subzone_lookup, mandatory_NP, accessMarket_NP)
parcel_cnml_NP = cnml(parcel_subzone_lookup, nonMandatory_NP, accessMarket_NP)

# calculate combo logsum
parcel_logsums_NP = _combine_logsums(parcel_cml_NP, parcel_cnml_NP, 'NP')


## P
parcel_cml_P = cml(parcel_subzone_lookup, mandatory_P, accessMarket_P)
parcel_cnml_P = cnml(parcel_subzone_lookup, nonMandatory_P, accessMarket_P)

# calculate combo logsum
parcel_logsums_P = _combine_logsums(parcel_cml_P, parcel_cnml_P, 'P')

Unnamed: 0,lowInc_0_autos_noAV,lowInc_autos_lt_workers_noAV,lowInc_autos_lt_workers_AV,lowInc_autos_ge_workers_noAV,lowInc_autos_ge_workers_AV,medInc_0_autos_noAV,medInc_autos_lt_workers_noAV,medInc_autos_lt_workers_AV,medInc_autos_ge_workers_noAV,medInc_autos_ge_workers_AV,...,veryHighInc_0_autos_noAV,veryHighInc_autos_lt_workers_noAV,veryHighInc_autos_lt_workers_AV,veryHighInc_autos_ge_workers_noAV,veryHighInc_autos_ge_workers_AV,destChoiceAlt,taz,subzone,taz_sub,weighted_sum
0,27.296507,7.785459,0.709883,88.771926,6.295976,10.502188,23.51327,3.907192,126.303355,14.206426,...,4.1608,25.034237,19.896163,93.080739,50.715631,0,1,c,1c,642.52487
1,29.484346,7.955603,0.725589,89.325442,6.338226,11.280023,24.068078,3.998722,127.247398,14.333697,...,4.63173,25.963851,20.636558,94.119901,51.386876,1,1,a,1a,654.131047
2,28.998317,7.886967,0.719329,89.049858,6.339055,11.113105,23.850292,3.962863,126.782824,14.282264,...,4.528063,25.599835,20.345867,93.608609,51.006894,2,1,b,1b,649.773575
3,27.253597,7.771806,0.708663,88.587944,6.282792,10.482617,23.470545,3.899952,126.03966,14.176865,...,4.151216,24.976238,19.849337,92.834258,50.580565,3,2,c,2c,641.220663
4,29.421178,7.935687,0.723799,89.11387,6.322968,11.246469,24.0023,3.987916,126.928172,14.298416,...,4.607917,25.857979,20.552536,93.794988,51.210799,4,2,a,2a,652.327666


1514 parcels have no logsum_cml


Unnamed: 0,lowInc_0_autos_noAV,lowInc_autos_lt_workers_noAV,lowInc_autos_lt_workers_AV,lowInc_autos_ge_workers_noAV,lowInc_autos_ge_workers_AV,medInc_0_autos_noAV,medInc_autos_lt_workers_noAV,medInc_autos_lt_workers_AV,medInc_autos_ge_workers_noAV,medInc_autos_ge_workers_AV,...,veryHighInc_0_autos_noAV,veryHighInc_autos_lt_workers_noAV,veryHighInc_autos_lt_workers_AV,veryHighInc_autos_ge_workers_noAV,veryHighInc_autos_ge_workers_AV,destChoiceAlt,taz,subzone,taz_sub,weighted_sum
0,21.096521,5.957522,0.544231,70.13449,4.957873,6.678469,14.99255,2.471701,82.299521,9.179001,...,3.937101,23.836684,18.726723,91.32201,49.608687,0,1,c,1c,512.400012
1,21.084255,5.95734,0.544232,70.134304,4.957887,6.671002,14.997822,2.471454,82.30469,9.178567,...,3.925214,23.834817,18.737352,91.332498,49.607792,1,1,a,1a,512.397747
2,21.09475,5.957949,0.544254,70.134955,4.957743,6.685784,14.992746,2.471706,82.292932,9.178541,...,3.928306,23.844402,18.740929,91.332175,49.603431,2,1,b,1b,512.425753
3,21.243645,5.997232,0.547928,70.57347,4.990435,6.714259,15.088242,2.486692,82.777832,9.234556,...,3.943839,23.96525,18.829444,91.840401,49.885814,3,2,c,2c,515.383802
4,21.239039,5.99729,0.547929,70.575446,4.990454,6.712876,15.085875,2.486361,82.782472,9.234959,...,3.945202,23.953519,18.842388,91.831047,49.88298,4,2,a,2a,515.369794


1514 parcels have no logsum_cnml
NP: before adjustment, 1945257 parcels have positive combo_logsum value, 10951 parcels have 0 or negative values


Unnamed: 0,lowInc_0_autos_noAV,lowInc_autos_lt_workers_noAV,lowInc_autos_lt_workers_AV,lowInc_autos_ge_workers_noAV,lowInc_autos_ge_workers_AV,medInc_0_autos_noAV,medInc_autos_lt_workers_noAV,medInc_autos_lt_workers_AV,medInc_autos_ge_workers_noAV,medInc_autos_ge_workers_AV,...,veryHighInc_0_autos_noAV,veryHighInc_autos_lt_workers_noAV,veryHighInc_autos_lt_workers_AV,veryHighInc_autos_ge_workers_noAV,veryHighInc_autos_ge_workers_AV,destChoiceAlt,taz,subzone,taz_sub,weighted_sum
0,27.264307,7.775401,0.751339,87.883301,6.243109,10.5569,23.550085,3.922424,126.457872,14.29718,...,4.17104,25.039509,20.190017,93.406536,50.599725,0,1,c,1c,642.015734
1,29.455221,7.945512,0.768007,88.432187,6.285151,11.338143,24.105047,4.014187,127.402348,14.425074,...,4.641035,25.966421,20.938924,94.44576,51.266894,1,1,a,1a,653.624132
2,28.967925,7.87681,0.761356,88.158809,6.286113,11.170456,23.88704,3.97821,126.937449,14.373339,...,4.537555,25.603344,20.6448,93.934343,50.889221,2,1,b,1b,649.266016
3,27.225824,7.762985,0.750162,87.711715,6.230758,10.536504,23.505733,3.914897,126.186803,14.26659,...,4.161309,24.981262,20.142284,93.158824,50.464502,3,2,c,2c,640.723451
4,29.400761,7.927171,0.766257,88.235084,6.270873,11.303047,24.037209,4.003014,127.074601,14.388666,...,4.616984,25.859655,20.852932,94.118657,51.090672,4,2,a,2a,651.832377


1514 parcels have no logsum_cml


Unnamed: 0,lowInc_0_autos_noAV,lowInc_autos_lt_workers_noAV,lowInc_autos_lt_workers_AV,lowInc_autos_ge_workers_noAV,lowInc_autos_ge_workers_AV,medInc_0_autos_noAV,medInc_autos_lt_workers_noAV,medInc_autos_lt_workers_AV,medInc_autos_ge_workers_noAV,medInc_autos_ge_workers_AV,...,veryHighInc_0_autos_noAV,veryHighInc_autos_lt_workers_noAV,veryHighInc_autos_lt_workers_AV,veryHighInc_autos_ge_workers_noAV,veryHighInc_autos_ge_workers_AV,destChoiceAlt,taz,subzone,taz_sub,weighted_sum
0,21.197381,5.994423,0.581094,69.966927,4.961085,6.701078,14.987317,2.476565,82.237891,9.219288,...,3.92948,23.779323,18.951729,91.409659,49.362227,0,1,c,1c,512.411596
1,21.185079,5.994239,0.581095,69.96675,4.961098,6.693596,14.992583,2.476316,82.243047,9.218853,...,3.917619,23.777442,18.962498,91.420168,49.361342,1,1,a,1a,512.40947
2,21.195619,5.994853,0.581118,69.967399,4.960955,6.708435,14.987517,2.476568,82.231307,9.218824,...,3.920702,23.787015,18.966125,91.419855,49.357014,2,1,b,1b,512.43768
3,21.345106,6.03435,0.58504,70.404668,4.99367,6.736861,15.082792,2.491561,82.715007,9.275025,...,3.936131,23.907237,19.055443,91.927196,49.637329,3,2,c,2c,515.390009
4,21.340466,6.034407,0.58504,70.406624,4.993689,6.735478,15.080426,2.491229,82.719634,9.275429,...,3.937495,23.895549,19.068538,91.917822,49.634504,4,2,a,2a,515.376167


1514 parcels have no logsum_cnml
P: before adjustment, 1945257 parcels have positive combo_logsum value, 10951 parcels have 0 or negative values


In [52]:
# Give the notebook-side Project results distinct column names so they can be
# merged with the BAUS tables later without clashing.
parcel_logsums_P_copy = parcel_logsums_P.rename(columns={
    'PARCEL_ID': 'PARCEL_ID_from_lookup',
    'taz_sub': 'taz_sub_from_lookup',
    'cml': 'cml_Notebook',
    'cnml': 'cnml_Notebook',
    'combo_logsum': 'combo_logsum_Notebook',
    'combo_logsum_final': 'combo_logsum_final_Notebook',
})
parcel_logsums_P_copy.head()

Unnamed: 0,PARCEL_ID_from_lookup,taz_sub_from_lookup,cml_Notebook,cnml_Notebook,combo_logsum_Notebook,combo_logsum_final_Notebook
0,229116,725b,424.787944,365.643917,620.431861,620.431861
1,244166,715a,425.969991,348.53925,604.509242,604.509242
2,202378,820a,492.116127,359.639886,681.756013,681.756013
3,2004420,1401c,246.652741,183.330399,259.98314,259.98314
4,340332,763a,506.431033,400.876065,737.307097,737.307097


## 2. BAUS interim data and parcel_logsums output (P)

### 2.1 V1 run 306

In [30]:
# BAUS parcel_logsums output of run 306; every column gets a "_baus" suffix
baus_output_2010V1 = pd.read_csv(
    r'X:\bayarea_urbansim\runs\run306_parcel_logsums_2010.csv').add_suffix('_baus')

print('BAUS parcel_logsums output: \n out of {} parcels, {} have combo_logsum = 1'.format(
    len(baus_output_2010V1), (baus_output_2010V1['combo_logsum_baus'] == 1).sum()))

BAUS parcel_logsums output: 
 out of 1956212 parcels, 10955 have combo_logsum = 1


In [47]:
# interim data written out by BAUS run 306 (V1)

# cml step: parcels table with the subzone attached to each parcel
parcel_chk_cml_V1 = pd.read_csv(r'X:\bayarea_urbansim\runs\run306_parcel_chk_cml_2010.csv')
missing_subzone_cml_V1 = parcel_chk_cml_V1.loc[parcel_chk_cml_V1.subzone.isnull()]
# this check is on the cml file, so label it as the cml step (was mislabelled "cnml")
print('in cml step: \n master parcels table has {} parcels, of which {} are missing subzone'.format(
    parcel_chk_cml_V1.shape[0], missing_subzone_cml_V1.shape[0]))

# cml values; the value column is called 'right' in the file
df_cml_V1 = pd.read_csv(r'X:\bayarea_urbansim\runs\run306_df_cml_2010.csv')
df_cml_V1 = df_cml_V1.rename(columns={'right': 'interim_cml'})
missing_cml_V1 = df_cml_V1.loc[df_cml_V1.interim_cml.isnull()]
print('cml_logsum table based on parcel_subzone_lookup has {} parcels, of which {} are missing cml'.format(
    df_cml_V1.shape[0], missing_cml_V1.shape[0]))


# cnml step: same two checks
parcel_chk_cnml_V1 = pd.read_csv(r'X:\bayarea_urbansim\runs\run306_parcel_chk_cnml_2010.csv')
missing_subzone_cnml_V1 = parcel_chk_cnml_V1.loc[parcel_chk_cnml_V1.subzone.isnull()]
print('in cnml step: \n master parcels table has {} parcels, of which {} are missing subzone'.format(
    parcel_chk_cnml_V1.shape[0], missing_subzone_cnml_V1.shape[0]))

df_cnml_V1 = pd.read_csv(r'X:\bayarea_urbansim\runs\run306_df_cnml_2010.csv')
df_cnml_V1 = df_cnml_V1.rename(columns={'right': 'interim_cnml'})
missing_cnml_V1 = df_cnml_V1.loc[df_cnml_V1.interim_cnml.isnull()]
print('cnml_logsum table based on parcel_subzone_lookup has {} parcels, of which {} are missing cnml'.format(
    df_cnml_V1.shape[0],
    missing_cnml_V1.shape[0]))


# combo_logsum before and after the adjustment of non-positive values to 1;
# the value column is called '0' in both files
interim_combo_logsum_beforeAdjust_V1 = pd.read_csv(r'X:\bayarea_urbansim\runs\run306_df_combo_logsum_beforeAdjust_2010.csv')
interim_combo_logsum_beforeAdjust_V1 = interim_combo_logsum_beforeAdjust_V1.rename(
    columns={'0': 'combo_interim_beforeAdjust'})

interim_combo_logsum_afterAdjust_V1 = pd.read_csv(r'X:\bayarea_urbansim\runs\run306_df_combo_logsum_afterAdjust_2010.csv')
interim_combo_logsum_afterAdjust_V1 = interim_combo_logsum_afterAdjust_V1.rename(
    columns={'0': 'combo_interim_afterAdjust'})

interim_combo_logsum_V1 = interim_combo_logsum_beforeAdjust_V1[['parcel_id', 'combo_interim_beforeAdjust']].merge(
                          interim_combo_logsum_afterAdjust_V1[['parcel_id', 'combo_interim_afterAdjust']],
                          on='parcel_id', how='outer')

negative_beforeAdjust_V1 = interim_combo_logsum_V1.loc[interim_combo_logsum_V1.combo_interim_beforeAdjust <= 0]

print('in combo_logsum step: \n {} parcels have 0 or negative combo_logsum before adjusting to 1'.format(
    negative_beforeAdjust_V1.shape[0]))


# consolidate cml, cnml subzone, and merge into combo_logsum

interim_subzone_V1 = parcel_chk_cml_V1.merge(parcel_chk_cnml_V1, on=['parcel_id', 'subzone'], how='outer')
interim_subzone_V1 = interim_subzone_V1.rename(columns={'subzone': 'subzone_interim'})

interim_data_V1 = interim_subzone_V1.merge(interim_combo_logsum_V1, on='parcel_id', how='outer')
interim_data_V1

in cnml step: 
 master parcels table has 1956212 parcels, of which 1518 are missing subzone
cml_logsum table based on parcel_subzone_lookup has 1956208 parcels, of which 1514 are missing cml
in cnml step: 
 master parcels table has 1956212 parcels, of which 1518 are missing subzone
cnml_logsum table based on parcel_subzone_lookup has 1956208 parcels, of which 1514 are missing ncml
in combo_logsum step: 
 10955 parcels have 0 or negative combo_logsum before adjusting to 1


Unnamed: 0,parcel_id,subzone_interim,combo_interim_beforeAdjust,combo_interim_afterAdjust
0,229116,725b,620.431861,620.431861
1,244166,715a,604.509242,604.509242
2,202378,820a,681.756013,681.756013
3,2004420,1401c,259.983140,259.983140
4,340332,763a,737.307097,737.307097
...,...,...,...,...
1956207,782909,1448a,674.994403,674.994403
1956208,2054504,,-172.000000,1.000000
1956209,2054505,,-172.000000,1.000000
1956210,2054506,,-172.000000,1.000000


In [38]:
# Combine the run 306 interim subzone / combo_logsum data with the final BAUS output
BAUS_logsum_V1 = pd.merge(
    interim_data_V1,
    baus_output_2010V1,
    how='outer',
    left_on='parcel_id',
    right_on='parcel_id_baus',
)
print(len(BAUS_logsum_V1))
display(BAUS_logsum_V1.head())

1956212


Unnamed: 0,parcel_id,subzone_interim,combo_interim_beforeAdjust,combo_interim_afterAdjust,parcel_id_baus,cnml_baus,cml_baus,combo_logsum_baus
0,229116,725b,620.431861,620.431861,229116,365.643917,424.787944,620.431861
1,244166,715a,604.509242,604.509242,244166,348.53925,425.969991,604.509242
2,202378,820a,681.756013,681.756013,202378,359.639886,492.116127,681.756013
3,2004420,1401c,259.98314,259.98314,2004420,183.330399,246.652741,259.98314
4,340332,763a,737.307097,737.307097,340332,400.876065,506.431033,737.307097


### 2.2 V2 run 307

In [40]:
# BAUS parcel_logsums output of run 307; every column gets a "_baus" suffix
baus_output_2010V2 = pd.read_csv(
    r'X:\bayarea_urbansim\runs\run307_parcel_logsums_2010.csv').add_suffix('_baus')

print('BAUS parcel_logsums output: \n out of {} parcels, {} have combo_logsum = 1'.format(
    len(baus_output_2010V2), (baus_output_2010V2['combo_logsum_baus'] == 1).sum()))

BAUS parcel_logsums output: 
 out of 1956212 parcels, 449162 have combo_logsum = 1


In [45]:
# interim data written out by BAUS run 307 (V2)

# cml step: parcels table with the subzone attached to each parcel
parcel_chk_cml_V2 = pd.read_csv(r'X:\bayarea_urbansim\runs\run307_parcel_chk_cml_2010.csv')
missing_subzone_cml_V2 = parcel_chk_cml_V2.loc[parcel_chk_cml_V2.subzone.isnull()]
# this check is on the cml file, so label it as the cml step (was mislabelled "cnml")
print('in cml step: \n master parcels table has {} parcels, of which {} are missing subzone'.format(
    parcel_chk_cml_V2.shape[0], missing_subzone_cml_V2.shape[0]))

# cml values; the value column is called 'right' in the file
df_cml_V2 = pd.read_csv(r'X:\bayarea_urbansim\runs\run307_df_cml_2010.csv')
df_cml_V2 = df_cml_V2.rename(columns={'right': 'interim_cml'})
missing_cml_V2 = df_cml_V2.loc[df_cml_V2.interim_cml.isnull()]
print('cml_logsum table based on parcel_subzone_lookup has {} parcels, of which {} are missing cml'.format(
    df_cml_V2.shape[0], missing_cml_V2.shape[0]))


# cnml step: same two checks
parcel_chk_cnml_V2 = pd.read_csv(r'X:\bayarea_urbansim\runs\run307_parcel_chk_cnml_2010.csv')
missing_subzone_cnml_V2 = parcel_chk_cnml_V2.loc[parcel_chk_cnml_V2.subzone.isnull()]
print('in cnml step: \n master parcels table has {} parcels, of which {} are missing subzone'.format(
    parcel_chk_cnml_V2.shape[0], missing_subzone_cnml_V2.shape[0]))

df_cnml_V2 = pd.read_csv(r'X:\bayarea_urbansim\runs\run307_df_cnml_2010.csv')
df_cnml_V2 = df_cnml_V2.rename(columns={'right': 'interim_cnml'})
missing_cnml_V2 = df_cnml_V2.loc[df_cnml_V2.interim_cnml.isnull()]
print('cnml_logsum table based on parcel_subzone_lookup has {} parcels, of which {} are missing cnml'.format(
    df_cnml_V2.shape[0],
    missing_cnml_V2.shape[0]))


# combo_logsum before and after the adjustment of non-positive values to 1;
# the value column is called '0' in both files
interim_combo_logsum_beforeAdjust_V2 = pd.read_csv(r'X:\bayarea_urbansim\runs\run307_df_combo_logsum_beforeAdjust_2010.csv')
interim_combo_logsum_beforeAdjust_V2 = interim_combo_logsum_beforeAdjust_V2.rename(
    columns={'0': 'combo_interim_beforeAdjust'})

interim_combo_logsum_afterAdjust_V2 = pd.read_csv(r'X:\bayarea_urbansim\runs\run307_df_combo_logsum_afterAdjust_2010.csv')
interim_combo_logsum_afterAdjust_V2 = interim_combo_logsum_afterAdjust_V2.rename(
    columns={'0': 'combo_interim_afterAdjust'})

interim_combo_logsum_V2 = interim_combo_logsum_beforeAdjust_V2[['parcel_id', 'combo_interim_beforeAdjust']].merge(
                          interim_combo_logsum_afterAdjust_V2[['parcel_id', 'combo_interim_afterAdjust']],
                          on='parcel_id', how='outer')

negative_beforeAdjust_V2 = interim_combo_logsum_V2.loc[interim_combo_logsum_V2.combo_interim_beforeAdjust <= 0]

print('in combo_logsum step: \n {} parcels have 0 or negative combo_logsum before adjusting to 1'.format(
    negative_beforeAdjust_V2.shape[0]))

in cnml step: 
 master parcels table has 1956212 parcels, of which 443252 are missing subzone
cml_logsum table based on parcel_subzone_lookup has 1956208 parcels, of which 1514 are missing cml
in cnml step: 
 master parcels table has 1956212 parcels, of which 443252 are missing subzone
cnml_logsum table based on parcel_subzone_lookup has 1956208 parcels, of which 1514 are missing ncml
in combo_logsum step: 
 449162 parcels have 0 or negative combo_logsum before adjusting to 1


In [46]:
# consolidate cml, cnml subzone, and merge into combo_logsum

interim_subzone_V2 = parcel_chk_cml_V2.merge(parcel_chk_cnml_V2, on=['parcel_id', 'subzone'], how='outer')
interim_subzone_V2 = interim_subzone_V2.rename(columns={'subzone': 'subzone_interim'})

# (a bare `interim_data_V2` expression used to follow here; it was not the last
# expression of the cell, so it displayed nothing and has been dropped)
interim_data_V2 = interim_subzone_V2.merge(interim_combo_logsum_V2, on='parcel_id', how='outer')


# join BAUS interim subzone data with output
BAUS_logsum_V2 = interim_data_V2.merge(baus_output_2010V2, left_on='parcel_id', right_on='parcel_id_baus', how='outer')
print(BAUS_logsum_V2.shape[0])
display(BAUS_logsum_V2.head())

1956212


Unnamed: 0,parcel_id,subzone_interim,combo_interim_beforeAdjust,combo_interim_afterAdjust,parcel_id_baus,cml_baus,cnml_baus,combo_logsum_baus
0,229116,725b,620.431861,620.431861,229116,424.787944,365.643917,620.431861
1,244166,715a,604.509242,604.509242,244166,425.969991,348.53925,604.509242
2,202378,820a,681.756013,681.756013,202378,492.116127,359.639886,681.756013
3,2004420,1401c,259.98314,259.98314,2004420,246.652741,183.330399,259.98314
4,340332,763a,737.307097,737.307097,340332,506.431033,400.876065,737.307097


### 2.3 V3 run 308

In [48]:
# BAUS parcel_logsums output of run 308; every column gets a "_baus" suffix
baus_output_2010V3 = pd.read_csv(
    r'X:\bayarea_urbansim\runs\run308_parcel_logsums_2010.csv').add_suffix('_baus')

print('BAUS parcel_logsums output: \n out of {} parcels, {} have combo_logsum = 1'.format(
    len(baus_output_2010V3), (baus_output_2010V3['combo_logsum_baus'] == 1).sum()))

BAUS parcel_logsums output: 
 out of 1956212 parcels, 10955 have combo_logsum = 1


In [49]:
# interim data written out by BAUS run 308 (V3)

# cml step: parcels table with the subzone attached to each parcel
parcel_chk_cml_V3 = pd.read_csv(r'X:\bayarea_urbansim\runs\run308_parcel_chk_cml_2010.csv')
missing_subzone_cml_V3 = parcel_chk_cml_V3.loc[parcel_chk_cml_V3.subzone.isnull()]
# this check is on the cml file, so label it as the cml step (was mislabelled "cnml")
print('in cml step: \n master parcels table has {} parcels, of which {} are missing subzone'.format(
    parcel_chk_cml_V3.shape[0], missing_subzone_cml_V3.shape[0]))

# cml values; the value column is called 'right' in the file
df_cml_V3 = pd.read_csv(r'X:\bayarea_urbansim\runs\run308_df_cml_2010.csv')
df_cml_V3 = df_cml_V3.rename(columns={'right': 'interim_cml'})
missing_cml_V3 = df_cml_V3.loc[df_cml_V3.interim_cml.isnull()]
print('cml_logsum table based on parcel_subzone_lookup has {} parcels, of which {} are missing cml'.format(
    df_cml_V3.shape[0], missing_cml_V3.shape[0]))


# cnml step: same two checks
parcel_chk_cnml_V3 = pd.read_csv(r'X:\bayarea_urbansim\runs\run308_parcel_chk_cnml_2010.csv')
missing_subzone_cnml_V3 = parcel_chk_cnml_V3.loc[parcel_chk_cnml_V3.subzone.isnull()]
print('in cnml step: \n master parcels table has {} parcels, of which {} are missing subzone'.format(
    parcel_chk_cnml_V3.shape[0], missing_subzone_cnml_V3.shape[0]))

df_cnml_V3 = pd.read_csv(r'X:\bayarea_urbansim\runs\run308_df_cnml_2010.csv')
df_cnml_V3 = df_cnml_V3.rename(columns={'right': 'interim_cnml'})
missing_cnml_V3 = df_cnml_V3.loc[df_cnml_V3.interim_cnml.isnull()]
print('cnml_logsum table based on parcel_subzone_lookup has {} parcels, of which {} are missing cnml'.format(
    df_cnml_V3.shape[0],
    missing_cnml_V3.shape[0]))


# combo_logsum before and after the adjustment of non-positive values to 1;
# the value column is called '0' in both files
interim_combo_logsum_beforeAdjust_V3 = pd.read_csv(r'X:\bayarea_urbansim\runs\run308_df_combo_logsum_beforeAdjust_2010.csv')
interim_combo_logsum_beforeAdjust_V3 = interim_combo_logsum_beforeAdjust_V3.rename(
    columns={'0': 'combo_interim_beforeAdjust'})

interim_combo_logsum_afterAdjust_V3 = pd.read_csv(r'X:\bayarea_urbansim\runs\run308_df_combo_logsum_afterAdjust_2010.csv')
interim_combo_logsum_afterAdjust_V3 = interim_combo_logsum_afterAdjust_V3.rename(
    columns={'0': 'combo_interim_afterAdjust'})

interim_combo_logsum_V3 = interim_combo_logsum_beforeAdjust_V3[['parcel_id', 'combo_interim_beforeAdjust']].merge(
                          interim_combo_logsum_afterAdjust_V3[['parcel_id', 'combo_interim_afterAdjust']],
                          on='parcel_id', how='outer')

negative_beforeAdjust_V3 = interim_combo_logsum_V3.loc[interim_combo_logsum_V3.combo_interim_beforeAdjust <= 0]

print('in combo_logsum step: \n {} parcels have 0 or negative combo_logsum before adjusting to 1'.format(
    negative_beforeAdjust_V3.shape[0]))


# consolidate cml, cnml subzone, and merge into combo_logsum

interim_subzone_V3 = parcel_chk_cml_V3.merge(parcel_chk_cnml_V3, on=['parcel_id', 'subzone'], how='outer')
interim_subzone_V3 = interim_subzone_V3.rename(columns={'subzone': 'subzone_interim'})

interim_data_V3 = interim_subzone_V3.merge(interim_combo_logsum_V3, on='parcel_id', how='outer')
interim_data_V3

in cnml step: 
 master parcels table has 1956212 parcels, of which 1518 are missing subzone
cml_logsum table based on parcel_subzone_lookup has 1956208 parcels, of which 1514 are missing cml
in cnml step: 
 master parcels table has 1956212 parcels, of which 1518 are missing subzone
cnml_logsum table based on parcel_subzone_lookup has 1956208 parcels, of which 1514 are missing ncml
in combo_logsum step: 
 10955 parcels have 0 or negative combo_logsum before adjusting to 1


Unnamed: 0,parcel_id,subzone_interim,combo_interim_beforeAdjust,combo_interim_afterAdjust
0,229116,725b,620.431861,620.431861
1,244166,715a,604.509242,604.509242
2,202378,820a,681.756013,681.756013
3,2004420,1401c,259.983140,259.983140
4,340332,763a,737.307097,737.307097
...,...,...,...,...
1956207,782909,1448a,674.994403,674.994403
1956208,2054504,,-172.000000,1.000000
1956209,2054505,,-172.000000,1.000000
1956210,2054506,,-172.000000,1.000000


In [50]:
# Attach the BAUS parcel-level output to the interim subzone/combo_logsum data.
# An outer join keeps parcels that are present on only one side.
BAUS_logsum_V3 = pd.merge(
    interim_data_V3,
    baus_output_2010V3,
    how='outer',
    left_on='parcel_id',
    right_on='parcel_id_baus',
)
print(len(BAUS_logsum_V3))
display(BAUS_logsum_V3.head())

1956212


Unnamed: 0,parcel_id,subzone_interim,combo_interim_beforeAdjust,combo_interim_afterAdjust,parcel_id_baus,cnml_baus,cml_baus,combo_logsum_baus
0,229116,725b,620.431861,620.431861,229116,365.643917,424.787944,620.431861
1,244166,715a,604.509242,604.509242,244166,348.53925,425.969991,604.509242
2,202378,820a,681.756013,681.756013,202378,359.639886,492.116127,681.756013
3,2004420,1401c,259.98314,259.98314,2004420,183.330399,246.652741,259.98314
4,340332,763a,737.307097,737.307097,340332,400.876065,506.431033,737.307097


## 3. compare BAUS data with Notebook calculations on the 3 test runs

### 3.1 V1 run 306

In [53]:
# Outer-join the BAUS data (V1) with the Notebook calculation so parcels
# present on only one side are kept in the comparison table.
logsum_compare_V1 = pd.merge(
    BAUS_logsum_V1,
    parcel_logsums_P_copy,
    how='outer',
    left_on='parcel_id',
    right_on='PARCEL_ID_from_lookup',
)
print(len(logsum_compare_V1))
logsum_compare_V1.head()

1956212


Unnamed: 0,parcel_id,subzone_interim,combo_interim_beforeAdjust,combo_interim_afterAdjust,parcel_id_baus,cnml_baus,cml_baus,combo_logsum_baus,PARCEL_ID_from_lookup,taz_sub_from_lookup,cml_Notebook,cnml_Notebook,combo_logsum_Notebook,combo_logsum_final_Notebook
0,229116,725b,620.431861,620.431861,229116,365.643917,424.787944,620.431861,229116.0,725b,424.787944,365.643917,620.431861,620.431861
1,244166,715a,604.509242,604.509242,244166,348.53925,425.969991,604.509242,244166.0,715a,425.969991,348.53925,604.509242,604.509242
2,202378,820a,681.756013,681.756013,202378,359.639886,492.116127,681.756013,202378.0,820a,492.116127,359.639886,681.756013,681.756013
3,2004420,1401c,259.98314,259.98314,2004420,183.330399,246.652741,259.98314,2004420.0,1401c,246.652741,183.330399,259.98314,259.98314
4,340332,763a,737.307097,737.307097,340332,400.876065,506.431033,737.307097,340332.0,763a,506.431033,400.876065,737.307097,737.307097


In [54]:
# Parcels with no subzone on either side: neither in the BAUS interim data
# nor in the lookup used by the Notebook calculation.
baus_missing_V1 = logsum_compare_V1['subzone_interim'].isnull()
lookup_missing_V1 = logsum_compare_V1['taz_sub_from_lookup'].isnull()
noSubzone_both_V1 = logsum_compare_V1.loc[baus_missing_V1 & lookup_missing_V1]
print('{} parcels are missing subzone in both BAUS and Notebook calculation'.format(len(noSubzone_both_V1)))

# Rows without a PARCEL_ID_from_lookup are reported as synthetic parcels; the
# second figure is the size of the separately built parcel_lookup_nosubzone table.
n_synthetic_V1 = int(noSubzone_both_V1['PARCEL_ID_from_lookup'].isnull().sum())
print('including {} synthetic parcels and {} parcels missing subzone in the lookup input'.format(
    n_synthetic_V1, parcel_lookup_nosubzone.shape[0]))

1518 parcels are missing subzone in both BAUS and Notebook calculation
including 4 synthetic parcels and 1514 parcels missing subzone in the lookup input


In [55]:
# Parcels that BAUS assigned a subzone to even though the lookup has none.
baus_has_subzone_V1 = logsum_compare_V1['subzone_interim'].notnull()
lookup_missing_V1 = logsum_compare_V1['taz_sub_from_lookup'].isnull()
noSubzone_lookupOnly_V1 = logsum_compare_V1.loc[baus_has_subzone_V1 & lookup_missing_V1]
print('{} parcels have subzone in BAUS but not in the lookup'.format(len(noSubzone_lookupOnly_V1)))

0 parcels have subzone in BAUS but not in the lookup


In [57]:
# Parcels that have a subzone in the lookup but none in the BAUS interim data.
baus_missing_V1 = logsum_compare_V1['subzone_interim'].isnull()
lookup_has_subzone_V1 = logsum_compare_V1['taz_sub_from_lookup'].notnull()
noSubzone_BAUSonly_V1 = logsum_compare_V1.loc[baus_missing_V1 & lookup_has_subzone_V1]
print('{} parcels have subzone in the lookup, but missing subzone in BAUS. Of these parcels: \n'.format(
    len(noSubzone_BAUSonly_V1)))

# (message template, column whose value distribution is printed)
value_reports_V1 = [
    ('combo_interim_beforeAdjust stats: \n{}\n', 'combo_interim_beforeAdjust'),
    ('combo_interim_afterAdjust stats: \n{}\n', 'combo_interim_afterAdjust'),
    ('combo_logsum in BAUS output: \n{}\n', 'combo_logsum_baus'),
    ('combo_logsum before Adjustment calculated in Notebook: \n{}\n', 'combo_logsum_Notebook'),
    ('combo_logsum after Adjustment calculated in Notebook: \n{}\n', 'combo_logsum_final_Notebook'),
]
for template, column in value_reports_V1:
    print(template.format(noSubzone_BAUSonly_V1[column].value_counts()))

0 parcels have subzone in the lookup, but missing subzone in BAUS. Of these parcels: 

combo_interim_beforeAdjust stats: 
Series([], Name: combo_interim_beforeAdjust, dtype: int64)

combo_interim_afterAdjust stats: 
Series([], Name: combo_interim_afterAdjust, dtype: int64)

combo_logsum in BAUS output: 
Series([], Name: combo_logsum_baus, dtype: int64)

combo_logsum before Adjustment calculated in Notebook: 
Series([], Name: combo_logsum_Notebook, dtype: int64)

combo_logsum after Adjustment calculated in Notebook: 
Series([], Name: combo_logsum_final_Notebook, dtype: int64)



In [59]:
# Restrict to parcels with a subzone on both sides, then print the rows whose
# BAUS and Notebook logsum values differ by more than 1e-10.
baus_has_subzone_V1 = logsum_compare_V1['subzone_interim'].notnull()
lookup_has_subzone_V1 = logsum_compare_V1['taz_sub_from_lookup'].notnull()
haveSubzone_both_V1 = logsum_compare_V1.loc[baus_has_subzone_V1 & lookup_has_subzone_V1]
print('{} parcels have subzone in both the lookup and BAUS. Compare logsum values: \n'.format(
    len(haveSubzone_both_V1)))

# (message template, BAUS-side column, Notebook-side column)
logsum_checks_V1 = [
    ('parcels where combo_interim_beforeAdjust != combo_logsum_Notebook before Adjustment calculated in Notebook: \n{}',
     'combo_interim_beforeAdjust', 'combo_logsum_Notebook'),
    ('parcels where combo_interim_afterAdjust != combo_logsum_Notebook after Adjustment calculated in Notebook: \n{}',
     'combo_interim_afterAdjust', 'combo_logsum_final_Notebook'),
    ('parcels where BAUS output combo_logsum != combo_logsum_Notebook after Adjustment calculated in Notebook: \n{}',
     'combo_logsum_baus', 'combo_logsum_final_Notebook'),
]
for template, baus_col, notebook_col in logsum_checks_V1:
    gap_V1 = (haveSubzone_both_V1[baus_col] - haveSubzone_both_V1[notebook_col]).abs()
    print(template.format(haveSubzone_both_V1.loc[gap_V1 > 1e-10]))

1954694 parcels have subzone in both the lookup and BAUS. Compare logsum values: 

parcels where combo_interim_beforeAdjust != combo_logsum_Notebook before Adjustment calculated in Notebook: 
Empty DataFrame
Columns: [parcel_id, subzone_interim, combo_interim_beforeAdjust, combo_interim_afterAdjust, parcel_id_baus, cnml_baus, cml_baus, combo_logsum_baus, PARCEL_ID_from_lookup, taz_sub_from_lookup, cml_Notebook, cnml_Notebook, combo_logsum_Notebook, combo_logsum_final_Notebook]
Index: []
parcels where combo_interim_afterAdjust != combo_logsum_Notebook after Adjustment calculated in Notebook: 
Empty DataFrame
Columns: [parcel_id, subzone_interim, combo_interim_beforeAdjust, combo_interim_afterAdjust, parcel_id_baus, cnml_baus, cml_baus, combo_logsum_baus, PARCEL_ID_from_lookup, taz_sub_from_lookup, cml_Notebook, cnml_Notebook, combo_logsum_Notebook, combo_logsum_final_Notebook]
Index: []
parcels where BAUS output combo_logsum != combo_logsum_Notebook after Adjustment calculated in Notebo

### 3.2 V2 run 307

In [60]:
# Outer-join the BAUS data (V2) with the Notebook calculation so parcels
# present on only one side are kept in the comparison table.
logsum_compare_V2 = pd.merge(
    BAUS_logsum_V2,
    parcel_logsums_P_copy,
    how='outer',
    left_on='parcel_id',
    right_on='PARCEL_ID_from_lookup',
)
print(len(logsum_compare_V2))
logsum_compare_V2.head()

1956212


Unnamed: 0,parcel_id,subzone_interim,combo_interim_beforeAdjust,combo_interim_afterAdjust,parcel_id_baus,cml_baus,cnml_baus,combo_logsum_baus,PARCEL_ID_from_lookup,taz_sub_from_lookup,cml_Notebook,cnml_Notebook,combo_logsum_Notebook,combo_logsum_final_Notebook
0,229116,725b,620.431861,620.431861,229116,424.787944,365.643917,620.431861,229116.0,725b,424.787944,365.643917,620.431861,620.431861
1,244166,715a,604.509242,604.509242,244166,425.969991,348.53925,604.509242,244166.0,715a,425.969991,348.53925,604.509242,604.509242
2,202378,820a,681.756013,681.756013,202378,492.116127,359.639886,681.756013,202378.0,820a,492.116127,359.639886,681.756013,681.756013
3,2004420,1401c,259.98314,259.98314,2004420,246.652741,183.330399,259.98314,2004420.0,1401c,246.652741,183.330399,259.98314,259.98314
4,340332,763a,737.307097,737.307097,340332,506.431033,400.876065,737.307097,340332.0,763a,506.431033,400.876065,737.307097,737.307097


In [61]:
# Parcels with no subzone on either side: neither in the BAUS interim data
# nor in the lookup used by the Notebook calculation.
baus_missing_V2 = logsum_compare_V2['subzone_interim'].isnull()
lookup_missing_V2 = logsum_compare_V2['taz_sub_from_lookup'].isnull()
noSubzone_both_V2 = logsum_compare_V2.loc[baus_missing_V2 & lookup_missing_V2]
print('{} parcels are missing subzone in both BAUS and Notebook calculation'.format(len(noSubzone_both_V2)))

# Rows without a PARCEL_ID_from_lookup are reported as synthetic parcels; the
# second figure is the size of the separately built parcel_lookup_nosubzone table.
n_synthetic_V2 = int(noSubzone_both_V2['PARCEL_ID_from_lookup'].isnull().sum())
print('including {} synthetic parcels and {} parcels missing subzone in the lookup input'.format(
    n_synthetic_V2, parcel_lookup_nosubzone.shape[0]))

1518 parcels are missing subzone in both BAUS and Notebook calculation
including 4 synthetic parcels and 1514 parcels missing subzone in the lookup input


In [62]:
# Parcels that BAUS assigned a subzone to even though the lookup has none.
baus_has_subzone_V2 = logsum_compare_V2['subzone_interim'].notnull()
lookup_missing_V2 = logsum_compare_V2['taz_sub_from_lookup'].isnull()
noSubzone_lookupOnly_V2 = logsum_compare_V2.loc[baus_has_subzone_V2 & lookup_missing_V2]
print('{} parcels have subzone in BAUS but not in the lookup'.format(len(noSubzone_lookupOnly_V2)))

0 parcels have subzone in BAUS but not in the lookup


In [63]:
# Parcels that have a subzone in the lookup but none in the BAUS interim data.
baus_missing_V2 = logsum_compare_V2['subzone_interim'].isnull()
lookup_has_subzone_V2 = logsum_compare_V2['taz_sub_from_lookup'].notnull()
noSubzone_BAUSonly_V2 = logsum_compare_V2.loc[baus_missing_V2 & lookup_has_subzone_V2]
print('{} parcels have subzone in the lookup, but missing subzone in BAUS. Of these parcels: \n'.format(
    len(noSubzone_BAUSonly_V2)))

# (message template, column whose value distribution is printed)
value_reports_V2 = [
    ('combo_interim_beforeAdjust stats: \n{}\n', 'combo_interim_beforeAdjust'),
    ('combo_interim_afterAdjust stats: \n{}\n', 'combo_interim_afterAdjust'),
    ('combo_logsum in BAUS output: \n{}\n', 'combo_logsum_baus'),
    ('combo_logsum before Adjustment calculated in Notebook: \n{}\n', 'combo_logsum_Notebook'),
    ('combo_logsum after Adjustment calculated in Notebook: \n{}\n', 'combo_logsum_final_Notebook'),
]
for template, column in value_reports_V2:
    print(template.format(noSubzone_BAUSonly_V2[column].value_counts()))

441734 parcels have subzone in the lookup, but missing subzone in BAUS. Of these parcels: 

combo_interim_beforeAdjust stats: 
-172.0    441734
Name: combo_interim_beforeAdjust, dtype: int64

combo_interim_afterAdjust stats: 
1.0    441734
Name: combo_interim_afterAdjust, dtype: int64

combo_logsum in BAUS output: 
1.0    441734
Name: combo_logsum_baus, dtype: int64

combo_logsum before Adjustment calculated in Notebook: 
-168.384975    3435
 488.520420    2163
 355.788628    1652
 440.183672    1620
 268.321540    1465
               ... 
 763.741848       1
 657.534979       1
 807.186695       1
 801.247820       1
 766.012799       1
Name: combo_logsum_Notebook, Length: 1610, dtype: int64

combo_logsum after Adjustment calculated in Notebook: 
1.000000      3527
488.520420    2163
355.788628    1652
440.183672    1620
268.321540    1465
              ... 
768.147129       1
564.416138       1
698.542562       1
674.879929       1
766.012799       1
Name: combo_logsum_final_Notebook

In [64]:
# Restrict to parcels with a subzone on both sides, then print the rows whose
# BAUS and Notebook logsum values differ by more than 1e-10.
baus_has_subzone_V2 = logsum_compare_V2['subzone_interim'].notnull()
lookup_has_subzone_V2 = logsum_compare_V2['taz_sub_from_lookup'].notnull()
haveSubzone_both_V2 = logsum_compare_V2.loc[baus_has_subzone_V2 & lookup_has_subzone_V2]
print('{} parcels have subzone in both the lookup and BAUS. Compare logsum values: \n'.format(
    len(haveSubzone_both_V2)))

# (message template, BAUS-side column, Notebook-side column)
logsum_checks_V2 = [
    ('parcels where combo_interim_beforeAdjust != combo_logsum_Notebook before Adjustment calculated in Notebook: \n{}',
     'combo_interim_beforeAdjust', 'combo_logsum_Notebook'),
    ('parcels where combo_interim_afterAdjust != combo_logsum_Notebook after Adjustment calculated in Notebook: \n{}',
     'combo_interim_afterAdjust', 'combo_logsum_final_Notebook'),
    ('parcels where BAUS output combo_logsum != combo_logsum_Notebook after Adjustment calculated in Notebook: \n{}',
     'combo_logsum_baus', 'combo_logsum_final_Notebook'),
]
for template, baus_col, notebook_col in logsum_checks_V2:
    gap_V2 = (haveSubzone_both_V2[baus_col] - haveSubzone_both_V2[notebook_col]).abs()
    print(template.format(haveSubzone_both_V2.loc[gap_V2 > 1e-10]))

1512960 parcels have subzone in both the lookup and BAUS. Compare logsum values: 

parcels where combo_interim_beforeAdjust != combo_logsum_Notebook before Adjustment calculated in Notebook: 
Empty DataFrame
Columns: [parcel_id, subzone_interim, combo_interim_beforeAdjust, combo_interim_afterAdjust, parcel_id_baus, cml_baus, cnml_baus, combo_logsum_baus, PARCEL_ID_from_lookup, taz_sub_from_lookup, cml_Notebook, cnml_Notebook, combo_logsum_Notebook, combo_logsum_final_Notebook]
Index: []
parcels where combo_interim_afterAdjust != combo_logsum_Notebook after Adjustment calculated in Notebook: 
Empty DataFrame
Columns: [parcel_id, subzone_interim, combo_interim_beforeAdjust, combo_interim_afterAdjust, parcel_id_baus, cml_baus, cnml_baus, combo_logsum_baus, PARCEL_ID_from_lookup, taz_sub_from_lookup, cml_Notebook, cnml_Notebook, combo_logsum_Notebook, combo_logsum_final_Notebook]
Index: []
parcels where BAUS output combo_logsum != combo_logsum_Notebook after Adjustment calculated in Notebo

### 3.3 V3 run 308

In [65]:
# Outer-join the BAUS data (V3) with the Notebook calculation so parcels
# present on only one side are kept in the comparison table.
logsum_compare_V3 = pd.merge(
    BAUS_logsum_V3,
    parcel_logsums_P_copy,
    how='outer',
    left_on='parcel_id',
    right_on='PARCEL_ID_from_lookup',
)
print(len(logsum_compare_V3))
logsum_compare_V3.head()

1956212


Unnamed: 0,parcel_id,subzone_interim,combo_interim_beforeAdjust,combo_interim_afterAdjust,parcel_id_baus,cnml_baus,cml_baus,combo_logsum_baus,PARCEL_ID_from_lookup,taz_sub_from_lookup,cml_Notebook,cnml_Notebook,combo_logsum_Notebook,combo_logsum_final_Notebook
0,229116,725b,620.431861,620.431861,229116,365.643917,424.787944,620.431861,229116.0,725b,424.787944,365.643917,620.431861,620.431861
1,244166,715a,604.509242,604.509242,244166,348.53925,425.969991,604.509242,244166.0,715a,425.969991,348.53925,604.509242,604.509242
2,202378,820a,681.756013,681.756013,202378,359.639886,492.116127,681.756013,202378.0,820a,492.116127,359.639886,681.756013,681.756013
3,2004420,1401c,259.98314,259.98314,2004420,183.330399,246.652741,259.98314,2004420.0,1401c,246.652741,183.330399,259.98314,259.98314
4,340332,763a,737.307097,737.307097,340332,400.876065,506.431033,737.307097,340332.0,763a,506.431033,400.876065,737.307097,737.307097


In [66]:
# Parcels with no subzone on either side: neither in the BAUS interim data
# nor in the lookup used by the Notebook calculation.
baus_missing_V3 = logsum_compare_V3['subzone_interim'].isnull()
lookup_missing_V3 = logsum_compare_V3['taz_sub_from_lookup'].isnull()
noSubzone_both_V3 = logsum_compare_V3.loc[baus_missing_V3 & lookup_missing_V3]
print('{} parcels are missing subzone in both BAUS and Notebook calculation'.format(len(noSubzone_both_V3)))

# Rows without a PARCEL_ID_from_lookup are reported as synthetic parcels; the
# second figure is the size of the separately built parcel_lookup_nosubzone table.
n_synthetic_V3 = int(noSubzone_both_V3['PARCEL_ID_from_lookup'].isnull().sum())
print('including {} synthetic parcels and {} parcels missing subzone in the lookup input'.format(
    n_synthetic_V3, parcel_lookup_nosubzone.shape[0]))

1518 parcels are missing subzone in both BAUS and Notebook calculation
including 4 synthetic parcels and 1514 parcels missing subzone in the lookup input


In [67]:
# Parcels that BAUS assigned a subzone to even though the lookup has none.
baus_has_subzone_V3 = logsum_compare_V3['subzone_interim'].notnull()
lookup_missing_V3 = logsum_compare_V3['taz_sub_from_lookup'].isnull()
noSubzone_lookupOnly_V3 = logsum_compare_V3.loc[baus_has_subzone_V3 & lookup_missing_V3]
print('{} parcels have subzone in BAUS but not in the lookup'.format(len(noSubzone_lookupOnly_V3)))

0 parcels have subzone in BAUS but not in the lookup


In [68]:
# Parcels that have a subzone in the lookup but none in the BAUS interim data.
baus_missing_V3 = logsum_compare_V3['subzone_interim'].isnull()
lookup_has_subzone_V3 = logsum_compare_V3['taz_sub_from_lookup'].notnull()
noSubzone_BAUSonly_V3 = logsum_compare_V3.loc[baus_missing_V3 & lookup_has_subzone_V3]
print('{} parcels have subzone in the lookup, but missing subzone in BAUS. Of these parcels: \n'.format(
    len(noSubzone_BAUSonly_V3)))

# (message template, column whose value distribution is printed)
value_reports_V3 = [
    ('combo_interim_beforeAdjust stats: \n{}\n', 'combo_interim_beforeAdjust'),
    ('combo_interim_afterAdjust stats: \n{}\n', 'combo_interim_afterAdjust'),
    ('combo_logsum in BAUS output: \n{}\n', 'combo_logsum_baus'),
    ('combo_logsum before Adjustment calculated in Notebook: \n{}\n', 'combo_logsum_Notebook'),
    ('combo_logsum after Adjustment calculated in Notebook: \n{}\n', 'combo_logsum_final_Notebook'),
]
for template, column in value_reports_V3:
    print(template.format(noSubzone_BAUSonly_V3[column].value_counts()))

0 parcels have subzone in the lookup, but missing subzone in BAUS. Of these parcels: 

combo_interim_beforeAdjust stats: 
Series([], Name: combo_interim_beforeAdjust, dtype: int64)

combo_interim_afterAdjust stats: 
Series([], Name: combo_interim_afterAdjust, dtype: int64)

combo_logsum in BAUS output: 
Series([], Name: combo_logsum_baus, dtype: int64)

combo_logsum before Adjustment calculated in Notebook: 
Series([], Name: combo_logsum_Notebook, dtype: int64)

combo_logsum after Adjustment calculated in Notebook: 
Series([], Name: combo_logsum_final_Notebook, dtype: int64)



In [69]:
# Restrict to parcels with a subzone on both sides, then print the rows whose
# BAUS and Notebook logsum values differ by more than 1e-10.
baus_has_subzone_V3 = logsum_compare_V3['subzone_interim'].notnull()
lookup_has_subzone_V3 = logsum_compare_V3['taz_sub_from_lookup'].notnull()
haveSubzone_both_V3 = logsum_compare_V3.loc[baus_has_subzone_V3 & lookup_has_subzone_V3]
print('{} parcels have subzone in both the lookup and BAUS. Compare logsum values: \n'.format(
    len(haveSubzone_both_V3)))

# (message template, BAUS-side column, Notebook-side column)
logsum_checks_V3 = [
    ('parcels where combo_interim_beforeAdjust != combo_logsum_Notebook before Adjustment calculated in Notebook: \n{}',
     'combo_interim_beforeAdjust', 'combo_logsum_Notebook'),
    ('parcels where combo_interim_afterAdjust != combo_logsum_Notebook after Adjustment calculated in Notebook: \n{}',
     'combo_interim_afterAdjust', 'combo_logsum_final_Notebook'),
    ('parcels where BAUS output combo_logsum != combo_logsum_Notebook after Adjustment calculated in Notebook: \n{}',
     'combo_logsum_baus', 'combo_logsum_final_Notebook'),
]
for template, baus_col, notebook_col in logsum_checks_V3:
    gap_V3 = (haveSubzone_both_V3[baus_col] - haveSubzone_both_V3[notebook_col]).abs()
    print(template.format(haveSubzone_both_V3.loc[gap_V3 > 1e-10]))

1954694 parcels have subzone in both the lookup and BAUS. Compare logsum values: 

parcels where combo_interim_beforeAdjust != combo_logsum_Notebook before Adjustment calculated in Notebook: 
Empty DataFrame
Columns: [parcel_id, subzone_interim, combo_interim_beforeAdjust, combo_interim_afterAdjust, parcel_id_baus, cnml_baus, cml_baus, combo_logsum_baus, PARCEL_ID_from_lookup, taz_sub_from_lookup, cml_Notebook, cnml_Notebook, combo_logsum_Notebook, combo_logsum_final_Notebook]
Index: []
parcels where combo_interim_afterAdjust != combo_logsum_Notebook after Adjustment calculated in Notebook: 
Empty DataFrame
Columns: [parcel_id, subzone_interim, combo_interim_beforeAdjust, combo_interim_afterAdjust, parcel_id_baus, cnml_baus, cml_baus, combo_logsum_baus, PARCEL_ID_from_lookup, taz_sub_from_lookup, cml_Notebook, cnml_Notebook, combo_logsum_Notebook, combo_logsum_final_Notebook]
Index: []
parcels where BAUS output combo_logsum != combo_logsum_Notebook after Adjustment calculated in Notebo

## Tag each parcel's subzone status in V2 (run 307) to visualize the impact

In [72]:
# Label every parcel of the V2 comparison with its subzone situation so the
# result can be mapped; parcels matching none of the three cases keep ''.
parcels_subzone_logsum_tag = logsum_compare_V2.copy()

baus_has_subzone = parcels_subzone_logsum_tag['subzone_interim'].notnull()
lookup_has_subzone = parcels_subzone_logsum_tag['taz_sub_from_lookup'].notnull()

parcels_subzone_logsum_tag['subzone_chk'] = ''
subzone_cases = [
    ('have_subzone_in_both', baus_has_subzone & lookup_has_subzone),
    ('missing_subzone_in_BAUS', ~baus_has_subzone & lookup_has_subzone),
    ('missing_subzone_in_both', ~baus_has_subzone & ~lookup_has_subzone),
]
for tag, case_mask in subzone_cases:
    parcels_subzone_logsum_tag.loc[case_mask, 'subzone_chk'] = tag

parcels_subzone_logsum_tag.to_csv(
    r'M:\Data\Urban\BAUS\PBA50\explore_developer_model\runs_feasibility_all_included\parcels_subzone_logsum_tag_defaultLookup.csv',
    index=False)

In [71]:
# Number of parcels carrying each subzone_chk tag.
parcels_subzone_logsum_tag['subzone_chk'].value_counts()

have_subzone_in_both       1512960
missing_subzone_in_BAUS     441734
missing_subzone_in_both       1518
Name: subzone_chk, dtype: int64