In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import geopandas as gpd
import numpy as np
import os
import fiona
from datetime import datetime

In [2]:
if os.getenv('USERNAME')=='ywang':
    data_folder      = 'C:\\Users\\ywang\\Documents\\Files_for_Py\\BASIS\\BOC\\outputs'
    
ALLOWED_BUILDING_TYPE_CODES = ["HS","HT","HM","OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]
RES_BUILDING_TYPE_CODES     = ["HS","HT","HM",                                        "MR"          ]
NONRES_BUILDING_TYPE_CODES  = [               "OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]

# used in calculate_capacity()
SQUARE_FEET_PER_ACRE                = 43560.0
SQUARE_FEET_PER_DU                  = 1200.0
FEET_PER_STORY                      = 11.0
PARCEL_USE_EFFICIENCY               = 0.5
SQUARE_FEET_PER_EMPLOYEE            = 350.0
SQUARE_FEET_PER_EMPLOYEE_OFFICE     = 175.0
SQUARE_FEET_PER_EMPLOYEE_INDUSTRIAL = 500.0

## export data // will visualize in Tableau
today = datetime.today().strftime('%Y_%m_%d')

## Precessed PLU BOC data

version = '_pba40Type'  # refers to different versions of the hybrid plu data
p10_plu_boc = pd.read_csv(os.path.join(data_folder, today+'_p10_plu_boc' + version + '.csv'))

In [3]:
print("p10_plu_boc.COUNTY_ID.value_counts()")
display(p10_plu_boc.COUNTY_ID.value_counts())

# nodev_zmod value counts
print("p10_plu_boc.nodev_zmod.value_counts()")
display(p10_plu_boc.nodev_zmod.value_counts())

display(p10_plu_boc.dtypes)

p10_plu_boc.COUNTY_ID.value_counts()


85    436446
1     389884
13    325208
81    195487
97    171997
75    153355
95    137961
41     96816
55     49053
0          1
Name: COUNTY_ID, dtype: int64

p10_plu_boc.nodev_zmod.value_counts()


0    1893087
1      63121
Name: nodev_zmod, dtype: int64

PARCEL_ID        float64
COUNTY_ID          int64
CTY_NAME          object
ACRES            float64
juris_id_zmod     object
                  ...   
RS_idx            object
RB_idx            object
MR_idx            object
MT_idx            object
ME_idx            object
Length: 74, dtype: object

In [4]:
# Fill 'NaN' in allowed development types with 0 in order to calculate 'allowRes' and 'allowNonRes' next

def fill_dev_type_NA(df, boc_source):
    for dev_type in ALLOWED_BUILDING_TYPE_CODES:
        df[dev_type+"_"+boc_source].fillna(value=0.0, inplace = True)
    return df

In [5]:
# Assign allow residential and/or non-residential

def allow_dev_Type(df,boc_source,nodev_source):
    
    # allow_res is sum of allowed building types that are residential
    res_allowed_columns = [btype+'_'+boc_source for btype in RES_BUILDING_TYPE_CODES]
    df['allow_res_' +boc_source] = df[res_allowed_columns].sum(axis=1)
    df['status_dua_'+boc_source] = "good"
    
    # allow_nonres is the sum of allowed building types that are non-residential
    nonres_allowed_columns = [btype+'_'+boc_source for btype in NONRES_BUILDING_TYPE_CODES]
    df['allow_nonres_'+boc_source] = df[nonres_allowed_columns].sum(axis=1)
    df['status_far_'  +boc_source] = "good"

    """    
    df['allowRes'+zoning_yr] = np.nan    
    # a parcel is 'allowNonRes' is at least one of the non-residential development types is allowed
    df['allowNonRes'+zoning_yr] = df[nonResLs].sum(axis=1) > 0 
    
    df['allowNonRes'+zoning_yr] = np.nan
    # a parcel is 'allowRes' is at least one of the residential development types is allowed
    df['allowRes'+zoning_yr] = df[reLs].sum(axis=1) > 0"""
    
    return df[['PARCEL_ID', 'ACRES',
               'nodev_'        +nodev_source,
               'max_dua_'      +boc_source,
               'max_far_'      +boc_source,
               'max_height_'   +boc_source, 
               "allow_res_"    +boc_source,
               "status_dua_"   +boc_source,
               "allow_nonres_" +boc_source,
               "status_far_"   +boc_source] + [dev_type+"_"+boc_source for dev_type in ALLOWED_BUILDING_TYPE_CODES]]

In [6]:
# Fill nan in max_FAR / max_DUA 

def fill_intensity(df,boc_source,nodev_source):
    
#    df['calc_DUA'+zoning_yr] = np.nan
#    df['calc_FAR'+zoning_yr] = np.nan
    
    # fill in missing NaN for Max DUA assuming a HU is 1200 square feet and a floor is 11 feet high
    missing_dua_idx = ((df['allow_res_'+boc_source  ] > 0     ) &
                       (df['max_dua_'  +boc_source  ].isnull()) &
                       (df['nodev_'    +nodev_source] == 0    ))
    print("Developable residential parcels missing 'max_dua_{}': {:,}".format(
        boc_source, len(df.loc[missing_dua_idx])))
    
    # we can only fill in missing if either max_far or max_height is not null   
    max_dua_from_far    = df['max_far_'    +boc_source] * SQUARE_FEET_PER_ACRE / SQUARE_FEET_PER_DU
    max_far_from_height = df['max_height_' +boc_source] / FEET_PER_STORY * PARCEL_USE_EFFICIENCY
    max_dua_from_height = max_far_from_height * SQUARE_FEET_PER_ACRE / SQUARE_FEET_PER_DU
    
    max_dua_calculate = pd.concat([
                        df['max_dua_'+boc_source],
                        max_dua_from_far,
                        max_dua_from_height], axis=1).min(axis=1)
    
    df['max_dua_new_'+boc_source] = max_dua_calculate
    dua_calculated_idx = df['max_dua_'+boc_source].isnull() & df['max_dua_new_'+boc_source].notnull()
    df.loc[dua_calculated_idx,'max_dua_'   +boc_source] = df.loc[dua_calculated_idx,'max_dua_new_'+boc_source]
    df.loc[dua_calculated_idx,'calc_dua_'  +boc_source] = 'Yuqi'
    df.loc[dua_calculated_idx,'status_dua_'+boc_source] = "calculated"
        
    # recalculate missing_dua_idx
    missing_dua_idx = ((df['allow_res_'+boc_source  ] > 0     ) &
                       (df['max_dua_'  +boc_source  ].isnull()) &
                       (df['nodev_'    +nodev_source] == 0    ))
    print("After filling in missing values, developable residential parcels missing 'max_dua_{}': {:,}\n".format(
         boc_source, len(df.loc[missing_dua_idx])))
    df.loc[missing_dua_idx,'status_dua_'+boc_source] = "missing"  # this shouldn't happen
    
    
    # fill in missing values for max_far assuming a floor is eleven feet tall and land coverage 50%
    missing_far_idx = ((df['allow_nonres_'+boc_source  ] > 0      ) & 
                       (df['max_far_'     +boc_source  ].isnull() ) & 
                       (df['nodev_'       +nodev_source] == 0     ))
    print("Developable non-residential parcels missing 'max_far_{}: {:,}".format(
        boc_source, len(df.loc[missing_far_idx])))
    
    # we can only fill in missing if max_height is not null
    max_far_from_height = df['max_height_' +boc_source] / FEET_PER_STORY * PARCEL_USE_EFFICIENCY
    
    max_far_calculate = pd.concat([
                        df['max_far_'+boc_source],
                        max_far_from_height], axis=1).min(axis=1)

    df['max_far_new_'+boc_source] = max_far_calculate
    far_calculated_idx = df['max_far_'+boc_source].isnull() & df['max_far_new_'+boc_source].notnull()
    df.loc[far_calculated_idx,'max_far_'   +boc_source] = df.loc[far_calculated_idx,'max_far_new_'+boc_source]
    df.loc[far_calculated_idx,'calc_far_'  +boc_source] = 'Yuqi'
    df.loc[far_calculated_idx,'status_far_'+boc_source] = "calculated"
    
    # recalculate missing_far_idx
    missing_far_idx = ((df['allow_nonres_'+boc_source  ] > 0      ) & 
                       (df['max_far_'     +boc_source  ].isnull() ) & 
                       (df['nodev_'       +nodev_source] == 0     ))
    print("After filling in missing values: developable non-residential parcels missing 'max_far_{}': {:,}\n".format(
        boc_source, len(df.loc[missing_far_idx])))
    df.loc[missing_far_idx,'status_far_'+boc_source] = "missing"
    
    return df[['PARCEL_ID', 'ACRES',
               'nodev_'        +nodev_source,
               'max_dua_'      +boc_source,
               'max_far_'      +boc_source,
               'max_height_'   +boc_source, 
               "allow_res_"    +boc_source,
               "calc_dua_"     +boc_source,
               "status_dua_"   +boc_source,
               "allow_nonres_" +boc_source,
               "calc_far_"     +boc_source,
               "status_far_"   +boc_source] + [dev_type+"_"+boc_source for dev_type in ALLOWED_BUILDING_TYPE_CODES]]

In [7]:
# Calculate capacity

def calculate_capacity(df,boc_source,nodev_source):
    
    # DUA calculations apply to parcels 'allowRes' and not marked as "nodev"
    df['units_'+boc_source] = df['ACRES'] * df['max_dua_'+boc_source]   
    
    # zero out units for 'nodev' parcels or parcels that don't allow residential
    zero_unit_idx = (df['allow_res_'+boc_source] == 0) | (df['nodev_'+nodev_source] == 1)
    df.loc[zero_unit_idx,'units_'   +boc_source] = 0
        
    # FAR calculations apply to parcels 'allowNonRes' and not marked as "nodev"
    df['sqft_' +boc_source] = df['ACRES'] * df['max_far_'+boc_source] * SQUARE_FEET_PER_ACRE 
    
    # zero out sqft for 'nodev' parcels or parcels that don't allow non-residential
    zero_sqft_idx = (df['allow_nonres_'+boc_source] == 0) | (df['nodev_'+nodev_source] == 1)
    df.loc[zero_sqft_idx,'sqft_'       +boc_source] = 0
    
    df['Ksqft_'+boc_source] = df['sqft_'+boc_source]*0.001

    # of nonresidential uses, only office allowed
    office_idx   = (df['OF_'+boc_source] == 1) & (df['allow_nonres_'+boc_source]== 1)
    # of nonresidential uses, only industrial allowed
    allow_indust = df[['IL_'+boc_source,'IW_'+boc_source,'IH_'+boc_source]].sum(axis = 1)
    indust_idx   = (allow_indust > 0) & (df['allow_nonres_'+boc_source] == allow_indust)
    # calculate non-residential capacity in employment
    df[               'emp_'+boc_source] = df['sqft_'+boc_source] / SQUARE_FEET_PER_EMPLOYEE
    df.loc[office_idx,'emp_'+boc_source] = df['sqft_'+boc_source] / SQUARE_FEET_PER_EMPLOYEE_OFFICE
    df.loc[indust_idx,'emp_'+boc_source] = df['sqft_'+boc_source] / SQUARE_FEET_PER_EMPLOYEE_INDUSTRIAL
    
    if ('calc_dua_' +boc_source in df.columns) & ('calc_far_'+boc_source in df.columns):
        return df[['PARCEL_ID', 'ACRES',
                   'nodev_'        +nodev_source,
                   'max_dua_'      +boc_source,
                   'max_far_'      +boc_source,
                   'max_height_'   +boc_source, 
                   "allow_res_"    +boc_source,
                   "calc_dua_"     +boc_source,
                   "status_dua_"   +boc_source,
                   "units_"        +boc_source,
                   "allow_nonres_" +boc_source,
                   "calc_far_"     +boc_source,
                   "status_far_"   +boc_source,
                   "sqft_"         +boc_source,
                   "Ksqft_"        +boc_source,
                   "emp_"          +boc_source]]
    else:
        return df[['PARCEL_ID', 'ACRES',
                   'nodev_'        +nodev_source,
                   'max_dua_'      +boc_source,
                   'max_far_'      +boc_source,
                   'max_height_'   +boc_source, 
                   "allow_res_"    +boc_source,
                   "status_dua_"   +boc_source,
                   "units_"        +boc_source,
                   "allow_nonres_" +boc_source,
                   "status_far_"   +boc_source,
                   "sqft_"         +boc_source,
                   "Ksqft_"        +boc_source,
                   "emp_"          +boc_source]]

In [8]:
# Calculate PBA50 BOC capacity

p10_plu_boc_basis = p10_plu_boc.copy()
p10_plu_boc_basis_fill_dev_type = fill_dev_type_NA(p10_plu_boc_basis, 'basis')
p10_plu_boc_basis_allow_dev_type = allow_dev_Type(p10_plu_boc_basis_fill_dev_type,'basis','zmod')
p10_plu_boc_basis_fill_intensity = fill_intensity(p10_plu_boc_basis_fill_dev_type,'basis','zmod')
cap_basis_all_attrs = calculate_capacity(p10_plu_boc_basis_fill_intensity, "basis", "zmod")

# Calculate PBA40 BOC capacity

p10_plu_boc_pba40 = p10_plu_boc.copy()
p10_plu_boc_pba40_fill_dev_type = fill_dev_type_NA(p10_plu_boc_pba40, 'pba40')
p10_plu_boc_pba40_allow_dev_type = allow_dev_Type(p10_plu_boc_pba40_fill_dev_type,'pba40','zmod')
p10_plu_boc_pba40_fill_intensity = fill_intensity(p10_plu_boc_pba40_allow_dev_type,'pba40','zmod')
cap_pba40_all_attrs = calculate_capacity(p10_plu_boc_pba40_fill_intensity, "pba40",'zmod')

Developable residential parcels missing 'max_dua_basis': 116,160
After filling in missing values, developable residential parcels missing 'max_dua_basis': 60,369

Developable non-residential parcels missing 'max_far_basis: 26,989
After filling in missing values: developable non-residential parcels missing 'max_far_basis': 26,649



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https:/

Developable residential parcels missing 'max_dua_pba40': 227,930


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)


After filling in missing values, developable residential parcels missing 'max_dua_pba40': 7,636

Developable non-residential parcels missing 'max_far_pba40: 226,113


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


After filling in missing values: developable non-residential parcels missing 'max_far_pba40': 44,371



In [9]:
## Export intensity data to evaluate

# get raw intensity data to compare with imputed intensity data
basis_intensity_raw = p10_plu_boc[['PARCEL_ID','max_dua_basis','max_far_basis','max_height_basis']]
basis_intensity_raw.columns = ['PARCEL_ID','max_dua_basis_raw','max_far_basis_raw','max_height_basis_raw']

pba40_intensity_raw = p10_plu_boc[['PARCEL_ID','max_dua_pba40','max_far_pba40','max_height_pba40']]
pba40_intensity_raw.columns = ['PARCEL_ID','max_dua_pba40_raw','max_far_pba40_raw','max_height_pba40_raw']

intensity = p10_plu_boc_basis_fill_intensity.merge(
    p10_plu_boc_pba40_fill_intensity,
    on  = ['PARCEL_ID','ACRES','nodev_zmod'],
    how = 'inner')[['PARCEL_ID','ACRES','nodev_zmod',
                    'max_dua_basis','max_dua_pba40',
                    'max_far_basis','max_far_pba40',
                    'max_height_basis','max_height_pba40']].merge(
    p10_plu_boc[['PARCEL_ID','COUNTY_ID', 'juris_id_zmod', 'building_types_source_basis',
                 'source_basis','plu_id_basis', 'plu_jurisdiction_basis', 'plu_description_basis']],
    on  = ['PARCEL_ID'],
    how = 'inner').merge(
            basis_intensity_raw, 
            on = 'PARCEL_ID', 
            how = 'left').merge(
                pba40_intensity_raw, 
                on = 'PARCEL_ID', 
                how = 'left')

intensity.to_csv(os.path.join(data_folder, today+"_devIntensity.csv"), index = False)

In [10]:
print("cap_pba40 has {:,} rows; head:".format(len(cap_pba40_all_attrs)))
display(cap_pba40_all_attrs.head())

print("cap_basis has {:,} rows; head:".format(len(cap_basis_all_attrs)))
display(cap_basis_all_attrs.head())

cap_pba40 has 1,956,208 rows; head:


Unnamed: 0,PARCEL_ID,ACRES,nodev_zmod,max_dua_pba40,max_far_pba40,max_height_pba40,allow_res_pba40,calc_dua_pba40,status_dua_pba40,units_pba40,allow_nonres_pba40,calc_far_pba40,status_far_pba40,sqft_pba40,Ksqft_pba40,emp_pba40
0,229116.0,3.36052,0,2.0,,,1.0,,good,6.721041,0.0,,good,0.0,0.0,0.0
1,244166.0,1.294423,0,3.0,,,1.0,,good,3.883268,0.0,,good,0.0,0.0,0.0
2,202378.0,14.993605,0,8.7,1.363636,30.0,2.0,,good,130.444362,0.0,Yuqi,calculated,0.0,0.0,0.0
3,2004420.0,316.247146,0,0.00417,1.590909,35.0,1.0,,good,1.318751,0.0,Yuqi,calculated,0.0,0.0,0.0
4,340332.0,0.621275,1,23.0,2.363636,52.0,3.0,,good,0.0,1.0,Yuqi,calculated,0.0,0.0,0.0


cap_basis has 1,956,208 rows; head:


Unnamed: 0,PARCEL_ID,ACRES,nodev_zmod,max_dua_basis,max_far_basis,max_height_basis,allow_res_basis,calc_dua_basis,status_dua_basis,units_basis,allow_nonres_basis,calc_far_basis,status_far_basis,sqft_basis,Ksqft_basis,emp_basis
0,229116.0,3.36052,0,0.0,0.0,,1.0,,good,0.0,0.0,,good,0.0,0.0,0.0
1,244166.0,1.294423,0,14.0,0.35,35.0,1.0,,good,18.121919,0.0,,good,0.0,0.0,0.0
2,202378.0,14.993605,0,0.0,0.0,,2.0,,good,0.0,0.0,,good,0.0,0.0,0.0
3,2004420.0,316.247146,0,0.01666,0.0,35.0,1.0,,good,5.268677,0.0,,good,0.0,0.0,0.0
4,340332.0,0.621275,1,1.0,0.01,30.0,3.0,,good,0.0,1.0,,good,0.0,0.0,0.0


In [11]:
# output all attributes

capacity_all_atts = pd.merge(left=cap_pba40_all_attrs, 
                             right=cap_basis_all_attrs, 
                             how="inner", 
                             on=["PARCEL_ID","ACRES",'nodev_zmod']).merge(
                                 p10_plu_boc[['PARCEL_ID','COUNTY_ID', 'juris_id_zmod', 'building_types_source_basis',
                                          'source_basis','plu_id_basis', 'plu_jurisdiction_basis', 'plu_description_basis']+[
                                           dev_type+'_pba40' for dev_type in ALLOWED_BUILDING_TYPE_CODES] + [
                                          dev_type+'_basis' for dev_type in ALLOWED_BUILDING_TYPE_CODES]], 
                                 on = 'PARCEL_ID', 
                                 how = 'inner')
print("capacity has {:,} rows; head:".format(len(capacity_all_atts)))

for i in ['PARCEL_ID', 'nodev_zmod',
          'allow_res_pba40', 'allow_res_basis','allow_nonres_pba40','allow_nonres_basis'] + [
          dev_type+'_pba40' for dev_type in ALLOWED_BUILDING_TYPE_CODES] + [
          dev_type+'_basis' for dev_type in ALLOWED_BUILDING_TYPE_CODES]:
    capacity_all_atts[i] = capacity_all_atts[i].fillna(-1).astype(np.int64)

print(capacity_all_atts.dtypes)

capacity_all_atts.to_csv(os.path.join(data_folder, today+'_devCapacity_allAttrs'+ version + '.csv'), index = False)

capacity has 1,956,208 rows; head:
PARCEL_ID          int64
ACRES            float64
nodev_zmod         int64
max_dua_pba40    float64
max_far_pba40    float64
                  ...   
RS_basis           int64
RB_basis           int64
MR_basis           int64
MT_basis           int64
ME_basis           int64
Length: 64, dtype: object


In [12]:
# output some subset of capacity data to visualize in Tableau

capacity = capacity_all_atts.drop(columns = [
    dev_type+'_pba40' for dev_type in ALLOWED_BUILDING_TYPE_CODES] + [
    dev_type+'_basis' for dev_type in ALLOWED_BUILDING_TYPE_CODES] + [
    'max_dua_pba40','max_far_pba40','max_dua_basis','max_far_basis','max_height_pba40','max_height_basis',
    'building_types_source_basis','source_basis','plu_id_basis', 'plu_jurisdiction_basis', 'plu_description_basis'])

print("capacity has {:,} rows; head:".format(len(capacity)))
display(capacity.head())

capacity.to_csv(os.path.join(data_folder, today+'_devCapacity'+ version + '.csv'), index = False)

capacity has 1,956,208 rows; head:


Unnamed: 0,PARCEL_ID,ACRES,nodev_zmod,allow_res_pba40,calc_dua_pba40,status_dua_pba40,units_pba40,allow_nonres_pba40,calc_far_pba40,status_far_pba40,...,status_dua_basis,units_basis,allow_nonres_basis,calc_far_basis,status_far_basis,sqft_basis,Ksqft_basis,emp_basis,COUNTY_ID,juris_id_zmod
0,229116,3.36052,0,1,,good,6.721041,0,,good,...,good,0.0,0,,good,0.0,0.0,0.0,1,livr
1,244166,1.294423,0,1,,good,3.883268,0,,good,...,good,18.121919,0,,good,0.0,0.0,0.0,1,livr
2,202378,14.993605,0,2,,good,130.444362,0,Yuqi,calculated,...,good,0.0,0,,good,0.0,0.0,0.0,1,hayw
3,2004420,316.247146,0,1,,good,1.318751,0,Yuqi,calculated,...,good,5.268677,0,,good,0.0,0.0,0.0,97,uson
4,340332,0.621275,1,3,,good,0.0,1,Yuqi,calculated,...,good,0.0,1,,good,0.0,0.0,0.0,1,frem
