In [2]:
import pandas as pd

### update on 7/31/2024: make sure the earliest project on a parcel has "build" action

In [3]:
# pipeline data used in Draft Blueprint modeling
pipeline_dbp_version = pd.read_csv(
    r'M:\urban_modeling\baus\BAUS Inputs\basis_inputs\parcels_buildings_agents\development_pipeline_NP_2024-03-08.csv'
)
print('row count: {}'.format(len(pipeline_dbp_version)))
print('\nunique development_projects_id: {}\n'.format(pipeline_dbp_version['development_projects_id'].nunique()))
print(pipeline_dbp_version['action'].value_counts(dropna=False), '\n')
print(pipeline_dbp_version[['non_residential_sqft', 'residential_units']].sum(), '\n')
print(pipeline_dbp_version.groupby('action')[['non_residential_sqft', 'residential_units']].sum())

row count: 58723

unique development_projects_id: 58723

build    53713
add       5010
Name: action, dtype: int64 

non_residential_sqft    392460056.0
residential_units          439465.0
dtype: float64 

        non_residential_sqft  residential_units
action                                         
add               82823884.0            58031.0
build            309636172.0           381434.0


  pipeline_dbp_version = pd.read_csv(


In [4]:
# a function to modify the project list to ensure the earliest project on a parcel has "build" action

def ensure_earliest_proj_build(df):
    df = df.sort_values(["PARCEL_ID", "year_built"])
    prev_parcel_id = None
    for index, rec in df.iterrows():
        if rec.PARCEL_ID != prev_parcel_id:
            df.loc[index, "action"] = "build"
        prev_parcel_id = rec.PARCEL_ID
    
    return df

In [5]:
# apply the modification
pipeline_updated = ensure_earliest_proj_build(pipeline_dbp_version)

print(pipeline_updated['action'].value_counts(dropna=False), '\n')
print(pipeline_updated[['non_residential_sqft', 'residential_units']].sum(), '\n')
print(pipeline_updated.groupby('action')[['non_residential_sqft', 'residential_units']].sum())

build    54503
add       4220
Name: action, dtype: int64 

non_residential_sqft    392460056.0
residential_units          439465.0
dtype: float64 

        non_residential_sqft  residential_units
action                                         
add               56993041.0            38754.0
build            335467015.0           400711.0


In [6]:
# drop "geom_id" column
pipeline_updated.drop(['geom_id'], axis=1, inplace=False)
print(list(pipeline_updated))

['development_projects_id', 'OBJECTID', 'raw_id', 'building_name', 'site_name', 'action', 'address', 'city', 'zip', 'county', 'x', 'y', 'geom_id', 'year_built', 'building_type_det', 'building_type', 'building_type_id', 'development_type_id', 'building_sqft', 'non_residential_sqft', 'residential_units', 'unit_ave_sqft', 'tenure', 'rent_type', 'stories', 'parking_spaces', 'average_weighted_rent', 'last_sale_year', 'last_sale_price', 'deed_restricted_units', 'source', 'PARCEL_ID', 'ZONE_ID', 'edit_date', 'editor', 'data_source', 'parcel_id_basis', 'building_id_basis', 'residential_sqft', 'preserved_units', 'inclusionary_units', 'subsidized_units']


In [6]:
# # write out
# pipeline_updated.to_csv(
#     r'M:\urban_modeling\baus\BAUS Inputs\basis_inputs\parcels_buildings_agents\development_pipeline_2024-07-31.csv', index=False
# )

### update on 8/13/2024: remove "manual" projects from pipeline (while still making sure the earliest project on a parcel has "build" action)

In [7]:
# pipeline data used in Draft Blueprint modeling

pipeline_dbp_version = pd.read_csv(
    r'M:\urban_modeling\baus\BAUS Inputs\basis_inputs\parcels_buildings_agents\development_pipeline_NP_2024-03-08.csv'
)
print('row count: {}'.format(len(pipeline_dbp_version)))
print('\nunique development_projects_id: {}\n'.format(pipeline_dbp_version['development_projects_id'].nunique()))

print(pipeline_dbp_version['source'].value_counts(dropna=False), '\n')
print(pipeline_dbp_version[['non_residential_sqft', 'residential_units']].sum(), '\n')
print(pipeline_dbp_version.groupby('source')[['non_residential_sqft', 'residential_units']].sum())

row count: 58723

unique development_projects_id: 58723

basis_buildingsv0    44797
bas_bp_new            5807
cs                    3105
rf                    2785
basis                 1898
manual                 331
Name: source, dtype: int64 

non_residential_sqft    392460056.0
residential_units          439465.0
dtype: float64 

                   non_residential_sqft  residential_units
source                                                    
bas_bp_new                    7185174.0             8598.0
basis                        49555908.0            75564.0
basis_buildingsv0            54698303.0           108945.0
cs                          225220281.0           170523.0
manual                       55800390.0            73012.0
rf                                  0.0             2823.0


  pipeline_dbp_version = pd.read_csv(


In [8]:
# drop "manual"
pipeline_wo_manual = pipeline_dbp_version.loc[pipeline_dbp_version['source'] != 'manual']
print(len(pipeline_wo_manual))
print(pipeline_wo_manual['action'].value_counts(dropna=False), '\n')
print(pipeline_wo_manual[['non_residential_sqft', 'residential_units']].sum(), '\n')
print(pipeline_wo_manual.groupby('action')[['non_residential_sqft', 'residential_units']].sum())

# apply the modification on "action"
pipeline_wo_manual_updated = ensure_earliest_proj_build(pipeline_wo_manual)

print(pipeline_wo_manual_updated['action'].value_counts(dropna=False), '\n')
print(pipeline_wo_manual_updated[['non_residential_sqft', 'residential_units']].sum(), '\n')
print(pipeline_wo_manual_updated.groupby('action')[['non_residential_sqft', 'residential_units']].sum())

58392
build    53527
add       4865
Name: action, dtype: int64 

non_residential_sqft    336659666.0
residential_units          366453.0
dtype: float64 

        non_residential_sqft  residential_units
action                                         
add               56923039.0            31624.0
build            279736627.0           334829.0
build    54275
add       4117
Name: action, dtype: int64 

non_residential_sqft    336659666.0
residential_units          366453.0
dtype: float64 

        non_residential_sqft  residential_units
action                                         
add               36660146.0            21370.0
build            299999520.0           345083.0


In [9]:
# drop "geom_id" column
pipeline_wo_manual_updated.drop(['geom_id'], axis=1, inplace=False)
print(list(pipeline_wo_manual_updated))

['development_projects_id', 'OBJECTID', 'raw_id', 'building_name', 'site_name', 'action', 'address', 'city', 'zip', 'county', 'x', 'y', 'geom_id', 'year_built', 'building_type_det', 'building_type', 'building_type_id', 'development_type_id', 'building_sqft', 'non_residential_sqft', 'residential_units', 'unit_ave_sqft', 'tenure', 'rent_type', 'stories', 'parking_spaces', 'average_weighted_rent', 'last_sale_year', 'last_sale_price', 'deed_restricted_units', 'source', 'PARCEL_ID', 'ZONE_ID', 'edit_date', 'editor', 'data_source', 'parcel_id_basis', 'building_id_basis', 'residential_sqft', 'preserved_units', 'inclusionary_units', 'subsidized_units']


In [10]:
# write out
pipeline_wo_manual_updated.to_csv(
    r'M:\urban_modeling\baus\BAUS Inputs\basis_inputs\parcels_buildings_agents\development_pipeline_wo_manual_2024-08-13.csv', index=False
)