In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)


# Steps
1. Load Data
2. Find adjacent parcels
3. Associate buildings with parcels
4. Find distances between buildings on adjacent parcels
5. Format data

# Load Data

In [2]:
# Read the building layer and the parcel layer into GeoDataFrames
# (buildings first, parcels second).
buildings_df, parcels_df = (
    gpd.read_file(layer_dir)
    for layer_dir in ('../data/Buildings/', '../data/Parcels_shape/')
)

# Find buildings in parcels

In [3]:
# Work on an independent (deep) copy so that replacing the geometries with
# centroids afterwards leaves the geometries in `buildings_df` untouched.
buildings_df_temp = buildings_df.copy(deep=True)

In [4]:
# Keep only parcels whose geometry is valid (independent of the building
# steps below).
parcels_df = parcels_df.loc[parcels_df.is_valid]

# Represent every building by the centroid of its geometry, then keep only
# rows whose (centroid) geometry is valid.
# NOTE(review): validity is evaluated on the centroid points rather than on
# the original building geometries — confirm that is the intended check.
buildings_df_temp.geometry = buildings_df_temp.geometry.centroid
buildings_df_temp = buildings_df_temp.loc[buildings_df_temp.is_valid]

In [5]:
# Spatially join each building centroid to the parcel it lies within.
# Only the identifying columns are passed to the join; AddNum and Street
# exist in both frames, so the join result carries them with the default
# _left/_right suffixes.
# `predicate=` replaces the `op=` keyword, which geopandas deprecated in
# 0.10 and removed in 1.0 (requires geopandas >= 0.10).
building_columns = ['OBJECTID', 'AddNum', 'Street', 'geometry']
parcel_columns = ['MBL', 'geometry', 'AddNum', 'Street']
building_to_parcels = gpd.sjoin(
    buildings_df_temp[building_columns],
    parcels_df[parcel_columns],
    how='inner',
    predicate='within',
)


In [6]:
# Number of rows produced by the building-to-parcel spatial join.
len(building_to_parcels)

16356

In [7]:
# Total number of buildings loaded, for comparison with the joined row count.
len(buildings_df)

16486

In [8]:
# Add each building's parcel id (MBL) to the building table. No join key is
# given, so pandas joins on the columns the two frames have in common; the
# inner join drops buildings that have no row in `building_to_parcels`.
building_parcel_ids = building_to_parcels[['OBJECTID', 'MBL']]
buildings_df = buildings_df.merge(building_parcel_ids, how='inner')

# Find adjacent parcels

In [23]:
# Self-join the parcels: one output row per pair of parcels whose geometries
# intersect. Columns from the two sides get the default _left/_right
# suffixes (e.g. MBL_left / MBL_right). A parcel intersects itself, so
# self-pairs are present in the result.
# `predicate=` replaces the `op=` keyword, which geopandas deprecated in
# 0.10 and removed in 1.0 (requires geopandas >= 0.10).
parcel_to_neighbours = gpd.sjoin(
    parcels_df,
    parcels_df,
    how='inner',
    predicate='intersects',
)

In [26]:
# Reduce the join result to the pair of parcel ids; nothing else is needed
# to describe which parcels are adjacent.
neighbour_pair_columns = ['MBL_left', 'MBL_right']
parcel_to_neighbours = parcel_to_neighbours[neighbour_pair_columns]

In [27]:
# Display the parcel adjacency pairs (MBL_left, MBL_right).
parcel_to_neighbours

Unnamed: 0,MBL_left,MBL_right
0,19-F-1,19-F-1
2375,20-F-7,19-F-1
3719,20-F-6,19-F-1
14568,,19-F-1
0,19-F-1,20-F-7
...,...,...
14560,87-C-1A,87-C-1
14568,,87-C-1
14552,87-C-1,87-C-1A
14560,87-C-1A,87-C-1A


# Get Neighbouring Buildings

In [48]:
# Step 1: attach to every building the parcels adjacent to its own parcel
# (buildings.MBL == pairs.MBL_left).
buildings_with_adjacent_parcels = buildings_df.merge(
    parcel_to_neighbours, left_on='MBL', right_on='MBL_left'
)

# Step 2: attach the buildings standing on those adjacent parcels
# (pairs.MBL_right == buildings.MBL). Columns that come from both building
# tables receive the default _x (first building) / _y (second building)
# suffixes.
neighbouring_buildings = buildings_with_adjacent_parcels.merge(
    buildings_df, left_on='MBL_right', right_on='MBL'
)

# Get Distances

In [59]:
# Keep only rows where both parcel ids are present and refer to two
# different parcels (drops missing ids and same-parcel pairs).
left_mbl = neighbouring_buildings['MBL_left']
right_mbl = neighbouring_buildings['MBL_right']
has_valid_MBL_pair = left_mbl.notna() & right_mbl.notna() & right_mbl.ne(left_mbl)
neighbouring_buildings = neighbouring_buildings[has_valid_MBL_pair]

In [60]:
# Distance between the two building geometries of every row.
# - `GeoSeries.distance` computes all row-wise distances in one vectorized
#   call instead of a Python-level `apply` over every row; both operands
#   come from the same frame, so they share the same index.
# - `.assign` returns a new frame, which avoids pandas'
#   SettingWithCopyWarning from writing a column onto a filtered frame.
neighbouring_buildings = neighbouring_buildings.assign(
    dist=gpd.GeoSeries(neighbouring_buildings['geometry_x']).distance(
        gpd.GeoSeries(neighbouring_buildings['geometry_y'])
    )
)

In [61]:
# Discard building pairs whose computed distance is exactly zero.
is_separated = neighbouring_buildings['dist'].ne(0)
neighbouring_buildings = neighbouring_buildings.loc[is_separated]

In [65]:
# Preview the building pairs together with their computed `dist` column.
neighbouring_buildings.head()

Unnamed: 0,OBJECTID_x,AddNum_x,Street_x,AddNum2_x,Street2_x,AddNum3_x,Street3_x,Name_x,Name2_x,Owner_x,UpdateStat_x,Shape_Leng_x,Shape_Area_x,geometry_x,MBL_x,MBL_left,MBL_right,OBJECTID_y,AddNum_y,Street_y,AddNum2_y,Street2_y,AddNum3_y,Street3_y,Name_y,Name2_y,Owner_y,UpdateStat_y,Shape_Leng_y,Shape_Area_y,geometry_y,MBL_y,dist
170955,1,600.0,MYSTIC VALLEY PKWY,,,,,,,,Complete,870.54028,41386.681078,"POLYGON ((756465.867 2977205.199, 756545.334 2...",1-C-12,1-C-12,1-C-3,4,196,BOSTON AVE,,,,,,,,Complete,512.515613,10876.223761,"POLYGON ((756691.951 2976976.663, 756762.243 2...",1-C-3,37.586404
170957,10287,184.0,BOSTON AVE,,,,,,,,Complete,200.014821,1715.468629,"POLYGON ((756564.361 2976863.974, 756578.287 2...",1-C-7,1-C-7,1-C-3,4,196,BOSTON AVE,,,,,,,,Complete,512.515613,10876.223761,"POLYGON ((756691.951 2976976.663, 756762.243 2...",1-C-3,77.844084
170958,10288,,,,,,,,,,Complete,69.466575,294.479665,"POLYGON ((756602.005 2976870.008, 756585.785 2...",1-C-7,1-C-7,1-C-3,4,196,BOSTON AVE,,,,,,,,Complete,512.515613,10876.223761,"POLYGON ((756691.951 2976976.663, 756762.243 2...",1-C-3,59.21576
170959,10289,,,,,,,,,,Complete,38.907833,85.31057,"POLYGON ((756616.682 2976873.776, 756621.916 2...",1-C-7,1-C-7,1-C-3,4,196,BOSTON AVE,,,,,,,,Complete,512.515613,10876.223761,"POLYGON ((756691.951 2976976.663, 756762.243 2...",1-C-3,60.948365
170960,10290,,,,,,,,,,Complete,30.01007,51.27367,"POLYGON ((756614.699 2976856.125, 756621.306 2...",1-C-6,1-C-6,1-C-3,4,196,BOSTON AVE,,,,,,,,Complete,512.515613,10876.223761,"POLYGON ((756691.951 2976976.663, 756762.243 2...",1-C-3,66.109377


In [63]:
# Per first building (OBJECTID_x), keep its two smallest distances
# (fewer when the building has fewer than two pair rows).
two_smallest_per_building = (
    neighbouring_buildings
    .groupby('OBJECTID_x')['dist']
    .nsmallest(2)
    .reset_index()
)

# Collapse those rows into one row per building holding a list of distances.
nearest_neighbors = (
    two_smallest_per_building
    .groupby('OBJECTID_x')['dist']
    .apply(list)
    .reset_index()
)

In [64]:
# Preview: one row per building with the list of its smallest distances.
nearest_neighbors.head()

Unnamed: 0,OBJECTID_x,dist
0,1,[37.58640352210986]
1,3,[70.54990076205233]
2,4,"[37.58640352210986, 59.21575983944597]"
3,5,"[3.2471458356227716, 6.426435927324226]"
4,6,"[4.31626086611907, 11.993298789951508]"


In [68]:
# Attach the list of nearest distances to the building table.
# The join is an inner join, so buildings with no row in `nearest_neighbors`
# are dropped; the right-hand key stays in the result as `OBJECTID_x`.
buildings_df = buildings_df.merge(
    nearest_neighbors,
    how='inner',
    left_on='OBJECTID',
    right_on='OBJECTID_x',
)

# Clean and Save

In [73]:
def get_kth(l, k):
    """Return the k-th smallest element of ``l`` (0-based), or NaN.

    Parameters
    ----------
    l : iterable of comparable values
        Values to rank, e.g. a list of distances. Non-iterable input such as
        a float NaN or ``None`` is tolerated and yields NaN.
    k : int
        Zero-based rank of the element to return after ascending sort.

    Returns
    -------
    The k-th smallest value, or ``np.nan`` when ``l`` cannot be sorted or
    has no element at position ``k``.
    """
    try:
        # Sorting happens inside the ``try`` so that the TypeError raised for
        # non-iterable input is handled like a too-short list.
        return sorted(l)[k]
    except (TypeError, IndexError):
        return np.nan

# Split the per-building distance list into one column per rank:
# rank 0 -> '1ST_CLOSEST', rank 1 -> '2ND_CLOSEST'.
for closest_column, rank in (('1ST_CLOSEST', 0), ('2ND_CLOSEST', 1)):
    # `k=rank` binds the current rank at definition time.
    buildings_df[closest_column] = buildings_df['dist'].apply(
        lambda x, k=rank: get_kth(x, k)
    )

In [83]:
# List the columns now present on the building table.
buildings_df.columns

Index(['OBJECTID', 'AddNum', 'Street', 'AddNum2', 'Street2', 'AddNum3',
       'Street3', 'Name', 'Name2', 'Owner', 'UpdateStat', 'Shape_Leng',
       'Shape_Area', 'geometry', 'MBL', 'OBJECTID_x', 'dist', '1ST_CLOSEST',
       '2ND_CLOSEST'],
      dtype='object')

In [84]:
# Write the address, parcel id and the two nearest-distance columns to CSV.
# The DataFrame index is written as well (pandas default).
export_columns = ['AddNum', 'Street', 'MBL', '1ST_CLOSEST', '2ND_CLOSEST']
buildings_export = buildings_df[export_columns]
buildings_export.to_csv('../data/buildings_clean.csv')