In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None


# Steps
1. Load Data
2. Find adjacent parcels
3. Associate buildings with parcels
4. Find distances between buildings on adjacent parcels
5. Format data

# Load Data

In [None]:
# Read the two input layers from their directories, relative to this notebook.
# The two reads are independent of each other.
parcels_df = gpd.read_file('../data/Parcels_shape/')
buildings_df = gpd.read_file('../data/Buildings/')

# Find buildings in parcels

In [None]:
# Work on a deep copy so that buildings_df keeps its original geometries;
# the copy's geometry is overwritten in the following cell.
buildings_df_temp = buildings_df.copy(deep=True)

In [None]:
# Drop parcels whose geometry is not valid.
parcels_df = parcels_df.loc[parcels_df.is_valid]

# Represent each building by the centroid of its geometry, then keep only rows
# whose (centroid) geometry is valid.
# NOTE(review): the validity check on buildings runs after the geometry has been
# replaced by its centroid, so it tests the centroid points and not the original
# building shapes - confirm this is the intended check.
buildings_df_temp.geometry = buildings_df_temp.geometry.centroid
buildings_df_temp = buildings_df_temp.loc[buildings_df_temp.is_valid]

In [None]:
# Spatially join each building centroid to the parcel it lies within.
# 'AddNum' and 'Street' are selected from both inputs, so the join result carries
# them with the default '_left' (building) / '_right' (parcel) suffixes.
# `predicate=` replaces the `op=` keyword, which geopandas deprecated in 0.10
# and removed in 1.0.
building_to_parcels = gpd.sjoin(
    buildings_df_temp[['OBJECTID', 'AddNum', 'Street', 'geometry']],
    parcels_df[['MBL', 'geometry', 'AddNum', 'Street']],
    how='inner',
    predicate='within',
)


In [None]:
# Number of (building, parcel) matches produced by the spatial join.
building_to_parcels.shape[0]

In [None]:
# Number of rows in buildings_df, for comparison with the join result above.
buildings_df.shape[0]

In [None]:
# Attach the parcel identifier (MBL) found by the spatial join to each building.
# No key is given, so pandas joins on the columns the two frames have in common;
# the inner join keeps only buildings that were matched to a parcel.
buildings_df = buildings_df.merge(
    building_to_parcels[['OBJECTID', 'MBL']],
    how='inner',
)

# Find adjacent parcels

In [None]:
# Self-join the parcels: one row per ordered pair of parcels whose geometries
# intersect. Every column appears twice, suffixed '_left' and '_right'.
# `predicate=` replaces the `op=` keyword, which geopandas deprecated in 0.10
# and removed in 1.0.
parcel_to_neighbours = gpd.sjoin(
    parcels_df,
    parcels_df,
    how='inner',
    predicate='intersects',
)

In [None]:
# Keep only the MBL value from the left and right side of the parcel self-join;
# every other joined column is discarded.
parcel_to_neighbours = parcel_to_neighbours[['MBL_left','MBL_right']]

In [None]:
# Display the parcel pairs for inspection.
parcel_to_neighbours

# Get Neighbouring Buildings

In [None]:
# Pair every building with every building that sits on a parcel paired with its
# own parcel in parcel_to_neighbours:
#   building (MBL) -> parcel pair (MBL_left, MBL_right) -> building (MBL).
# Columns that exist on both building frames get the '_x' suffix for the source
# building and '_y' for the paired building (e.g. geometry_x / geometry_y).
neighbouring_buildings = (
    buildings_df
    .merge(
        parcel_to_neighbours,
        how='inner',
        left_on='MBL',
        right_on='MBL_left',
    )
    .merge(
        buildings_df,
        how='inner',
        left_on='MBL_right',
        right_on='MBL',
        suffixes=('_x', '_y'),
    )
)

# Get Distances

In [None]:
# A pair is usable only when both parcel identifiers are present and they refer
# to two different parcels (this removes pairs within the same parcel).
has_valid_MBL_pair = (
    neighbouring_buildings.MBL_left.notna()
    & neighbouring_buildings.MBL_right.notna()
    & neighbouring_buildings.MBL_right.ne(neighbouring_buildings.MBL_left)
)
neighbouring_buildings = neighbouring_buildings.loc[has_valid_MBL_pair]

In [None]:
# Distance between the two geometries of every building pair, expressed in the
# units of the geometries' coordinates.
# The distance is computed element-wise on whole columns rather than with a
# Python-level row-by-row `apply`, and the result is attached with `.assign`,
# which returns a new frame, so no value is written into a filtered slice
# (which would raise SettingWithCopyWarning). Both columns come from the same
# frame and therefore share an index, so rows are paired one-to-one.
neighbouring_buildings = neighbouring_buildings.assign(
    dist=gpd.GeoSeries(neighbouring_buildings['geometry_x']).distance(
        gpd.GeoSeries(neighbouring_buildings['geometry_y'])
    )
)

In [None]:
# Discard pairs whose geometries are at distance exactly 0.
neighbouring_buildings = neighbouring_buildings.loc[neighbouring_buildings['dist'].ne(0)]

In [None]:
# Preview the first five building pairs.
neighbouring_buildings.head(5)

In [None]:
# For every source building (OBJECTID_x) keep its two smallest pair distances
# and collect them into one list per building.
# Result columns: 'OBJECTID_x' and 'dist' (a list of at most two values).
nearest_neighbors = (
    neighbouring_buildings
    .groupby('OBJECTID_x')['dist']
    .nsmallest(2)
    # nsmallest yields a Series indexed by (OBJECTID_x, original row label);
    # regroup on the OBJECTID_x index level to gather each building's values.
    .groupby(level='OBJECTID_x')
    .apply(list)
    .reset_index()
)

In [None]:
# Preview the first five per-building distance lists.
nearest_neighbors.head(5)

In [None]:
# Attach each building's list of nearest distances ('dist').
# This is an inner join (the pandas default), so only buildings that appear in
# nearest_neighbors are kept; the key column 'OBJECTID_x' stays in the result.
buildings_df = buildings_df.merge(
    nearest_neighbors,
    how='inner',
    left_on='OBJECTID',
    right_on='OBJECTID_x',
)

# Clean and Save

In [None]:
def get_kth(l, k):
    """Return the k-th smallest element of ``l`` (0-based), or NaN if unavailable.

    Parameters
    ----------
    l : iterable
        Values to rank. They must be mutually comparable.
    k : int
        Position in ascending order; 0 is the smallest element. Negative
        values follow normal Python indexing on the sorted list.

    Returns
    -------
    The k-th smallest element, or ``np.nan`` when ``l`` cannot be sorted
    (for example a float NaN placeholder instead of a list) or has too few
    elements for position ``k``.
    """
    try:
        # Sorting happens inside the try block so that a non-iterable input
        # (e.g. NaN for a row without distances) yields NaN instead of raising
        # an uncaught TypeError.
        return sorted(l)[k]
    except (TypeError, IndexError):
        return np.nan

# Split each building's distance list into two scalar columns: the smallest and
# the second-smallest distance (NaN where get_kth finds no such element).
buildings_df['1ST_CLOSEST'] = buildings_df['dist'].apply(get_kth, k=0)
buildings_df['2ND_CLOSEST'] = buildings_df['dist'].apply(get_kth, k=1)

In [None]:
# List the columns currently present on buildings_df.
buildings_df.columns

In [None]:
# Write the address fields, the parcel identifier and the two closest distances
# to CSV; the row index is written as well (pandas default).
buildings_df.to_csv(
    '../data/buildings_clean.csv',
    columns=['AddNum', 'Street', 'MBL', '1ST_CLOSEST', '2ND_CLOSEST'],
)