In [None]:
import pandas as pd
import geopandas as gpd
from shapely import wkb, wkt
from tqdm import tqdm

# --- Load data ---
path = r"INPUT"
output_file = r"OUTPUT"
df = pd.read_parquet(path)

# POLYGON_COORDINATES is parsed as WKT text. An earlier variant of this script
# decoded the column with `wkb.loads` instead; switch back to that if the
# input stores WKB bytes rather than WKT strings.
footprints = df["POLYGON_COORDINATES"].apply(wkt.loads)
gdf = gpd.GeoDataFrame(df, geometry=footprints, crs="EPSG:4326")

# Reproject from lon/lat so that the buffer distances used further down are
# expressed in projected units instead of degrees.
# NOTE(review): EPSG:3857 (Web Mercator) stretches ground distances by roughly
# 1 / cos(latitude), so a buffer of N units covers less than N metres on the
# ground away from the equator. Confirm this is acceptable, or reproject to a
# local metric CRS (e.g. the one returned by `gdf.estimate_utm_crs()`).
gdf = gdf.to_crs("EPSG:3857")

# Per-building contributions to the neighbourhood statistics:
#   FDR_PART    = perimeter * height
#   VOLUME_PART = footprint area * height
# Computed as vectorised column products (no row-wise `apply`) and inserted at
# position 0 so the resulting column order is VOLUME_PART, FDR_PART, <rest>.
gdf.insert(0, column="FDR_PART", value=gdf["PERIMETER_IN_METERS"] * gdf["HEIGHT"])
gdf.insert(0, column="VOLUME_PART", value=gdf["AREA_IN_METERS"] * gdf["HEIGHT"])

# `target_gdf` holds the buildings that receive statistics; `all_gdf` is the
# pool of buildings searched for neighbours. Both start as the full dataset.
target_gdf = gdf.copy()
all_gdf = gdf.copy()
print(gdf.columns)

# --- Build spatial index once ---
sindex = all_gdf.sindex

# --- Set distances ---
# Buffer radii, in the units of the projected CRS.
distances = [100]

# --- Batch processing ---
batch_size = 100  # number of target buildings handled per batch
results = []

# Only these columns of the neighbouring buildings take part in the join.
# Keeping the left frame this narrow (and the right frame geometry-only) means
# sjoin adds no `_left` / `_right` suffixes, so the aggregated columns are
# unambiguously the neighbours' values and not the target building's own.
neighbour_cols = ["FDR_PART", "VOLUME_PART", "geometry"]

for start in tqdm(range(0, len(target_gdf), batch_size), desc="Processing batches"):
    # iloc slicing clamps at the end of the frame, so the last batch may be short.
    target_batch = target_gdf.iloc[start:start + batch_size].copy()

    for dist in distances:
        density_col = f"building_density_{dist}"

        # Defaults for targets that end up with no building inside their buffer.
        # NOTE: "FDR" and "BUILT_UP_VOLUME" carry no distance suffix, so if
        # `distances` holds several values only the last one is kept.
        target_batch[density_col] = 0
        target_batch["FDR"] = 0.0
        target_batch["BUILT_UP_VOLUME"] = 0.0

        # One buffer polygon per target building, indexed like `target_batch`.
        buffers = target_batch[["geometry"]].copy()
        buffers["geometry"] = buffers.geometry.buffer(dist)

        # Coarse pre-filter: positions of every building whose bounding box
        # intersects the bounding box of at least one buffer in this batch.
        possible_indices = set()
        for geom in buffers.geometry:
            possible_indices.update(sindex.intersection(geom.bounds))
        candidates = all_gdf.iloc[sorted(possible_indices)][neighbour_cols]

        # Exact test: keep (building, buffer) pairs where the building lies
        # entirely within the buffer. `index_right` is the label of the target
        # building that owns the buffer.
        joined = gpd.sjoin(candidates, buffers, how="inner", predicate="within")
        per_target = joined.groupby("index_right")
        counts = per_target.size()
        sums = per_target[["FDR_PART", "VOLUME_PART"]].sum()

        # Sums are normalised by dist**2 (not by the buffer's actual area).
        norm = dist * dist
        target_batch.loc[counts.index, density_col] = counts.to_numpy()
        target_batch.loc[sums.index, "FDR"] = (sums["FDR_PART"] / norm).to_numpy()
        target_batch.loc[sums.index, "BUILT_UP_VOLUME"] = (
            sums["VOLUME_PART"] / norm
        ).to_numpy()

    results.append(target_batch)

# --- Stitch the batches together, restore lon/lat, and persist ---
# Every batch is a copy of a slice of the original frame with the statistic
# columns added, so concatenating them in order rebuilds the full table.
target_gdf = pd.concat(results).to_crs("EPSG:4326")
target_gdf.to_parquet(output_file)
print(target_gdf.head())


Index(['VOLUME_PART', 'FDR_PART', 'ID', 'LATITUDE', 'LONGITUDE',
       'POLYGON_COORDINATES', 'AREA_IN_METERS', 'FOOTPRINT_SOURCE', 'HEIGHT',
       'HEIGHT_MEDIAN', 'HEIGHT_MEAN', 'HEIGHT_MAX', 'VIDA_CONFIDENCE',
       'URBAN_SPLIT', 'GHSL_SMOD', 'FLOORS', 'GFA_IN_METERS',
       'PERIMETER_IN_METERS', 'BUILDING_FACES', 'ELEVATION', 'geometry'],
      dtype='object')


Processing batches: 100%|██████████| 3535/3535 [02:44<00:00, 21.55it/s]


   VOLUME_PART    FDR_PART                       ID   LATITUDE  LONGITUDE  \
0    902.13525  304.502376  92.36907384:23.39320112  23.393201  92.369074   
1    376.99920  165.556634  92.36969899:23.39298631  23.392986  92.369699   
2    252.91980  134.966273  92.36981597:23.39312758  23.393128  92.369816   
3    176.99355  114.142107  92.37755474:23.38434691  23.384347  92.377555   
4    229.08000  171.102456  92.37709417:23.38461214  23.384612  92.377094   

                                 POLYGON_COORDINATES  AREA_IN_METERS  \
0  POLYGON ((92.36912901 23.39312183, 92.36908839...        200.4745   
1  POLYGON ((92.36975045 23.39295130, 92.36972617...         83.7776   
2  POLYGON ((92.36985742 23.39309879, 92.36984568...         56.2044   
3  POLYGON ((92.37756196 23.38430653, 92.37759453...         39.3319   
4  POLYGON ((92.37710023 23.38458357, 92.37713380...         30.5440   

  FOOTPRINT_SOURCE  HEIGHT  HEIGHT_MEDIAN  ...      GHSL_SMOD  FLOORS  \
0           google     4.5     