In [None]:
# Process the DSM-DTM data within each building polygon (computationally intensive)

import geopandas as gpd
import os
import rasterio
from rasterstats import zonal_stats
import math

# Directory paths (absolute, machine-specific locations; adjust before running elsewhere)
# Folder that is scanned for building shapefiles (every entry ending in '.shp' is processed)
vector_dir = 'D:/FOLDER FROM THESIS/THESIS/Data/VektorDanmark/Bygning total simple'
# Single raster sampled for every building polygon
# NOTE(review): presumably the DSM-minus-DTM (height above ground) raster, judging by the path - confirm
raster_file_path = 'D:/FOLDER FROM THESIS/THESIS/Processed data/DSMminusDTM/dsmdtm.tif'
# Folder that receives one output file per processed batch
output_dir = 'D:/FOLDER FROM THESIS/THESIS/Processed data/DSMminusDTM/batched'


def _mean_height_in_polygon(raster_src, geometry):
    """Return the mean of band 1 of ``raster_src`` over the pixels touched by ``geometry``.

    Args:
        raster_src: An open rasterio dataset.
        geometry: A shapely geometry expressed in the raster's coordinate system.

    Returns:
        The mean reported by ``zonal_stats``, or ``None`` when the geometry is
        missing or empty, when its bounding box does not overlap the raster,
        or when ``zonal_stats`` finds no valid pixels.
    """
    if geometry is None or geometry.is_empty:
        return None

    # from_bounds generally yields fractional offsets and sizes. Snap the
    # window outward to whole pixels and clip it to the raster extent so that
    # the array read from disk and the affine handed to zonal_stats describe
    # exactly the same pixel grid.
    raw_window = rasterio.windows.from_bounds(*geometry.bounds, transform=raster_src.transform)
    col_start = max(math.floor(raw_window.col_off), 0)
    row_start = max(math.floor(raw_window.row_off), 0)
    col_stop = min(math.ceil(raw_window.col_off + raw_window.width), raster_src.width)
    row_stop = min(math.ceil(raw_window.row_off + raw_window.height), raster_src.height)
    if col_stop <= col_start or row_stop <= row_start:
        # The building lies completely outside the raster: nothing to sample.
        return None

    window = rasterio.windows.Window(col_start, row_start, col_stop - col_start, row_stop - row_start)
    pixels = raster_src.read(1, window=window)
    stats = zonal_stats(
        geometry,
        pixels,
        affine=raster_src.window_transform(window),
        stats="mean",
        nodata=raster_src.nodata,
        all_touched=True,
    )
    return stats[0]['mean'] if stats else None


def process_shapefile_in_batches(shapefile_path, raster_path, output_dir, batch_size=400000):
    """Compute the mean raster value per building and save the result batch by batch.

    The vector file is loaded once, split into consecutive slices of at most
    ``batch_size`` rows, and each slice is written to its own file named
    ``batch_<n>_<basename of shapefile_path>`` inside ``output_dir`` with an
    added ``height_mean`` column.

    Args:
        shapefile_path: Path of the vector file holding the building polygons.
        raster_path: Path of the raster to sample (band 1 is used).
        output_dir: Folder for the per-batch output files; created if missing.
        batch_size: Maximum number of buildings per output file.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    NOTE(review): the geometries are used as-is; this assumes the vector file
    and the raster share the same coordinate reference system - confirm.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Fail early (before the expensive processing) if the output folder cannot be created.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Keep the raster open for the whole run so every building reuses the same handle.
    with rasterio.open(raster_path) as raster_src:
        # Load the shapefile into a GeoDataFrame
        buildings_gdf = gpd.read_file(shapefile_path)
        total_buildings = len(buildings_gdf)
        print(f"Total buildings to process: {total_buildings}")

        # Calculate the number of batches needed
        num_batches = math.ceil(total_buildings / batch_size)

        for batch in range(num_batches):
            # Half-open row range [start_index, end_index) covered by this batch
            start_index = batch * batch_size
            end_index = min((batch + 1) * batch_size, total_buildings)
            print(f"Processing batch {batch + 1} of {num_batches}, buildings {start_index + 1} to {end_index}")

            # Copy the slice so adding a column does not touch the full GeoDataFrame
            buildings_batch = buildings_gdf.iloc[start_index:end_index].copy()

            # Iterate the geometry column directly; building a full row Series
            # per building (iterrows) is needless overhead here.
            # NOTE: when the output is an ESRI Shapefile, attribute names are
            # limited to 10 characters, so this column is typically stored
            # truncated (e.g. as 'height_mea').
            buildings_batch['height_mean'] = [
                _mean_height_in_polygon(raster_src, geometry)
                for geometry in buildings_batch.geometry
            ]

            # Construct the batch's output file path
            batch_output_path = os.path.join(output_dir, f"batch_{batch + 1}_{os.path.basename(shapefile_path)}")

            # Save the batch's data to a new file
            buildings_batch.to_file(batch_output_path)
            print(f"Saved batch {batch + 1} to {batch_output_path}")

# Run the batch processing for every '.shp' entry found in the vector directory
for filename in os.listdir(vector_dir):
    # Skip sidecar files (.dbf, .shx, .prj, ...) and anything else that is not a shapefile
    if not filename.endswith('.shp'):
        continue
    shp_file_path = os.path.join(vector_dir, filename)
    print(f"Processing {filename}...")
    process_shapefile_in_batches(shp_file_path, raster_file_path, output_dir)

print("Finished processing all shapefiles.")
