In [None]:
import glob
import os
from pathlib import Path

import geopandas as gpd
import pandas as pd

def merge_shapefiles(input_directory, output_shapefile):
    """
    Recursively find all shapefiles under a directory and merge them into one.

    Every ``*.shp`` file below ``input_directory`` is read into a GeoDataFrame.
    Files that cannot be read are reported and skipped. Layers whose CRS
    differs from the first CRS encountered are reprojected to it so that all
    geometries in the result share one coordinate system. The merged result
    is written to ``output_shapefile``; nothing is written when no shapefile
    is found or none can be read.

    Args:
        input_directory (str): Directory to search for shapefiles
        output_shapefile (str): Path to the output merged shapefile

    Returns:
        None
    """
    print(f"Searching for shapefiles in: {input_directory}")

    # Skip the output file itself: if it lives inside the input directory, a
    # second run would otherwise merge the previous result back in and
    # duplicate every feature.
    output_key = os.path.normcase(os.path.abspath(output_shapefile))
    pattern = os.path.join(input_directory, "**", "*.shp")
    # Sorted so the feature order of the merged file is reproducible.
    all_shapefiles = sorted(
        path
        for path in glob.glob(pattern, recursive=True)
        if os.path.normcase(os.path.abspath(path)) != output_key
    )

    if not all_shapefiles:
        print("No shapefiles found!")
        return

    print(f"Found {len(all_shapefiles)} shapefiles:")
    for shp in all_shapefiles:
        print(f"  - {shp}")

    gdfs = []
    target_crs = None  # CRS of the first layer that declares one

    # Read each shapefile; a failure on one file must not abort the merge.
    for shapefile in all_shapefiles:
        try:
            print(f"Reading: {shapefile}")
            gdf = gpd.read_file(shapefile)
            print(f"  - Found {len(gdf)} features with CRS: {gdf.crs}")
            if gdf.crs is None:
                # Cannot reproject without a source CRS; keep coordinates as-is.
                print("  - Warning: no CRS defined, coordinates are used unchanged")
            elif target_crs is None:
                target_crs = gdf.crs
            elif gdf.crs != target_crs:
                # Concatenating layers with different CRS is not possible
                # without bringing them into a common coordinate system.
                print(f"  - Reprojecting to: {target_crs}")
                gdf = gdf.to_crs(target_crs)
            gdfs.append(gdf)
        except Exception as e:
            print(f"  - Error reading {shapefile}: {e}")

    if not gdfs:
        print("No valid shapefiles could be read!")
        return

    # Concatenate all GeoDataFrames into one, renumbering the index
    print("Merging shapefiles...")
    merged_gdf = pd.concat(gdfs, ignore_index=True)

    # Create output directory if it doesn't exist (no-op when it does)
    output_dir = os.path.dirname(output_shapefile)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the merged GeoDataFrame to a new shapefile
    print(f"Saving merged shapefile to: {output_shapefile}")
    merged_gdf.to_file(output_shapefile)

    print(f"Successfully merged {len(gdfs)} shapefiles into {output_shapefile}")
    print(f"Total features in output: {len(merged_gdf)}")

if __name__ == "__main__":
    # Script entry point: merge every shapefile found below input_dir into a
    # single shapefile at output_shapefile.

    # Directory that is searched recursively for shapefiles; the path is
    # relative to the working directory - modify this if needed
    input_dir = "../Hackathon-Daten/Datenquellen/Solarpotenzial"  # root of the source shapefiles
    
    # Path of the merged shapefile to write
    output_shapefile = "../Hackathon-Daten/SolarMerged/potenzial.shp"
    
    # Run the merge function
    merge_shapefiles(input_dir, output_shapefile)