In [1]:
import geopandas as gpd
import pandas as pd
from shapely.ops import unary_union

# Notebook overview

In this notebook we merge the filtered crosswalk polygons with their processed extensions.

**Input**: 
- Processed crosswalk polygons as created in notebook 3.
- Processed extended polygons as created in notebook 4.

**Output**: Shapefile with the final, fully processed crosswalk polygons.

**Previous notebook**: 4. Extended polygon filtering.

# Load data

In [2]:
# Load the processed crosswalk polygons (without extensions) from the shapefile created in notebook 3
CW_polygons = gpd.read_file("../data/output/filtered polygons.shp")

In [3]:
# Load the processed extended polygons from the shapefile created in notebook 4
extension_polygons = gpd.read_file("../data/output/filtered extended polygons.shp")

In [4]:
def match_extension(extension_polygons, CW_polygons):
    """Pair every extended polygon with the original crosswalk polygons it intersects.

    Parameters
    ----------
    extension_polygons : GeoDataFrame
        Frame whose ``geometry`` column holds the extended polygons.
    CW_polygons : GeoDataFrame
        Frame whose ``geometry`` column holds the original crosswalk polygons.

    Returns
    -------
    list
        One ``[extension_geometry, intersecting_geometries]`` entry per row of
        ``extension_polygons``, in row order. ``intersecting_geometries`` is a
        (possibly empty) list with the crosswalk geometries for which
        ``extension_geometry.intersects(...)`` is true, in ``CW_polygons`` row order.
    """
    # Materialise the crosswalk geometries once, so the inner scan does not have
    # to rebuild row objects for every single extended polygon.
    crosswalk_geometries = list(CW_polygons['geometry'])

    return [
        [extension, [cw for cw in crosswalk_geometries if extension.intersects(cw)]]
        for extension in extension_polygons['geometry']
    ]

In [5]:
# Pair each extended polygon with the original crosswalk polygons it intersects
matches = match_extension(extension_polygons=extension_polygons, CW_polygons=CW_polygons)

In [6]:
def merge_polygons(matches):
    """Union each extended polygon with the polygons that were matched to it.

    ``matches`` is a list of ``[extension, originals]`` entries. For every
    entry, ``extension`` is unioned with each geometry in ``originals`` in
    turn; an entry whose ``originals`` is empty yields the extension unchanged.

    Returns a list with one merged geometry per entry, in input order.
    """
    merged = []

    for entry in matches:
        combined, originals = entry[0], entry[1]

        # Nothing was matched to this extension: keep it as it is
        if len(originals) == 0:
            merged.append(combined)
            continue

        # Fold the matched polygons into the extension one by one
        for original in originals:
            combined = combined.union(original)
        merged.append(combined)

    return merged

In [7]:
# Union every extended polygon with the crosswalk polygons matched to it
final_pols = merge_polygons(matches=matches)

In [8]:
# Wrap the merged geometries in a GeoDataFrame, tagged with CRS EPSG:28992
df = pd.DataFrame(data={"geometry": final_pols})
polygons_gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="epsg:28992")

In [9]:
def merge_intersecting_polygons(gdf):
    """Merge every group of connected, intersecting polygons into one geometry.

    Due to how the processing works, the same crosswalk can end up with
    multiple polygons. Polygons are grouped transitively: if A intersects B and
    B intersects C, all three land in the same group even when A and C do not
    intersect. Every input polygon is assigned to exactly one group, so no
    polygon is merged into more than one output geometry.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame whose active geometry column holds the polygons to merge.

    Returns
    -------
    list
        One ``unary_union`` result per group, ordered by the first row of each group.
    """
    geometry_column = gdf.geometry
    geometries = list(geometry_column)

    merged_polygons = []
    # Row positions (not index labels) already assigned to a group, so the
    # bookkeeping also holds when the index contains duplicate labels.
    assigned = set()

    for start in range(len(geometries)):
        if start in assigned:
            continue

        # Grow the group outwards from `start` until no new neighbours turn up
        assigned.add(start)
        group = [start]
        pending = [start]
        while pending:
            current = pending.pop()
            hits = geometry_column.intersects(geometries[current])
            for position, is_hit in enumerate(hits):
                if is_hit and position not in assigned:
                    assigned.add(position)
                    group.append(position)
                    pending.append(position)

        merged_polygons.append(
            unary_union([geometries[position] for position in sorted(group)])
        )

    return merged_polygons

In [10]:
# Merge the polygons that intersect each other
merged_polygons = merge_intersecting_polygons(gdf=polygons_gdf)

# Rebuild the GeoDataFrame from the merged geometries, reusing the CRS of the previous frame
polygons_gdf = gpd.GeoDataFrame(crs=polygons_gdf.crs, geometry=merged_polygons)

In [11]:
# Save the final merged polygons to a shapefile (.shp) in the output folder
path = "../data/output/final merged polygons.shp"
polygons_gdf.to_file(path)