# Vector holes eraser.
- Iterate over every polygon and delete the internal holes
- A limit to the hole size can be set

In [1]:
import geopandas as gpd
from shapely import wkt
from shapely.geometry import Polygon, MultiPolygon
import os
import numpy as np

In [2]:
def convert_coords_gdf(gdf):
    """Strip the Z coordinate from every geometry of *gdf*.

    The 'geometry' column of *gdf* is overwritten in place with 2D copies of
    its Polygon / MultiPolygon geometries, and the same GeoDataFrame is
    returned.

    Raises:
        ValueError: if a geometry is neither a Polygon nor a MultiPolygon.
    """

    def convert_coords(geom):
        # Multi-part geometry: flatten each part recursively and reassemble.
        if isinstance(geom, MultiPolygon):
            return MultiPolygon([convert_coords(part) for part in geom.geoms])

        if not isinstance(geom, Polygon):
            raise ValueError("Unsupported geometry type")

        # Keep only the first two columns (X and Y) of each ring.
        shell_xy = np.array(geom.exterior.coords)[:, :2]
        holes_xy = [np.array(ring.coords)[:, :2] for ring in geom.interiors]
        return Polygon(shell_xy, holes=holes_xy)

    gdf['geometry'] = gdf['geometry'].apply(convert_coords)
    return gdf


def get_all_interiors(gdf):
    """Return the areas of all interior rings (holes) found in *gdf*.

    Every feature of the 'geometry' column is inspected, and both Polygon
    and MultiPolygon geometries are supported. Missing (None) geometries are
    skipped.

    Args:
        gdf: GeoDataFrame whose 'geometry' column holds polygonal geometries.

    Returns:
        A list with one area per hole, sorted in ascending order. Areas are
        planar areas expressed in the squared units of the layer coordinates.

    Raises:
        ValueError: if a geometry is neither a Polygon nor a MultiPolygon.
    """
    all_interiors = []
    for geom in gdf['geometry']:
        if geom is None:
            continue

        if isinstance(geom, Polygon):
            polygons = [geom]
        elif isinstance(geom, MultiPolygon):
            polygons = geom.geoms
        else:
            raise ValueError("Unsupported geometry type")

        for polygon in polygons:
            for interior in polygon.interiors:
                # An interior is a linear ring (it only has a length), so it
                # is wrapped in a Polygon to be able to measure its area.
                all_interiors.append(Polygon(interior).area)

    return sorted(all_interiors)

def holes_filter(gdf, epsg, hole_area):
    """Return a copy of *gdf* whose polygons have lost their small holes.

    A hole is kept only when its area is strictly greater than *hole_area*;
    holes with an area less than or equal to the threshold are removed.
    Every feature is processed, and Polygon and MultiPolygon geometries keep
    their original type. All attribute columns, the index and the CRS of the
    input are carried over; the input GeoDataFrame is not modified.

    Args:
        gdf: GeoDataFrame whose 'geometry' column holds polygonal geometries.
        epsg: Unused. Kept so existing calls keep working; the CRS of the
            result is taken from *gdf*.
        hole_area: Area threshold, in the squared units of the layer
            coordinates.

    Returns:
        A new GeoDataFrame with the same rows and columns as *gdf*.

    Raises:
        ValueError: if a geometry is neither a Polygon nor a MultiPolygon.
    """

    def _drop_small_holes(polygon):
        # An interior is a linear ring (it only has a length), so it is
        # wrapped in a Polygon to be able to measure its area.
        kept_interiors = [
            interior
            for interior in polygon.interiors
            if Polygon(interior).area > hole_area
        ]
        return Polygon(polygon.exterior, holes=kept_interiors)

    def _filter_geometry(geom):
        # Nothing to filter on missing or empty geometries.
        if geom is None or geom.is_empty:
            return geom
        if isinstance(geom, Polygon):
            return _drop_small_holes(geom)
        if isinstance(geom, MultiPolygon):
            return MultiPolygon([_drop_small_holes(part) for part in geom.geoms])
        raise ValueError("Unsupported geometry type")

    # Work on a copy so attributes, index and CRS come along for every row.
    no_holes_gdf = gdf.copy()
    no_holes_gdf['geometry'] = no_holes_gdf['geometry'].apply(_filter_geometry)
    return no_holes_gdf

In [3]:
# EPSG code of the projection shared by all the inputs
epsg = 25832

# Input vector layer (an alternative input is kept commented out below)
vector_file = r"Z:\z_resources\ruben\tin_buildings\ken_adm3_agg_dissolv.shp"
# vector_file = r"Z:\data\im-nca-germany\bfn_grosslandschaften\test_2.shp"

# Folder that contains the input file
output_path = os.path.split(vector_file)[0]

In [4]:
gdf = gpd.read_file(vector_file)

# Delete the third dimension.
# has_z is a boolean Series with one value per feature, so check all of them
# instead of only the feature labelled 0.
has_z = gdf.geometry.has_z
if has_z.any():
    print("We have the z dimension")
    gdf = convert_coords_gdf(gdf)

# Sorted areas of every hole found in the layer
interior_areas = get_all_interiors(gdf)

# NOTE: despite its name, this value is passed to holes_filter() as an AREA
# threshold; holes with an area up to this value are removed.
hole_length = 7000000

In [5]:
# Display the hole areas computed above (sorted in ascending order)
interior_areas

[]

In [6]:
# Filter the holes (hole_length is used as the area threshold) and write the
# result as "final.shp" in the folder of the input file
no_holes_gdf = holes_filter(gdf, epsg, hole_length)
no_holes_gdf.to_file(os.path.join(output_path, "final.shp"))