# Simplify multipolygon
We are going to simplify a multipolygon:
- Simplify the multipolygon
- Make sure that the boundaries are respected

- import layers:
    -  multipolygon layer

- Documentation:
    - geopandas simplify: https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html
    - shapely simplify: https://shapely.readthedocs.io/en/latest/manual.html#object.simplify

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Polygon, MultiPolygon, Point
from shapely.ops import snap
import os
from sys import path
import matplotlib.pyplot as plt
import topojson as tp

In [2]:
"""Check memory usage"""
# https://stackoverflow.com/questions/39100971/how-do-i-release-memory-used-by-a-pandas-dataframe
import psutil
def usage():
    """Print the memory currently used by this Python process.

    The value is the first field reported by ``psutil``'s ``memory_info()``
    divided by 2**20 (i.e. MiB if that field is in bytes). Nothing is
    returned; the figure is only printed.
    """
    current_process = psutil.Process(os.getpid())
    used = current_process.memory_info()[0] / float(2 ** 20)
    print("Memory status: ", used)


In [3]:
"""Read the input as a geodataframe"""
# Shapefile to simplify. NOTE(review): the file name suggests GADM 4.1 level-2
# boundaries of Colombia — confirm against the data source.
input_path = r"Z:\z_resources\ruben\gadm_col2\gadm41_col_2.shp"

# Load the whole layer into a GeoDataFrame; later cells read it as `gdf`.
gdf = gpd.read_file(input_path)
# Report memory right after loading so later readings can be compared to it.
usage()

Memory status:  225.1484375


In [4]:
"""
Here we check the geometry type and the number of polygons of the import file.
Idea taken from: https://stackoverflow.com/questions/60780959/how-to-filter-a-geodataframe-by-geometry-type
"""
# Distinct geometry type names present in the layer.
geometry_type = gdf.geom_type.unique().tolist()

# Maps each geometry type name to the number of features of that type.
# Iterating an empty list is a no-op, so no explicit emptiness check is needed.
geometry_dic = {}
for geom_name in geometry_type:
    geometry_gdf = gdf.loc[gdf.geom_type == geom_name]  # features of this type only
    geometry_dic[geom_name] = len(geometry_gdf)

print(geometry_dic)
usage()
# geometry_str = ''.join(geometry_gdf.geom_type.unique().tolist()) #transform the list to string

{'Polygon': 1089, 'MultiPolygon': 30}
Memory status:  225.59765625


# Simplification by Topojson

In [None]:
"""Basic concept, don't execute"""
# https://gis.stackexchange.com/questions/325766/geopandas-simplify-results-in-gaps-between-polygons
# Simplification tolerance handed to toposimplify.
# NOTE(review): presumably in degrees, since the layer is reprojected to EPSG:4326 below — confirm.
unit = 0.005
# Build a topology from the layer reprojected to EPSG:4326, with prequantization disabled.
topo = tp.Topology(gdf.to_crs(epsg='4326'), prequantize=False)
# Simplify the topology and convert the result back into a GeoDataFrame.
simplification = topo.toposimplify(unit).to_gdf()
# Quick visual check of the simplified layer.
simplification.plot()

Here we compare several precision (tolerance) values and look at how the results differ.

In [5]:
def get_area(gdf, projected_crs="EPSG:3857"):
    """Return the total area of every feature in ``gdf`` in square kilometres.

    The previous implementation returned only the area of the first feature,
    which under-reports any layer with more than one row. For a single-row
    layer the result is unchanged.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Layer whose polygon areas are summed. It must have a CRS.
    projected_crs : str, optional
        CRS used when ``gdf`` is in a geographic (angular) CRS. Defaults to
        Web Mercator (EPSG:3857), as before. Web Mercator is not an
        equal-area projection, so absolute values are distorted; pass a local
        or equal-area CRS (see https://epsg.io/) when absolute areas matter.

    Returns
    -------
    float
        Sum of the feature areas divided by 10**6 (km2, assuming the
        projected CRS uses metres).

    Raises
    ------
    ValueError
        If ``gdf`` has no CRS, because the area units would be unknown.
    """
    # https://gis.stackexchange.com/questions/218450/getting-polygon-areas-using-geopandas
    if gdf.crs is None:
        raise ValueError("get_area needs a GeoDataFrame with a CRS set")

    # Areas computed in a geographic CRS would be in squared degrees, so
    # reproject first. to_crs returns a new frame; the caller's is untouched.
    if gdf.crs.is_geographic:
        gdf = gdf.to_crs(projected_crs)

    area_km2 = gdf.area / 10**6  # per-feature area, m2 -> km2
    return float(area_km2.sum())

def _count_exterior_vertices(geom):
    """Return the number of exterior-ring coordinates of one (multi)polygon."""
    if geom is None or geom.is_empty:
        # Features can be missing or collapse to nothing; they add no vertices.
        return 0
    if geom.geom_type == "MultiPolygon":
        # Add up the exterior ring of every part of the multigeometry.
        return sum(len(part.exterior.coords) for part in geom.geoms)
    return len(geom.exterior.coords)


def get_number_of_vertex(gdf):
    """Return the total number of exterior-ring vertices of all features.

    The previous implementation overwrote its counter on every row, so it
    returned the count of the last feature only and failed on an empty
    layer. Counts are now summed over every row. For a single-row layer the
    result is unchanged.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Layer of Polygon / MultiPolygon features.

    Returns
    -------
    int
        Sum of ``len(exterior.coords)`` over every polygon part; 0 for an
        empty layer. Interior rings (holes) are not counted, as before.
    """
    # https://gis.stackexchange.com/questions/328884/counting-number-of-vertices-in-geopandas
    return sum(_count_exterior_vertices(geom) for geom in gdf.geometry)

In [6]:
"""Prepare the df and get the intial data parameters"""
# Column layout of the comparison table; one row per simplification level.
output_columns = ['precision', 'size_km2', 'area_percentage', 'number_of_vertex', 'vertex_percentage', 'area_vertex_ratio']

# Baseline measurements of the unsimplified layer; later cells express their
# results as percentages of these two values.
initial_calc_area = get_area(gdf)
initial_vertex_number = get_number_of_vertex(gdf)

# Baseline row: precision 0 stands for "not simplified", so both percentages
# are 100 by definition.
info = [
    0,                                          # precision
    initial_calc_area,                          # size_km2
    100,                                        # area_percentage
    initial_vertex_number,                      # number_of_vertex
    100,                                        # vertex_percentage
    initial_calc_area / initial_vertex_number,  # area_vertex_ratio
]

# Build the table directly from the baseline row. Concatenating onto an empty
# frame (the previous approach) yields object-dtype columns and raises a
# FutureWarning in recent pandas versions.
df_output = pd.DataFrame([info], columns=output_columns)

# `gdf` is intentionally kept: building the topology still needs it.
# del gdf
usage()

Memory status:  227.9375


In [8]:
"""make an output for different levels"""

# Build the topology once; every tolerance below is applied to this same object.
topo = tp.Topology(gdf.to_crs(epsg=4326), prequantize=False)

# units = [0.00005, 0.00015, 0.00020]
# units = [0.000005, 0.000015, 0.000020]
units = [0.000005, 0.000015, 0.00002, 0.00005, 0.00015, 0.0002]  # ascending tolerance

output_columns = ['precision', 'size_km2', 'area_percentage', 'number_of_vertex', 'vertex_percentage', 'area_vertex_ratio']
# The simplified layers are written next to the input shapefile.
output_dir = os.path.dirname(input_path)
rows = []  # one result row per tolerance, appended to df_output after the loop

for i in units:
    simplification = topo.toposimplify(i).to_gdf()  # do the simplification

    # Measure the simplified layer and relate it to the unsimplified baseline.
    calc_area = get_area(simplification)
    vertex_number = get_number_of_vertex(simplification)
    rows.append([
        i,                                            # precision
        calc_area,                                    # size_km2
        calc_area * 100 / initial_calc_area,          # area_percentage
        vertex_number,                                # number_of_vertex
        vertex_number * 100 / initial_vertex_number,  # vertex_percentage
        # Guard against a layer that collapsed to zero vertices.
        calc_area / vertex_number if vertex_number else float("nan"),  # area_vertex_ratio
    ])

    # Export the simplified layer. The file name is unchanged, but the path is
    # now built with os.path instead of manual backslash splitting and a
    # '\c' literal, which is an invalid escape sequence.
    output_file = os.path.join(output_dir, 'colombia_simplified_level_2_{}.shp'.format(i))
    simplification.to_file(output_file, index=False)
    print("finished: ", i)
    # No explicit memory release is needed: rebinding `simplification` on the
    # next iteration drops the reference to the previous result.
    # source: https://stackoverflow.com/questions/39100971/how-do-i-release-memory-used-by-a-pandas-dataframe

# Append all levels in one concat instead of re-copying the table on every
# iteration; ignore_index gives the rows a clean 0..n index.
df_output = pd.concat([df_output, pd.DataFrame(rows, columns=output_columns)], axis=0, ignore_index=True)
info = []  # empty the list
    
    

finished:  5e-06
finished:  1.5e-05
finished:  2e-05
finished:  5e-05
finished:  0.00015
finished:  0.0002


# Simplification by Geopandas

In [4]:
"""It wont work for multipolygons since the algorithm is not topology aware"""
# Issue source:  https://gis.stackexchange.com/questions/325766/geopandas-simplify-results-in-gaps-between-polygons
# Simplify the layer with a tolerance of 100 while asking for topology preservation.
# NOTE(review): the tolerance is presumably expressed in the units of gdf's CRS; if the
# layer is still in a geographic CRS (degrees), 100 is far too coarse — confirm before running.
gdf_simplified = gdf.simplify(100, preserve_topology=True)
# Observed run times on this dataset:
# 7 minutes topology false
# 10 minutes topology true