# Simplify Node
1 -> 1

Documentation: https://toblerity.org/shapely/manual.html#object.simplify

~~*Note:* make sure to edit `simplifyThreshold`!~~ No longer needed: the node now simplifies dynamically, using 0.1% of the polygon's perimeter as the threshold.

In [None]:
###
### USER DEFINED
###

# Threshold in the same unit as the projection
#   * RD New: metres
#   * WGS84: degrees. In the Netherlands 1 m ~= 1e-5 degrees
#simplifyThreshold = 1e-5    # not needed with dynamic simplify!

In [120]:
###
### HEADER
###
import geopandas as gpd
import pandas as pd
from shapely import wkt as WKT
import re
import numpy as np

# geopandas (geometry) to pandas (wkt)
def gdfToDf(gdf):
    """Convert a GeoDataFrame into a plain DataFrame carrying WKT text.

    The input is copied, each geometry is serialised with shapely's WKT
    writer (``trim=True``) into a new 'wkt' column, and the 'geometry'
    column is dropped from the result. ``gdf`` itself is not modified.
    """
    table = pd.DataFrame(gdf, copy=True)
    table['wkt'] = gdf.geometry.apply(lambda geom: WKT.dumps(geom, trim=True))
    return table.drop(columns='geometry')

# pandas (wkt) to geopandas (geometry)
def dfToGdf(df):
    """Convert a DataFrame with a 'wkt' column into a GeoDataFrame.

    The input is copied, every WKT string is parsed with shapely into a
    geometry stored in a new 'geometry' column, and the 'wkt' column is
    dropped from the result. ``df`` itself is not modified.
    """
    geoTable = gpd.GeoDataFrame(df, copy=True)
    geoTable['geometry'] = df['wkt'].apply(WKT.loads)
    return geoTable.drop(columns='wkt')

# compute some extra descriptive info about a WKT string
def extractInfo(wkt, *args):
    """Return simple size statistics of a WKT string.

    The statistics are:
      * 'points':    number of comma-separated pieces in the text
                     (commas + 1), an approximation of the point count
      * 'chars':     total number of characters
      * 'precision': mean number of digits that follow a decimal point;
                     0.0 when the text contains no decimal point at all

    Parameters:
      wkt   -- the WKT text to inspect
      *args -- optional; when given, args[0] is the name of the single
               statistic to return (so the function can be used with
               ``Series.apply(extractInfo, args=('points',))``)

    Returns the full dictionary when no name is given, otherwise the value
    of the requested statistic (None for an unknown name).
    """
    # Raw string: '\.' in a normal literal is an invalid escape sequence.
    decimals = re.findall(r'\.([0-9]*)', wkt)

    # np.mean([]) would yield NaN plus a RuntimeWarning, and that NaN would
    # poison any average taken over a whole column, so report 0 decimals.
    precision = np.mean([len(digits) for digits in decimals]) if decimals else 0.0

    info = {
        'points': len(wkt.split(',')),
        'chars': len(wkt),
        'precision': precision,
    }

    # return dictionary, or the value of one spec
    if not args:
        return info
    return info.get(args[0])

In [123]:
###
### SIMPLIFY
###

# input: rebuild geometries from the WKT column of the incoming table
gdf = dfToGdf(input_table)
print('Shape input_table:', input_table.shape)
pointsIn = input_table.wkt.apply(extractInfo, args=('points',))
print('Average # points/polygon: %.1f' % np.mean(pointsIn))

# fixed-threshold alternative, kept for reference
#gdf.geometry = gdf.geometry.simplify(simplifyThreshold, preserve_topology=True)

# dynamic simplify: the threshold is 0.1% of each geometry's perimeter
def dynamicSimplify(geom):
    return geom.simplify(geom.length*0.001, preserve_topology=True)

gdf.geometry = gdf.geometry.apply(dynamicSimplify)

# output: back to a plain table with a WKT column
output_table = gdfToDf(gdf)
print('Shape output_table:', output_table.shape)
pointsOut = output_table.wkt.apply(extractInfo, args=('points',))
print('Average # points/polygon: %.1f' % np.mean(pointsOut))

Shape input_table: (12, 3)
Average # points/polygon: 12379.6
Shape output_table: (12, 3)
Average # points/polygon: 163.2


In [124]:
###
### FOOTER
###
print('Preview output_table (first 5 rows):')
preview = output_table.head()
try:
    # rich table rendering when running inside a Jupyter Notebook
    display(preview)
except NameError:
    # no `display` available here: fall back to plain-text output
    print(preview)

Preview output_table (first 5 rows):


Unnamed: 0,id,provincien,wkt
0,1,Noord-Holland,MULTIPOLYGON (((5.16595803513613 53.0011795601...
1,2,Groningen,MULTIPOLYGON (((6.287547703816826 53.342509935...
2,3,Overijssel,MULTIPOLYGON (((6.110121318419511 52.441553119...
3,4,Zeeland,MULTIPOLYGON (((3.83923160158486 51.7592703536...
4,5,Friesland,MULTIPOLYGON (((6.191971949520441 53.412179416...


## Testing and showing results
Don't add this in KNIME

In [None]:
# Jupyter magic: show matplotlib figures inline in the notebook output
%matplotlib inline
# quick visual check of the geometries currently held in `gdf`
gdf.plot()

## Load some input data to test the cells above
Run `Header` node first!

In [122]:
###
### SOURCE - ONLY FOR PREPARATION
###
from os.path import join

# location of the shapefile on the local machine
folder = '/home/ab/i/Open-data/shapefiles/shp-provincie'
filename = 'provincie-grenzen.shp'

# read the file and reproject it to WGS84 (EPSG:4326)
shapefilePath = join(folder, filename)
gdf = gpd.read_file(shapefilePath).to_crs(epsg=4326)

# output: plain table with a WKT column
output_table = gdfToDf(gdf)

# copy output to input, so the cells that expect `input_table` can be run
input_table = output_table.copy()

## Investigation: how many points do we want per polygon?
The goal is to end up with at most 32k characters per WKT string.

In [None]:
# take row number n
n = 3
wkt0 = output_table.loc[n, 'wkt']
geom = gdf.loc[n, 'geometry']

nChars = len(wkt0)
# A MultiPolygon has no `.exterior`; iterate over its parts (`.geoms`) and
# treat a single Polygon as a one-part collection. Only exterior rings are
# counted, so the result for a plain Polygon is the same as before.
parts = getattr(geom, 'geoms', [geom])
nPoints = sum(len(part.exterior.coords) for part in parts)
print('Amount of characters in WKT:', nChars)
print('Amount of points in polygon:', nPoints)
print('Amount of characters per point:', nChars/nPoints)
print('Length (perimeter):', geom.length)

excelMax = 32767    # chars per cell
# points that fit in one cell, given the observed characters per point
maxPoints = excelMax/(nChars/nPoints)
print('Approx. max points per polygon for Excel:', maxPoints)
# last expression of the cell: lets the notebook render the geometry
geom

In [None]:
# simplify using 0.1% of the perimeter as threshold
O = geom.length*0.001
geomS = geom.simplify(O, preserve_topology=True)

nChars = len(geomS.wkt)
# A MultiPolygon has no `.exterior`; iterate over its parts (`.geoms`) and
# treat a single Polygon as a one-part collection. Only exterior rings are
# counted, so the result for a plain Polygon is the same as before.
partsS = getattr(geomS, 'geoms', [geomS])
nPoints = sum(len(part.exterior.coords) for part in partsS)
print('Amount of characters in WKT:', nChars)
print('Amount of points in polygon:', nPoints)
# last expression of the cell: lets the notebook render the simplified geometry
geomS