# Source Node
0 -> 1

Documentation: http://geopandas.org/io.html

*Let op:* Niet vergeten `folder` en `filename` aan te passen!

**Extra info: projecties**
* *WGS84*
    * https://epsg.io/4326
    * Amersfoort = `(52.1561110, 5.3878270)`
    * In graden
    * ~ 70km/degree longitude (horizontaal, ter hoogte van Nederland)
    * ~ 110km/degree latitude (verticaal)
* *RD New Amersfoort*
    * https://epsg.io/28992
    * Amersfoort = `(142892.19, 470783.87)`
    * In meters

In [1]:
###
### USER DEFINED
###

# ALWAYS use raw strings (r"..."): the backslash (\) is Python's escape
# character and would otherwise be interpreted inside the path.
folder = r"/home/ab/i/Open-data/shapefiles/shp-provincie"
filename = r"provincie-grenzen.shp"

In [2]:
###
### HEADER
###
import geopandas as gpd
import pandas as pd
from shapely import wkt as WKT
import re
import numpy as np

# geopandas (geometry) to pandas (wkt)
def gdfToDf(gdf):
    """Convert a GeoDataFrame into a plain DataFrame with a WKT text column.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Input frame. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``gdf`` without its active geometry column and with an extra
        ``wkt`` column holding every geometry serialised through
        ``WKT.dumps(..., trim=True)``. Rows whose geometry is ``None`` get a
        missing value instead of raising.
    """
    # Ask the frame which column is the active geometry instead of assuming
    # it is literally called 'geometry'.
    geometry_column = gdf.geometry.name
    wkt_text = gdf.geometry.apply(
        lambda geom: None if geom is None else WKT.dumps(geom, trim=True))
    # Drop the geometry first so the new 'wkt' column always ends up last.
    df = pd.DataFrame(gdf, copy=True).drop(columns=geometry_column)
    df['wkt'] = wkt_text
    return df

# pandas (wkt) to geopandas (geometry)
def dfToGdf(df):
    """Convert a DataFrame with a ``wkt`` text column into a GeoDataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame with a ``wkt`` column. It is not modified.

    Returns
    -------
    geopandas.GeoDataFrame
        Copy of ``df`` in which the ``wkt`` column is replaced by a
        ``geometry`` column produced by ``WKT.loads``. Missing WKT values
        (``None``/``NaN``) become ``None`` geometries instead of raising.
        No coordinate reference system is set on the result.
    """
    # WKT.loads cannot parse a missing cell, so map those to None explicitly.
    geometries = df.wkt.apply(
        lambda text: None if pd.isna(text) else WKT.loads(text))
    gdf = gpd.GeoDataFrame(df, copy=True)
    gdf['geometry'] = geometries
    return gdf.drop(columns='wkt')

# compute a few extra statistics about a WKT string (purely informational)
def extractInfo(wkt, *args):
    """Summarise a WKT string, or return one figure from that summary.

    Parameters
    ----------
    wkt : str
        Geometry in well-known-text form.
    *args
        Optional. When given, ``args[0]`` names the single statistic to
        return; any further positional arguments are ignored.

    Returns
    -------
    dict or value
        Without ``args``: a dict with the keys ``'points'`` (number of
        comma-separated pieces in the string), ``'chars'`` (string length)
        and ``'precision'`` (mean number of digits following a decimal
        point; ``nan`` when the string contains no decimal point).
        With ``args``: the value stored under ``args[0]``, or ``None`` when
        that name is not one of the keys above.
    """
    # Raw string: '\.' inside a normal literal is an invalid escape sequence.
    decimal_lengths = [len(digits) for digits in re.findall(r'\.([0-9]*)', wkt)]
    info = {
        'points': len(wkt.split(',')),
        'chars': len(wkt),
        # np.mean([]) yields nan but also emits a RuntimeWarning; return nan directly.
        'precision': np.mean(decimal_lengths) if decimal_lengths else np.nan,
    }
    if not args:
        return info
    return info.get(args[0])

In [3]:
###
### SOURCE
###
from os.path import join

# load the shapefile configured in the USER DEFINED section
shapefile_path = join(folder, filename)
print('Reading file:', shapefile_path)
gdf = gpd.read_file(shapefile_path)

# Re-project to the target coordinate reference system (CRS);
# keep exactly one of the two lines below active.
#gdf = gdf.to_crs(epsg=28992)    # RD New Amersfoort
gdf = gdf.to_crs(epsg=4326)    # WGS84

# hand the result over as a plain DataFrame with a WKT column
output_table = gdfToDf(gdf)

Reading file: /home/ab/i/Open-data/shapefiles/shp-provincie/provincie-grenzen.shp


In [9]:
###
### FOOTER
###
print('Shape output_table:', output_table.shape)
print('Preview output_table (first 5 rows):')
preview = output_table.head()
try:
    # rich table rendering when `display` is available (Jupyter Notebook)
    display(preview)
except NameError:
    # `display` is not defined here: fall back to plain text
    print(preview)

# summary statistics of the WKT column, one printed line per metric
for template, spec in (('Average # points/polygon: %.1f', 'points'),
                       ('Average # characters/WKT: %.1f', 'chars'),
                       ('Average precision: %.1f decimals', 'precision')):
    per_row = output_table.wkt.apply(extractInfo, args=(spec,))
    print(template % np.mean(per_row))

Shape output_table: (380, 3)
Preview output_table (first 5 rows):


Unnamed: 0,Gemeente_c,Gemeente_n,wkt
0,10,Delfzijl,"MULTIPOLYGON (((6.984386000000001 53.318369, 6..."
1,106,Assen,"POLYGON ((6.6345 53.0096, 6.63448 53.00955, 6...."
2,109,Coevorden,"POLYGON ((6.872629 52.654037, 6.872506 52.6532..."
3,114,Emmen,"POLYGON ((7.088798 52.832941, 7.08528 52.82852..."
4,118,Hoogeveen,"POLYGON ((6.63313 52.707555, 6.629633 52.70688..."


Average # points/polygon: 1045.7
Average # characters/WKT: 20893.4
Average precision: 6.0 decimals


## Testing and showing results
Don't add this in KNIME

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline
# quick visual check of the geometries held in `gdf`
gdf.plot()