In [1]:
#import packages and functions
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import os
import fiona
import shapely
from shapely.geometry import Polygon, MultiPolygon, shape, Point


In [2]:
from general_functions import *

def convert_3D_2D(geometry):
    '''
    Takes a GeoSeries of 3D Multi/Polygons (has_z) and returns a list of 2D Multi/Polygons
    '''
    new_geo = []
    for p in geometry:
        if p.has_z:
            if p.geom_type == 'Polygon':
                lines = [xy[:2] for xy in list(p.exterior.coords)]
                new_p = Polygon(lines)
                new_geo.append(new_p)
            elif p.geom_type == 'MultiPolygon':
                new_multi_p = []
                for ap in p:
                    lines = [xy[:2] for xy in list(ap.exterior.coords)]
                    new_p = Polygon(lines)
                    new_multi_p.append(new_p)
                new_geo.append(MultiPolygon(new_multi_p))
    return new_geo

def convert_to_polygon(line):
    if line.is_ring:  # Checks if the LineString is closed
        return Polygon(line)
    else:
        return line  # Returns the line string as is if it's not closed


In [3]:
pd.set_option('display.max_columns', None)

In [4]:
#define relative paths
waterschap = 'Zuiderzeeland'
path_zzl = '..\..\Data_preprocessed\Waterschappen\Zuiderzeeland'
output_gpkg_path = "../../Data_postprocessed/Waterschappen/Zuiderzeeland"

# Zuiderzeeland

In [5]:
Zuiderzeeland = {}

Zuiderzeeland['gemaal'] = gpd.read_file(path_zzl + '\gemalen.gpkg')
Zuiderzeeland['hevels'] = gpd.read_file(path_zzl + '\overigekunstwerken.gpkg')
# Zuiderzeeland['peilgebied'] = gpd.read_file(path_zzl + '\peilgebieden.gpkg')
Zuiderzeeland['peilgebied'] = gpd.read_file(path_zzl + '\peilvakken_nalevering.gpkg')


#use fiona for the duikersifonhevels and watergangen due to unexpted geometry types
with fiona.open(path_zzl + '/Duikers.gpkg', 'r') as file:
    # Read the contents and store them in the GeoDataFrame
    Zuiderzeeland['duikersifonhevel'] = gpd.GeoDataFrame.from_features(file, crs = 'EPSG:28992')
    
with fiona.open(path_zzl + '/zzl_watergangen_nalevering/zzl_Watergangen.shp', 'r') as file:
    # Read the contents and store them in the GeoDataFrame
    Zuiderzeeland['hydroobject'] = gpd.GeoDataFrame.from_features(file)

In [6]:
Zuiderzeeland['hydroobject'] = Zuiderzeeland['hydroobject'].set_crs(crs = 'WGS84', allow_override=True)
Zuiderzeeland['hydroobject'] = Zuiderzeeland['hydroobject'].to_crs(crs = 'EPSG:28992')

In [7]:
KWKSOORT_stuw = ['Constructie',
                 'inlaat',
                 'uitlaat',
                 'keerwand'] #gebasseerd op de geleverde data van Zuiderzeeland

Zuiderzeeland['stuw'] = Zuiderzeeland['hevels'].loc[Zuiderzeeland['hevels']['KWKSOORT'].isin(KWKSOORT_stuw)].reset_index(drop=True)
Zuiderzeeland['stuw'].geometry = Zuiderzeeland['stuw'].centroid #prevent pointZ geometries

In [8]:
#distinguish multiple parameters from the same gpkg
Zuiderzeeland['afsluitmiddel'] = Zuiderzeeland['hevels'].loc[Zuiderzeeland['hevels']['KWKSOORT'] == 'Afsluitmiddel (groot)'].reset_index(drop=True)
Zuiderzeeland['hevels'] = Zuiderzeeland['hevels'].loc[Zuiderzeeland['hevels']['KWKSOORT'] == 'Hevel'].reset_index(drop=True)

In [9]:
#determine aanvoer en afvoer gemalen
Zuiderzeeland['gemaal']['func_aanvoer'], Zuiderzeeland['gemaal']['func_afvoer'], Zuiderzeeland['gemaal']['func_circulatie']  = False, False, False #default is False
Zuiderzeeland['gemaal']['functiegemaal'] = Zuiderzeeland['gemaal']['KGMFUNC'].astype(str) 
Zuiderzeeland['gemaal'].loc[Zuiderzeeland['gemaal']['functiegemaal'] == 'onbekend', 'functiegemaal'] = np.nan #replace onbekend with nan, will be filled up later see one line below
Zuiderzeeland['gemaal']['functiegemaal'].fillna(Zuiderzeeland['gemaal']['KGMSOORT'], inplace = True) #some additional is given in this column

Zuiderzeeland['gemaal'].loc[Zuiderzeeland['gemaal'].functiegemaal.str.contains('af-|afvoer|onderbemaling'), 'func_afvoer'] = True
Zuiderzeeland['gemaal'].loc[Zuiderzeeland['gemaal'].functiegemaal.str.contains('aanvoergemaal|opmaling'), 'func_aanvoer'] = True
Zuiderzeeland['gemaal'].loc[Zuiderzeeland['gemaal'].functiegemaal.str.contains('circulatie'), 'func_circulatie'] = True
Zuiderzeeland['gemaal'].loc[(Zuiderzeeland['gemaal'].func_afvoer == False) &
                       (Zuiderzeeland['gemaal'].func_aanvoer == False) &
                       (Zuiderzeeland['gemaal'].func_circulatie == False), 'func_afvoer'] = True #set to afvoergemaal is there the function is unknown

In [10]:
# Zuiderzeeland['peilgebied']['geometry'] = convert_3D_2D(Zuiderzeeland['peilgebied'].geometry)
Zuiderzeeland['peilgebied'] = Zuiderzeeland['peilgebied'].explode(ignore_index=True)
Zuiderzeeland['peilgebied'] = Zuiderzeeland['peilgebied'][Zuiderzeeland['peilgebied'].geometry.type.isin(['Polygon', 'MultiPolygon'])] #also only select polygons


In [16]:
from shapely.ops import unary_union,cascaded_union 

Zuiderzeeland['peilgebied'] = Zuiderzeeland['peilgebied'].explode(ignore_index=True)
Zuiderzeeland['peilgebied'] = Zuiderzeeland['peilgebied'][Zuiderzeeland['peilgebied'].geometry.type.isin(['Polygon', 'MultiPolygon'])] #also only select polygons


Zuiderzeeland['peilgebied']['area'] = Zuiderzeeland['peilgebied'].area
dissolved = Zuiderzeeland['peilgebied'].dissolve()
exterior = gpd.GeoDataFrame(geometry=dissolved['geometry'].boundary).to_crs(crs='EPSG:28992')
Zuiderzeeland['peilgebied'] = Zuiderzeeland['peilgebied'].to_crs(crs='EPSG:28992')
exterior = exterior.explode().reset_index(drop=True)
exterior['geometry'] = exterior['geometry'].apply(convert_to_polygon)
exterior['area'] = exterior.area
exterior = exterior.sort_values(by='area', ascending=False).reset_index(drop=True)
exterior = exterior.iloc[2::] #-2 as all rows should be taken into consideration, except the last two. These two polygons represent the flevopolder and the noordoost polder
exterior = exterior.reset_index(drop=True)


# new_gdf = gpd.GeoDataFrame(columns=Zuiderzeeland['peilgebied'].columns())

for i in range(len(exterior)):
# for i in range(10):

    buffered_peilgebied = Zuiderzeeland['peilgebied'].buffer(0.)

    exterior_sample = exterior.iloc[i:i+1] #just pick a single sample
    exterior_sample.geometry = exterior_sample.buffer(0.0001) #let op! hierdoor komt er alsnog een kleine overlap. Maar anders werkt de code even niet, en vanwege tijdgebrek kan dit nu niet worden opgelost. In het bepalen van de crossings wordt hier echter mee omgegaan.
    
    intersects_mask = buffered_peilgebied.intersects(exterior_sample.iloc[0].geometry)
    intersecting_polygons = Zuiderzeeland['peilgebied'][intersects_mask].sort_values(by='area', ascending=False)

    if len(intersecting_polygons) > 0:
        # print(i)
        # polygon_to_dissolve = intersecting_polygons.iloc[0:1]#.geometry.unary_union  
        # sample_geometry = exterior_sample.geometry#.unary_union
        intersecting_polygons = gpd.GeoDataFrame(intersecting_polygons.iloc[0:1], geometry = 'geometry')
        
        # dissolved_polygon =   # dissolve/union them
        all_geometries = list(intersecting_polygons.geometry) + list(exterior_sample.geometry)

        # Use unary_union to dissolve all polygons in the list
        dissolved_polygon = unary_union(all_geometries)
        
        original_index = intersecting_polygons.index[0]

        # Ensure it's a single geometry object.
        # print(len(Zuiderzeeland['peilgebied'].loc[Zuiderzeeland['peilgebied'].index == original_index, 'geometry']))
        # print(len(dissolved_polygon))
        Zuiderzeeland['peilgebied'].loc[Zuiderzeeland['peilgebied'].index == original_index, 'geometry'] = dissolved_polygon#['geometry']
        # new_gdf = pd.concat([new_gdf, dissolved_polygon])
    else:
        print('No intersection found for iteration ', i)

  exterior = exterior.explode().reset_index(drop=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from 

In [18]:
#Gemaal
Zuiderzeeland['gemaal'] = Zuiderzeeland['gemaal'][['KGMIDENT', 'GLOBALID', 'func_aanvoer', 'func_afvoer', 'func_circulatie', 'geometry']]
Zuiderzeeland['gemaal'] = Zuiderzeeland['gemaal'].rename(columns={'KGMIDENT': 'code', 'GLOBALID': 'globalid'})
Zuiderzeeland['gemaal']['nen3610id'] = 'dummy_nen3610id_gemaal_' + Zuiderzeeland['gemaal'].index.astype(str)

#Hydroobject
Zuiderzeeland['hydroobject'] = Zuiderzeeland['hydroobject'][['OWAIDENT', 'GLOBALID', 'geometry']]
Zuiderzeeland['hydroobject'] = Zuiderzeeland['hydroobject'].rename(columns={'OWAIDENT':'code', 'GLOBALID':'globalid'})
Zuiderzeeland['hydroobject']['nen3610id'] = 'dummy_nen3610id_hydroobject_' + Zuiderzeeland['hydroobject'].index.astype(str)

#duikersifonhevel
Zuiderzeeland['duikersifonhevel'] = Zuiderzeeland['duikersifonhevel'][['KDUIDENT', 'GLOBALID', 'geometry']]
Zuiderzeeland['duikersifonhevel'] = Zuiderzeeland['duikersifonhevel'].rename(columns={'KDUIDENT':'code', 'GLOBALID': 'globalid'})
Zuiderzeeland['duikersifonhevel']['nen3610id'] = 'dummy_nen3610id_duikersifonhevel_' + Zuiderzeeland['duikersifonhevel'].index.astype(str)

#hevels
Zuiderzeeland['hevels'] = Zuiderzeeland['hevels'][['KWKIDENT', 'GLOBALID', 'geometry']]
Zuiderzeeland['hevels'] = Zuiderzeeland['hevels'].rename(columns={'KWKIDENT':'code', 'GLOBALID': 'globalid'})
Zuiderzeeland['hevels']['nen3610id'] = 'dummy_nen3610id_hevels_' + Zuiderzeeland['hevels'].index.astype(str)
#add to the duikersifonhevel
Zuiderzeeland['duikersifonhevel'] = gpd.GeoDataFrame(pd.concat((Zuiderzeeland['duikersifonhevel'], Zuiderzeeland['hevels']))) 

#stuw
Zuiderzeeland['stuw'] = Zuiderzeeland['stuw'][['KWKIDENT', 'GLOBALID', 'geometry', 'KWKSOORT']]
Zuiderzeeland['stuw'] = Zuiderzeeland['stuw'].rename(columns={'KWKIDENT':'code', 'GLOBALID': 'globalid', 'KWKSOORT':'KWKsoort'})
Zuiderzeeland['stuw'] = Zuiderzeeland['stuw'].set_crs('EPSG:28992')
Zuiderzeeland['stuw']['nen3610id'] = 'dummy_nen3610id_stuw_' + Zuiderzeeland['stuw'].index.astype(str)

#afsluitmiddel
Zuiderzeeland['afsluitmiddel'] = Zuiderzeeland['afsluitmiddel'][['KWKIDENT', 'GLOBALID', 'geometry']]
Zuiderzeeland['afsluitmiddel'] = Zuiderzeeland['afsluitmiddel'].rename(columns={'KWKIDENT':'code', 'GLOBALID': 'globalid'})
Zuiderzeeland['afsluitmiddel']['nen3610id'] = 'dummy_nen3610id_hevels_' + Zuiderzeeland['afsluitmiddel'].index.astype(str)

#peilgebied
Zuiderzeeland['peilgebied'] = Zuiderzeeland['peilgebied'][['DHYDRO_ZMRPL', 'GPGIDENT', 'geometry']]
Zuiderzeeland['peilgebied']['nen3610id'] = 'dummy_nen3610id_peilgebied_' + Zuiderzeeland['peilgebied'].index.astype(str)
Zuiderzeeland['peilgebied']['globalid'] = 'dummy_globalid_peilgebied_' + Zuiderzeeland['peilgebied'].index.astype(str)
Zuiderzeeland['peilgebied'] = Zuiderzeeland['peilgebied'].rename(columns={'DHYDRO_ZMRPL': 'streefpeil', 'GPGIDENT':'code'})
Zuiderzeeland['peilgebied']['globalid'] = 'dummy_globalid_peilgebied_' + Zuiderzeeland['peilgebied'].index.astype(str)

#streefpeil
Zuiderzeeland['streefpeil'] = Zuiderzeeland['peilgebied'][['streefpeil', 'globalid']]
Zuiderzeeland['streefpeil']['geometry'] = np.nan
Zuiderzeeland['streefpeil'].rename(columns = {'streefpeil': 'waterhoogte'}, inplace=True)
Zuiderzeeland['streefpeil'] = gpd.GeoDataFrame(Zuiderzeeland['streefpeil'], geometry = 'geometry')

#delete the streefpeil in the peilgebied for consistency
Zuiderzeeland['peilgebied'] = Zuiderzeeland['peilgebied'][['code', 'globalid', 'nen3610id', 'geometry']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Zuiderzeeland['streefpeil']['geometry'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Zuiderzeeland['streefpeil'].rename(columns = {'streefpeil': 'waterhoogte'}, inplace=True)


In [19]:
Zuiderzeeland['aggregation_area'] = Zuiderzeeland['peilgebied'].copy()
Zuiderzeeland['aggregation_area']['globalid'] = 'dummy_globalid_agg_area_' + Zuiderzeeland['aggregation_area'].index.astype(str)
Zuiderzeeland['aggregation_area']['code'] = Zuiderzeeland['aggregation_area']['code'].astype(str) + '_dummy_id_' + Zuiderzeeland['aggregation_area'].index.astype(str)


In [30]:
Zuiderzeeland['streefpeil'].waterhoogte = Zuiderzeeland['streefpeil'].waterhoogte.astype(float)
Zuiderzeeland['streefpeil'].waterhoogte = Zuiderzeeland['streefpeil'].waterhoogte.round(2)

In [31]:
# test = pd.merge(Zuiderzeeland['peilgebied'], 
#                 Zuiderzeeland['streefpeil'],
#                 on = 'globalid',
#                 suffixes = ('', '_sp'))

# test.waterhoogte = test.waterhoogte.astype(float)
# # test.loc[test.waterhoogte.isna()]

### Check for the correct keys and columns

In [32]:
show_layers_and_columns(waterschap = Zuiderzeeland)

gemaal
['code' 'globalid' 'func_aanvoer' 'func_afvoer' 'func_circulatie'
 'geometry' 'nen3610id']
type =  <class 'geopandas.geodataframe.GeoDataFrame'>
crs =  epsg:28992

hevels
['code' 'globalid' 'geometry' 'nen3610id']
type =  <class 'geopandas.geodataframe.GeoDataFrame'>
crs =  epsg:28992

peilgebied
['code' 'globalid' 'nen3610id' 'geometry']
type =  <class 'geopandas.geodataframe.GeoDataFrame'>
crs =  epsg:28992

duikersifonhevel
['code' 'globalid' 'geometry' 'nen3610id']
type =  <class 'geopandas.geodataframe.GeoDataFrame'>
crs =  EPSG:28992

hydroobject
['code' 'globalid' 'geometry' 'nen3610id']
type =  <class 'geopandas.geodataframe.GeoDataFrame'>
crs =  EPSG:28992

stuw
['code' 'globalid' 'geometry' 'KWKsoort' 'nen3610id']
type =  <class 'geopandas.geodataframe.GeoDataFrame'>
crs =  EPSG:28992

afsluitmiddel
['code' 'globalid' 'geometry' 'nen3610id']
type =  <class 'geopandas.geodataframe.GeoDataFrame'>
crs =  epsg:28992

streefpeil
['waterhoogte' 'globalid' 'geometry']
type = 

### Store data

In [33]:
# Check if the directory exists. If it doesn't exist, create it

if not os.path.exists(output_gpkg_path):
    os.makedirs(output_gpkg_path)
    
store_data(waterschap = Zuiderzeeland, 
           output_gpkg_path = output_gpkg_path + '/Zuiderzeeland')
