# HHSK

This script adds a new column "peilgebied_cat" and makes sure the peilgebieden align with the HWS layer (Daniel):
- peilgebied_cat = 0 -> peilgebied
- peilgebied_cat = 1 -> RHWS (boezem)
- peilgebied_cat = 2 -> NHWS

Notes:


In [None]:
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
from general_functions import read_gpkg_layers

## HHSK

In [None]:
# Define the relative paths of all inputs and of the output folder.
# Name of the water board; it is substituted into the data and output paths below.
waterschap = "HHSK"

# Post-processed GeoPackage of this water board.
# NOTE(review): this path uses project folder 4750_20 while the other inputs use 4750_30 - confirm this is intended.
data_path = f"../projects/4750_20/Data_postprocessed/Waterschappen/{waterschap}/{waterschap}.gpkg"

# Water board boundaries
grens_path = "../projects/4750_30/Data_overig/Waterschapsgrenzen/Waterschapsgrenzen.geojson"
# Hoofdwatersysteem (HWS) boundaries
hws_path = "../projects/4750_30/Data_overig/HWS/krw_basins_vlakken.gpkg"
# Buffer boundaries
buffer_path = "../projects/4750_30/Data_overig/HWS/hws_buffer_HHSK.gpkg"
# Output folder
output_folder = f"./Waterschappen/{waterschap}"

## Load Files

In [None]:
# Load the HHSK layers from the post-processed GeoPackage
HHSK = read_gpkg_layers(
    gpkg_path=data_path,
    variables=[
        "stuw",
        "gemaal",
        "hydroobject",
        "duikersifonhevel",
        "peilgebied",
        "streefpeil",
    ],
)
HHSK["peilgebied"] = HHSK["peilgebied"].to_crs("EPSG:28992")

# Load waterschap boundaries; index them by name so one waterschap can be selected by label
gdf_grens = gpd.read_file(grens_path)
gdf_grens = gdf_grens.to_crs("EPSG:28992")
gdf_grens = gdf_grens.set_index("waterschap")

# Load hws; reproject it like every other layer so that all overlays
# further down operate on geometries in the same CRS
gdf_hws = gpd.read_file(hws_path)
gdf_hws = gdf_hws.to_crs("EPSG:28992")

# Load buffer and merge all of its features into a single geometry
gdf_buffer = gpd.read_file(buffer_path)
gdf_buffer = gdf_buffer.to_crs("EPSG:28992")
gdf_buffer = gdf_buffer.dissolve()

## Select waterschap boundaries and clip hws layer

In [None]:
# Keep only the boundary of Schieland en de Krimpenerwaard (HHSK)
waterschap_naam = "Schieland en de Krimpenerwaard"
gdf_grens = gdf_grens.loc[[waterschap_naam]]

# Clip the HWS layer to the waterschap boundary
gdf_hws = gpd.overlay(df1=gdf_grens, df2=gdf_hws, how="intersection")

## Check Peilgebied and HWS layer overlap:
1. Identify the overlapping areas
2. Clip
3. Calculate overlapping area percentage
4. Filter

In [None]:
# Step 1: Identify the Overlapping Areas and clip
# Intersection of the peilgebieden with the (already clipped) HWS polygons
overlaps = gpd.overlay(HHSK["peilgebied"], gdf_hws, how="intersection", keep_geom_type=True)
# Cut the peilgebieden out of the HWS layer, so gdf_hws only keeps the area outside the peilgebieden
gdf_hws = gpd.overlay(gdf_hws, HHSK["peilgebied"], how="difference")

# Step 2: Subtract Overlapping Areas from the original polygons in each DataFrame
# Peilgebied parts that were not part of the overlap found in step 1
non_overlapping_peilgebied = gpd.overlay(HHSK["peilgebied"], overlaps, how="difference", keep_geom_type=True)
# Recompute the overlaps, now between the trimmed peilgebieden and the trimmed HWS layer;
# keep_geom_type=False keeps intersection results of every geometry type
overlaps = gpd.overlay(non_overlapping_peilgebied, gdf_hws, how="intersection", keep_geom_type=False)

# Step 3: Calculate Area Percentages
# Calculate the area of overlaps (in squared CRS units; presumably m2 given EPSG:28992 - confirm)
overlaps["overlap_area"] = overlaps.area

# Step 4: Filter based on area Area Percentages
# Only overlaps larger than this area are reported as remaining overlaps
minimum_area = 50
print(f"Number of overlapping shapes without filter: {len(overlaps)}")
overlap_ids = overlaps.loc[overlaps["overlap_area"] > minimum_area]
overlap_ids = overlap_ids.globalid.to_list()
print(f"Number of overlapping shapes with filter: {len(overlap_ids)}")
# NOTE(review): overlap_ids is only used for the printed count here; no later use is visible in this notebook.

## Create peilgebied_cat column

In [None]:
# Peilgebieden with one of these codes are the boezem (RHWS) and get category 1;
# every other peilgebied gets category 0.
boezem_codes = ("GPG-399", "GPG-403", "GPG-144_RV1", "GPG-144_RV2", "GPG-144_RV3")

peilgebieden_cat = [1 if code in boezem_codes else 0 for code in HHSK["peilgebied"]["code"]]

# Add to geodataframe
HHSK["peilgebied"]["peilgebied_cat"] = peilgebieden_cat

## Add nhws to ['peilgebied','streefpeil']

In [None]:
# Give every remaining HWS polygon dummy identifiers built from its index label
nhws_prefixes = {
    "globalid": "dummy_globalid_nhws_",
    "code": "dummy_code_nhws_",
    "nen3610id": "dummy_nen3610id_nhws_",
}
hws_labels = gdf_hws.index.astype(str)
for column_name, prefix in nhws_prefixes.items():
    gdf_hws[column_name] = prefix + hws_labels

# Category 2 marks the polygons as NHWS
gdf_hws["peilgebied_cat"] = 2

# Keep only the columns that are added to the peilgebied layer
nhws_columns = ["globalid", "code", "nen3610id", "peilgebied_cat", "geometry"]
gdf_hws = gdf_hws[nhws_columns]

# Put the NHWS polygons in front of the existing peilgebieden
HHSK["peilgebied"] = pd.concat([gdf_hws, HHSK["peilgebied"]])

In [None]:
# Build one streefpeil record per NHWS polygon: no water level and no geometry,
# only the dummy globalid that links it to the NHWS polygon in the peilgebied layer
n_hws = len(gdf_hws)
streefpeil_hws = pd.DataFrame(
    {
        "waterhoogte": [np.nan] * n_hws,
        "globalid": "dummy_globalid_nhws_" + gdf_hws.index.astype(str),
        "geometry": [None] * n_hws,
    }
)

# Put the NHWS records in front of the existing streefpeilen
streefpeil_combined = pd.concat([streefpeil_hws, HHSK["streefpeil"]])
HHSK["streefpeil"] = gpd.GeoDataFrame(streefpeil_combined)

### Create buffer polygon between NHWS and peilgebied/RHWS

In [None]:
# Part of the (dissolved) buffer layer that lies inside the waterschap boundary
buffer_polygon = gdf_buffer.geometry.iat[0].intersection(gdf_grens.geometry.iat[0])

# Remove everything that is already covered by the NHWS polygons and by the peilgebieden.
# unary_union is used for both layers because, unlike the MultiPolygon constructor,
# it also accepts multi-part and overlapping input geometries.
buffer_polygon = buffer_polygon.difference(shapely.ops.unary_union(gdf_hws.geometry.tolist()))
buffer_polygon = buffer_polygon.difference(shapely.ops.unary_union(HHSK["peilgebied"].geometry.tolist()))

# Wrap the resulting geometry in a single-row GeoDataFrame with a "geometry" column.
# The geometry is passed explicitly: building the frame by iterating over the geometry
# only works for multi-part geometries on Shapely < 2 and never for a single Polygon.
buffer_polygon = gpd.GeoDataFrame(geometry=[buffer_polygon], crs="EPSG:28992")

### Add buffer to ['peilgebied','streefpeil']

In [None]:
# Turn the buffer polygon into a peilgebied record with dummy identifiers
buffer_polygon = gpd.GeoDataFrame(buffer_polygon)
buffer_labels = buffer_polygon.index.astype(str)
buffer_attributes = {
    # fixed globalid; the buffer streefpeil record uses the same value
    "globalid": "dummy_globalid_nhws_buffer_" + "1",
    "code": "dummy_code_nhws_buffer_" + buffer_labels,
    "nen3610id": "dummy_nen3610id_nhws_buffer_" + buffer_labels,
    # category 2 marks the buffer as NHWS
    "peilgebied_cat": 2,
}
for column_name, value in buffer_attributes.items():
    buffer_polygon[column_name] = value

# Make sure the geometry column is called "geometry" and keep only the peilgebied columns
buffer_polygon = buffer_polygon.rename(columns={0: "geometry"})
buffer_columns = ["globalid", "code", "nen3610id", "peilgebied_cat", "geometry"]
buffer_polygon = buffer_polygon[buffer_columns]

# Put the buffer in front of the existing peilgebieden
peilgebied_combined = pd.concat([buffer_polygon, HHSK["peilgebied"]])
HHSK["peilgebied"] = gpd.GeoDataFrame(peilgebied_combined)

In [None]:
# Create the streefpeil record that belongs to the buffer polygon:
# no water level and no geometry, only the globalid of the buffer
streefpeil_buffer = pd.DataFrame(
    {
        "waterhoogte": [np.nan],
        "globalid": ["dummy_globalid_nhws_buffer_1"],
        "geometry": [None],
    }
)

# Put the buffer record in front of the existing streefpeilen
streefpeil_combined = pd.concat([streefpeil_buffer, HHSK["streefpeil"]])
HHSK["streefpeil"] = gpd.GeoDataFrame(streefpeil_combined)

### Fix duplicate hydroobjects

In [None]:
# Make the identifier columns of the hydroobjects unique by renaming repeated values
id_columns = ["globalid", "code", "nen3610id"]

# Number the occurrences of every identifier value: 1 for the first, 2 for the second, ...
for id_column in id_columns:
    HHSK["hydroobject"][f"temp_{id_column}"] = HHSK["hydroobject"].groupby(id_column).cumcount() + 1

# Append "_<occurrence number>" to every repeated value; the first occurrence stays unchanged
for id_column in id_columns:
    HHSK["hydroobject"][f"{id_column}_new"] = HHSK["hydroobject"].apply(
        lambda x, col=id_column: f"{x[col]}_{x['temp_' + col]}" if x["temp_" + col] > 1 else x[col],
        axis=1,
    )

# Replace the original identifier columns by the de-duplicated ones and drop the helper columns
helper_columns = [f"temp_{id_column}" for id_column in id_columns]
HHSK["hydroobject"] = HHSK["hydroobject"].drop(columns=helper_columns + id_columns)
HHSK["hydroobject"] = HHSK["hydroobject"].rename(
    columns={f"{id_column}_new": id_column for id_column in id_columns}
)

# Check: prints True for every identifier column that is now unique
for id_column in id_columns:
    print(HHSK["hydroobject"][id_column].is_unique)

### Store post-processed data

In [None]:
from pathlib import Path

# Create the output folder first: writing the GeoPackage fails when the folder does not exist
Path(output_folder).mkdir(parents=True, exist_ok=True)

# Write every layer of the HHSK dict to the same GeoPackage, one layer per dict key
output_gpkg = f"{output_folder}/{waterschap}.gpkg"
for key, layer in HHSK.items():
    print(key)
    layer.to_file(output_gpkg, layer=str(key), driver="GPKG")