# Save basis EZG data from PIA and hydrosheds as geojsons to Station output folders

In [1]:
from camelsp import Bundesland, Station, get_metadata
import geopandas as gpd
from glob import glob
import os
import warnings

## Basis EZG (Pia)

In [2]:
# get metadata
meta = get_metadata()

# create a list of camels ids
camels_ids = meta["camels_id"].tolist()

# get the geopackage with all station catchments
gdf_all = gpd.read_file('../catchments_pia/camels_20231018/camels_catchments_n2860.gpkg')

for camels_id in camels_ids:
    try:
        # initiate Station
        s = Station(camels_id)

        # read geojson file
        gdf = gdf_all[gdf_all['camels_id'] == camels_id].reset_index()

        # save geojson to Station output folder
        s.save_catchment_geometry(gdf, datasource='basis_ezg')

    except Exception as e:
        print(f"{camels_id} --- Error: {e}")


DEF10780 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10050 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10110 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10190 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10240 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10260 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10370 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10380 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10480 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10490 --- Error: catchment_geometry contains 0 geometries / rows for the station, 1 is allowed
DED10510 --- Error: 

## HydroSheds

In [3]:
hydrosheds = glob('../hydrosheds/*.geojson')

for hydroshed in hydrosheds:
    try:
        # get the id
        id = hydroshed.split('/')[-1].split('_')[0]
        # initiate Station
        s = Station(hydroshed.split('/')[-1].split('_')[0])

        # read geojson file
        gdf = gpd.read_file(hydroshed)

        # save geojson to Station output folder
        s.save_catchment_geometry(gdf, datasource='hydrosheds')

    except Exception as e:
        print(f"{hydroshed} --- Error: {e}")

../hydrosheds/DE812120_hydrosheds.geojson --- Error: index 0 is out of bounds for axis 0 with size 0


## Federal agency catchments

Go through the shapefiles that were provided by the federal agencies.

### DE1 - Baden-Württemberg

In [2]:
BASE = Bundesland('Baden-Württemberg').input_path

gdf_meta = gpd.read_file(os.path.join(BASE, '../Shapes/BW_Shapes/Pegeleinzugsgebiete_2014_ETRS89.shp'))

# make column MESSTELLEN int
gdf_meta['MESSTELLEN'] = gdf_meta['MESSTELLEN'].astype(int)

# save errors
errors = []

for id in gdf_meta['MESSTELLEN'].values:
    # init station via PKZ, ignore warnings as we use provider_id instead of camels_id
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            s = Station(id)
    except ValueError as e:
        errors.append(e)
        continue

    # get catchment geometry for id
    catchment = gdf_meta[gdf_meta['MESSTELLEN'] == id].iloc[[0]]

    # save catchment geometry
    s.save_catchment_geometry(catchment, datasource='federal_agency_ezg', if_exists='replace')

# print results and number of errors
if len(errors) > 0:
    print("Errors:")
    for e in errors:
        print(e)


Errors:
103 is neither a provider_id nor a CAMELS-DE NUTSID
183 is neither a provider_id nor a CAMELS-DE NUTSID
184 is neither a provider_id nor a CAMELS-DE NUTSID
185 is neither a provider_id nor a CAMELS-DE NUTSID
386 is neither a provider_id nor a CAMELS-DE NUTSID
399 is neither a provider_id nor a CAMELS-DE NUTSID
1110 is neither a provider_id nor a CAMELS-DE NUTSID
1112 is neither a provider_id nor a CAMELS-DE NUTSID
1113 is neither a provider_id nor a CAMELS-DE NUTSID
1114 is neither a provider_id nor a CAMELS-DE NUTSID
1115 is neither a provider_id nor a CAMELS-DE NUTSID
1148 is neither a provider_id nor a CAMELS-DE NUTSID
1304 is neither a provider_id nor a CAMELS-DE NUTSID
1304 is neither a provider_id nor a CAMELS-DE NUTSID
1311 is neither a provider_id nor a CAMELS-DE NUTSID
1452 is neither a provider_id nor a CAMELS-DE NUTSID
2366 is neither a provider_id nor a CAMELS-DE NUTSID
4401 is neither a provider_id nor a CAMELS-DE NUTSID
4402 is neither a provider_id nor a CAMELS-D

### DE2 - Bayern

Bayern hat uns Basis EZGs gegeben, die noch zusammengefügt werden müssen (?) -> bekomme ich nicht hin.  
Gibt auch noch ein Shapefile für die Pegel, es könnte eine Verknüpfung zwischen 'einzugsgeb' von Pegeln zu Spalte 'GEBKZ_K' von EZGs geben, das haut aber für die meisten Stationen nicht hin, einzugsgeb-Nummer gibt es dann nicht in EZG-Shapefile...

In [3]:
BASE = Bundesland('Bayern').input_path

# Spalte 'stationsnu' und 'einzugsgeb'
gdf_pegel = gpd.read_file(os.path.join(BASE, '../../../Shapes/Bayern_Shapes/pegel_bayern_epsg4258_shp/pegel_epsg4258.shp'))

# Spalte 'GEBKZ_K'
gdf_ezg = gpd.read_file(os.path.join(BASE, '../../../Shapes/Bayern_Shapes/EZG/ezg25_15_2016_by.shp'))


### DE4 - Brandenburg

Auch Basis-EZGs, bei denen ich nicht weiß wie ich sie mergen soll.

In [4]:
BASE = Bundesland('Brandenburg').input_path

gdf_ezg = gpd.read_file(os.path.join(BASE, '../Shapes/Brandenburg_Shapes/ezg25/ezg25_20211105/ezg25.shp'))

### DE7 - Hessen

Hessen gave us two Shapefiles: "_inoffizielle_version" and "_offizielleversion_2022".  
The Shapefiles are in big parts identical, I use the official version for now, as it includes the station_id and is therefor easier to use.

In [5]:
BASE = Bundesland('Hessen').input_path

gdf_ezg = gpd.read_file(os.path.join(BASE, '../Shapes/Hessen_Shapes/_offizielle_version_2022/HE_Pegel_CAMELS_EZG.shp'))

# save errors
errors = []

for id in gdf_ezg['STAT_NUM'].values:
    # init station via PKZ, ignore warnings as we use provider_id instead of camels_id
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            s = Station(id)
            
    except ValueError as e:
        errors.append(e)
        continue

    # get catchment geometry for id
    catchment = gdf_ezg[gdf_ezg['STAT_NUM'] == id].iloc[[0]]

    # save catchment geometry
    s.save_catchment_geometry(catchment, datasource='federal_agency_ezg', if_exists='replace')

# print results and number of errors
if len(errors) > 0:
    print("Errors:")
    for e in errors:
        print(e)


### DE8 - Mecklenburg-Vorpommern

Gibt Pegel und EZG shape, merge ist mir aber nicht klar, sind tw. auch Basis-EZGs.

In [6]:
BASE = Bundesland('Mecklenburg-Vorpommern').input_path

gdf_ezg = gpd.read_file(os.path.join(BASE, '../Shapes/MeckPom_Shapes/EZG_MekPom/ezg.shp'))

### DE9 - Niedersachsen

In [7]:
gdf_meta = gpd.read_file(os.path.join(BASE, '../Shapes/Niedersachsen_Shapes/EZG_Pegel_NWLKN/EZG_Pegel_NWLKN.shp'))

# save errors
errors = []

for id in gdf_meta['PEGELID'].values:
    # init station via PKZ, ignore warnings as we use provider_id instead of camels_id
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            s = Station(id)
    except ValueError as e:
        errors.append(e)
        continue

    # get catchment geometry for id
    catchment = gdf_meta[gdf_meta['PEGELID'] == id].iloc[[0]]

    # save catchment geometry
    s.save_catchment_geometry(catchment, datasource='federal_agency_ezg', if_exists='replace')

# print results and number of errors
if len(errors) > 0:
    print("Errors:")
    for e in errors:
        print(e)


Errors:
4926110 is neither a provider_id nor a CAMELS-DE NUTSID
3871101 is neither a provider_id nor a CAMELS-DE NUTSID
3885101 is neither a provider_id nor a CAMELS-DE NUTSID
3895101 is neither a provider_id nor a CAMELS-DE NUTSID
3889102 is neither a provider_id nor a CAMELS-DE NUTSID
3888104 is neither a provider_id nor a CAMELS-DE NUTSID
3951102 is neither a provider_id nor a CAMELS-DE NUTSID
3942102 is neither a provider_id nor a CAMELS-DE NUTSID
9429105 is neither a provider_id nor a CAMELS-DE NUTSID
3983101 is neither a provider_id nor a CAMELS-DE NUTSID
9353102 is neither a provider_id nor a CAMELS-DE NUTSID
4995105 is neither a provider_id nor a CAMELS-DE NUTSID
4995110 is neither a provider_id nor a CAMELS-DE NUTSID
9374105 is neither a provider_id nor a CAMELS-DE NUTSID
9412120 is neither a provider_id nor a CAMELS-DE NUTSID
9392105 is neither a provider_id nor a CAMELS-DE NUTSID
4997105 is neither a provider_id nor a CAMELS-DE NUTSID
5992120 is neither a provider_id nor a C

### DEA - Nordrhein-Westfalen

In [8]:
BASE = Bundesland('NRW').input_path

gdf_ezg = gpd.read_file(os.path.join(BASE, '../Shapes/NRW_Shapes/whm_nrw_pegeleinzugsgebiete_dissolved/pegeleinzugsgebiete_dissolved.shp'), encoding='utf-8')

# save errors
errors = []

for id in gdf_ezg['pegelid'].values:
    # init station via PKZ, ignore warnings as we use provider_id instead of camels_id
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            s = Station(id)
            
    except ValueError as e:
        errors.append(e)
        continue

    # get catchment geometry for id
    catchment = gdf_ezg[gdf_ezg['pegelid'] == id].iloc[[0]]

    # save catchment geometry
    s.save_catchment_geometry(catchment, datasource='federal_agency_ezg', if_exists='replace')

# print results and number of errors
if len(errors) > 0:
    print("Errors:")
    for e in errors:
        print(e)



Errors:
2718180000100 is neither a provider_id nor a CAMELS-DE NUTSID
2718193000100 is neither a provider_id nor a CAMELS-DE NUTSID
27194900001010 is neither a provider_id nor a CAMELS-DE NUTSID
2721344000100 is neither a provider_id nor a CAMELS-DE NUTSID
2721811000100 is neither a provider_id nor a CAMELS-DE NUTSID
2721830000100 is neither a provider_id nor a CAMELS-DE NUTSID
2722590000100 is neither a provider_id nor a CAMELS-DE NUTSID
2726613000100 is neither a provider_id nor a CAMELS-DE NUTSID
2729900000100 is neither a provider_id nor a CAMELS-DE NUTSID
2731450500099 is neither a provider_id nor a CAMELS-DE NUTSID
2737323400100 is neither a provider_id nor a CAMELS-DE NUTSID
2745400000100 is neither a provider_id nor a CAMELS-DE NUTSID
2746790000100 is neither a provider_id nor a CAMELS-DE NUTSID
2749490000100 is neither a provider_id nor a CAMELS-DE NUTSID
2751270000200 is neither a provider_id nor a CAMELS-DE NUTSID
2761490000100 is neither a provider_id nor a CAMELS-DE NUTSID

### DEB - Rheinland-Pfalz

The provided shapefile does not contain the station id but only station name and river name.  
We try to get the station id by matching station name AND river name.

In [9]:
BASE = Bundesland('Rheinland-Pfalz').input_path

gdf_ezg = gpd.read_file(os.path.join(BASE, '../Shapes/RLP_Shapes/pezg/pegelezg.shp'))

# get RLP metadata to get station id via station name and river name
meta = Bundesland("RLP").metadata

for station_name, river_name in zip(gdf_ezg['NAME'].values, gdf_ezg['GEWäSSER'].values):
    # get station id from station name and river name
    id = meta[(meta['gauge_name'] == station_name) & (meta['waterbody_name'] == river_name)].camels_id.values

    # if id was found, init station
    if id.size > 0:
        s = Station(id[0])

        # get catchment geometry for id
        catchment = gdf_ezg[(gdf_ezg['NAME'] == station_name) & (gdf_ezg['GEWäSSER'] == river_name)].iloc[[0]]

        # save catchment geometry
        s.save_catchment_geometry(catchment, datasource='federal_agency_ezg', if_exists='replace')


ValueError: Cannot transform naive geometries.  Please set a crs on the object first.

### DEC - Saarland

Saarland fehlt im im Shapes Ordner .shp!!

### DED - Sachsen

Sachsen nur große EZGs (Spree, Elbe, Mulde), keine matches mit unseren Stationen.

In [None]:
BASE = Bundesland('Sachsen').input_path

gdf_ezg = gpd.read_file(os.path.join(BASE, '../Shapes/Sachsen_Shapes/EZG_SACHS/EZG_SACHS.shp'))

### DEE - Sachsen-Anhalt

In [None]:
BASE = Bundesland('Sachsen-Anhalt').input_path

gdf_meta = gpd.read_file(os.path.join(BASE, '../Shapes/Sachsen-Anhalt_Shapes/Datensatz.gpkg'))

for id in gdf_meta['PKZ'].values:
    # init station via PKZ, ignore warnings as we use provider_id instead of camels_id
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        s = Station(id)

    # get catchment geometry for id
    catchment = gdf_meta[gdf_meta['PKZ'] == id].iloc[[0]]

    # save catchment geometry
    s.save_catchment_geometry(catchment, datasource='federal_agency_ezg', if_exists='replace')


### DEF - Schleswig-Holstein

In [None]:
BASE = Bundesland('Schleswig-Holstein').input_path

# get all provided shape files, they have the following naming pattern: "n{provider_id}_{gauge_name}.shp"
all_shapes = glob(os.path.join(BASE, '../Shapes/Schleswig-Holstein_Shapes/SH_shapes/*.shp'))

# get all provider ids
provider_ids = Bundesland('Schleswig-Holstein').metadata['provider_id'].values

for id in provider_ids:
    # filter all shapes for the current id
    shape_path = [s for s in all_shapes if f'n{id}' in s]

    if len(shape_path) == 1:
        gdf_ezg = gpd.read_file(shape_path[0])

    elif len(shape_path) > 1:
        # in some cases there are multiple shapes for one id, we take the one with 'ges' in its name (gesamt) -> checked with shape area from metadata
        shape_path = [s for s in shape_path if '_ges' in s]

        # handle station 114064 manually (partyl in Denmark) -> checked with shape area from metadata
        if id == '114064':
            shape_path = [os.path.join(BASE, '../Shapes/Schleswig-Holstein_Shapes/SH_shapes/n114064_Soholm.shp')]
        
        gdf_ezg = gpd.read_file(shape_path[0])
    else:
        continue

    # init station via PKZ, ignore warnings as we use provider_id instead of camels_id
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        s = Station(id)

        s.save_catchment_geometry(gdf_ezg, datasource='federal_agency_ezg', if_exists='replace')


### DEG - Thüringen

Different shapefiles for Thüringen but no clear connection between stations and shapes -> not easily possible.

In [None]:
BASE = Bundesland('Thüringen').input_path

gdf_1 = gpd.read_file(os.path.join(BASE, '../Shapes/Thueringen_Shapes/oberirdische_einzugsgebiete_thueringens__stand_2016_.shp'))
gdf_2 = gpd.read_file(os.path.join(BASE, '../Shapes/Thueringen_Shapes/wasserkoerperkategorie_thueringen_stand_2021_.shp'))
gdf_3 = gpd.read_file(os.path.join(BASE, '../Shapes/Thueringen_Shapes/Monitoring_Pegel_TH.shp'))