## Verarbeitung der HYRAS-Daten

In [None]:
import os
import time
import json

import pandas as pd
import xarray as xr
import rioxarray
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import dask
import contextily as ctx

Festlegen der Gebiets-ID und der zu verarbeitenden Variable

In [None]:
# Catchment to process and the HYRAS variable to evaluate.
# HYRAS_VARIABLE must be one of the keys of `var_mapping`.
ID = 1
HYRAS_VARIABLE = "TemperatureMean"

# Root folders for the input data and for the generated results.
INPUT_PATH = "../input_data"
RESULT_PATH = "../output_data"

In [None]:
# Create the result directory of this catchment. exist_ok=True turns the call
# into a no-op when the directory already exists and avoids the race between
# a separate existence check and the creation.
os.makedirs(f"{RESULT_PATH}/{ID}", exist_ok=True)

In [None]:
# Lookup table: HYRAS variable -> name of the data variable inside the NetCDF
# file(s) and the path (or glob pattern) of the file(s) to open.
var_mapping = {
    "Humidity": {
        "variable_name": "hurs",
        "datapath": f"{INPUT_PATH}/hyras/Humidity/hurs_hyras_5_1951_2020_v5-0_de.nc",
    },
    "Precipitation": {
        "variable_name": "pr",
        "datapath": f"{INPUT_PATH}/hyras/Precipitation/pr_hyras_1_1931_2020_v5-0_de.nc",
    },
    "RadiationGlobal": {
        "variable_name": "rsds",
        "datapath": f"{INPUT_PATH}/hyras/RadiationGlobal/*.nc",
    },
    "TemperatureMax": {
        "variable_name": "tasmax",
        "datapath": f"{INPUT_PATH}/hyras/TemperatureMax/tasmax_hyras_5_1951_2020_v5-0_de.nc",
    },
    "TemperatureMin": {
        "variable_name": "tasmin",
        "datapath": f"{INPUT_PATH}/hyras/TemperatureMin/tasmin_hyras_5_1951_2020_v5-0_de.nc",
    },
    "TemperatureMean": {
        "variable_name": "tas",
        "datapath": f"{INPUT_PATH}/hyras/TemperatureMean/tas_hyras_5_1951_2020_v5-0_de.nc",
    },
}

# Resolve the entry of the configured variable once (KeyError for an unknown name)
selected_entry = var_mapping[HYRAS_VARIABLE]
variable = selected_entry["variable_name"]
data_path = selected_entry["datapath"]

# Messages about non-fatal problems are collected here by the following cells
warnings = []

Import des Einzugsgebiets aus catchments.py

In [None]:
from catchments import get_catchment_gdf

# Fetch the catchment for the configured ID and reproject it to EPSG:3034,
# the same CRS that is assigned to the HYRAS dataset further below.
catchment_source = get_catchment_gdf(ID)
catchment = catchment_source.to_crs(epsg=3034)

# Print the table and its geometry column as a quick sanity check
for item in (catchment, catchment.geometry):
    print(item)

Verarbeitung der Daten

In [None]:
# Open the HYRAS file(s); according to the original author, chunks="auto"
# combined with unify_chunks() produced the fastest results.
ds = xr.open_mfdataset(data_path, combine="by_coords", chunks="auto").unify_chunks()

# Precipitation is restricted to 1951 onwards (its file name indicates a 1931
# start, whereas the file names of the other variables indicate 1951).
if HYRAS_VARIABLE == 'Precipitation':
    ds = ds.sel(time=slice('1951', None))

# The CRS has to be set explicitly (EPSG:3034)
ds.rio.write_crs("EPSG:3034", inplace=True)

# Drop the bounds variables time_bnds, x_bnds and y_bnds (they cause problems with xarray)
ds = ds.drop_vars(["time_bnds", "x_bnds", "y_bnds"])

# Tell rioxarray which dimensions are the spatial ones
ds.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)

In [None]:
# Clip the data to the catchment shape; all_touched=True keeps all pixels that
# are at least partially in the catchment. The clipped result is loaded into
# memory right away, which yielded the fastest computation times.
ds_clipped = ds.rio.clip(catchment.geometry, all_touched=True).load()

Plot der zugeschnittenen Rasterdaten mit überlagerten Grenzen des Einzugsgebiets zur Kontrolle

In [None]:
# Check the shape of a single time step of the clipped data: the raster plot
# and calculating weighted statistics with exactextract can only be done if
# the shape is >= (2, 2). data_1d is True when any dimension has length 1.
first_time_step = ds_clipped[variable].isel(time=0)
data_1d = 1 in first_time_step.shape

if not data_1d:
    fig_spatial, ax = plt.subplots(figsize=(16, 7))

    # Raster of the first time step
    first_time_step.plot(alpha=1, ax=ax, cmap="viridis")

    # Catchment outline drawn over the raster: thick black border, no fill
    catchment.plot(ax=ax, color="none", edgecolor="black", linewidth=3)

    # The basemap needs an internet connection and sometimes takes a while;
    # a failure is reported and recorded, but does not stop the notebook.
    try:
        ctx.add_basemap(ax, crs=ds_clipped.rio.crs.to_string(), source=ctx.providers.OpenTopoMap)
    except Exception as e:
        basemap_message = f"Basemap loading not succesfull: {e}"
        print(basemap_message)
        warnings.append(basemap_message)

    # Widen the visible extent by 20 % of the current range on every side
    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()
    x_padding = 0.2*(xmax-xmin)
    y_padding = 0.2*(ymax-ymin)
    ax.set_xlim(xmin - x_padding, xmax + x_padding)
    ax.set_ylim(ymin - y_padding, ymax + y_padding)

    ax.set_title(f"{HYRAS_VARIABLE} clipped to catchment {ID}")

    plt.show()

else:
    # No raster plot possible: report it and remember the warning
    shape_message = "Clipped data has dimensionality of 1, cannot create raster plot"
    print(shape_message)
    warnings.append(shape_message)

In [None]:
# Remove the grid_mapping key from the variable's attributes (problems with xarray).
# pop(..., None) does nothing when the key is already gone.
ds_clipped[variable].attrs.pop("grid_mapping", None)

# Drop variable crs_HYRAS (problems with xarray). errors="ignore" keeps this
# cell re-runnable: without it a second execution raises because the variable
# has already been removed, and a dataset without crs_HYRAS would fail as well.
ds_clipped = ds_clipped.drop_vars("crs_HYRAS", errors="ignore")

Berechnung der Kennzahlen

In [None]:
# Reduce the clipped field over both spatial dimensions
spatial_dims = ["x", "y"]
field = ds_clipped[variable]

# Evaluate the four reductions with a single dask.compute call
mean, std, min_val, max_val = dask.compute(
    field.mean(dim=spatial_dims),
    field.std(dim=spatial_dims),
    field.min(dim=spatial_dims),
    field.max(dim=spatial_dims),
)

# Convert every result to a DataFrame whose single column is named
# "<variable>_<statistic>"
stat_frames = []
for suffix, reduced in zip(("mean", "stdev", "min", "max"), (mean, std, min_val, max_val)):
    frame = reduced.to_dataframe()
    frame.columns = [f"{variable}_{suffix}"]
    stat_frames.append(frame)
df_mean, df_std, df_min, df_max = stat_frames

# Put the statistics side by side
df_timeseries = pd.concat(stat_frames, axis=1)

df_timeseries


Plot der Kennzahlen über den gesamten Zeitraum

In [None]:
fig_timeseries = plt.figure(figsize=(10, 7))

# Two stacked panels, the upper one twice as tall as the lower one
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])

# Yearly means of all statistics.
# NOTE(review): newer pandas versions prefer the alias 'YE' over 'Y' - confirm
# against the pandas version in use before changing it.
stdev_column = f"{variable}_stdev"
yearly = df_timeseries.groupby(pd.Grouper(freq='Y')).mean()

# Upper panel: every statistic except the standard deviation
ax0 = plt.subplot(gs[0])
yearly.drop(columns=[stdev_column]).plot(ax=ax0, lw=2, legend=False)
ax0.set_title(f"{HYRAS_VARIABLE} yearly mean timeseries for catchment {ID}\n")
ax0.xaxis.set_visible(False)  # hide the x-axis of the upper panel

# Lower panel: the standard deviation only
ax1 = plt.subplot(gs[1])
yearly[stdev_column].plot(ax=ax1, lw=2, color='orange', legend=False)

# One shared legend built from the lines of both panels
lines = ax0.get_lines() + ax1.get_lines()
labels = [line.get_label() for line in lines]

# Anchor the legend below the figure, all entries in a single row
fig_timeseries.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, 0), ncol=len(lines))

plt.tight_layout()
plt.show()

Abspeichern der Daten

In [None]:
# Output folders of this catchment; created if they do not exist yet
catchment_dir = f"{RESULT_PATH}/{ID}"
for subfolder in ("plots", "data"):
    os.makedirs(f"{catchment_dir}/{subfolder}", exist_ok=True)

# Save the figures; the spatial figure only exists when the raster plot was drawn
savefig_options = dict(dpi=300, bbox_inches="tight")
if not data_1d:
    fig_spatial.savefig(f"{catchment_dir}/plots/{HYRAS_VARIABLE}_catchment_clipped.png", **savefig_options)
fig_timeseries.savefig(f"{catchment_dir}/plots/{HYRAS_VARIABLE}_timeseries.png", **savefig_options)

# Save the time series table
df_timeseries.to_csv(f"{catchment_dir}/data/{ID}_{HYRAS_VARIABLE}.csv")

In [None]:
# Close the original and the clipped xarray dataset
for dataset in (ds, ds_clipped):
    dataset.close()

An diesem Punkt liegen die Zeitreihendaten als df_timeseries vor und wurden als csv gespeichert.