In [None]:
import os
import sys
import time
import timeit
import traceback as tb
from pathlib import Path
import xarray as xr

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#DEBUG_FLAG = False

In [None]:
from HBV_setup import daa_optimize as HBVd_calib
from HBV_setup import dab_validate as HBVd_valid

## 1. Introduction: The HBV Model and Ahr catchment

### Acknowledgement
We would like to thank Faizen Anwer from TU Munich for providing the HBV model used in this training school

### Display catchment
The cell below loads and plots a map of the two catchments and the sensor locations. First, we show the official rain gauges from the DWD and the federal state of Rhineland-Palatinate (RLP). Both data sets are used for the operational interpolation product (Intermet), which was used as input for the interpolation of the catchment precipitation time series.

In [None]:
# Load the sensor coordinates and the catchment outlines (tables with `lon`/`lat` columns).
# df_pws is not drawn on this first map; it is used by the OS-sensor map further down.
df_pws = pd.read_csv("metadata/pws_coords.csv", sep=';')
df_dwd = pd.read_csv("metadata/dwd_coords.csv", sep=',')
df_rlp = pd.read_csv("metadata/coords_rlp.csv", sep=';')
df_altenahr_shape = pd.read_csv("metadata/Altenahr_shape.csv")
df_kreuzberg_shape = pd.read_csv("metadata/Kreuzberg_shape.csv")

# TODO: add the location of the stream gauges (and possibly the rivers)
plt.plot(df_kreuzberg_shape.lon, df_kreuzberg_shape.lat, color='grey', label="catchment Kreuzberg")
plt.plot(df_altenahr_shape.lon, df_altenahr_shape.lat, color='black', label="catchment Altenahr")
plt.scatter(df_rlp.lon, df_rlp.lat, color='C2', label="RLP rain gauge", alpha=0.8)
plt.scatter(df_dwd.lon, df_dwd.lat, color='C0', label='DWD rain gauge', alpha=0.8)
plt.xlim(6.5, 7.2)
plt.ylim(50.2, 50.6)
plt.legend(loc='lower right')
# x carries `lon` and y carries `lat`; name them instead of putting the same
# generic "deg" label on both axes.
plt.xlabel('Longitude [°E]')
plt.ylabel('Latitude [°N]')
plt.show()


### Question:
- How do you judge the coverage of rain gauges in these two catchments?

## 2. HBV modeling with daily data

Load daily HBV input data for calibration period from 2001 to 2011

### 2.1 Model Calibration

Calibration procedure involves following steps
- read calibration data
- set ranges for model parameters
- set catchment area 
- run optimization

#### Read calibration data

In [None]:
# Daily model input for catchment 10420 (calibration period).
# Column 0 becomes the index and is parsed as dates.
input_path = 'daily/inputs/daily_2001_2011/daily_input_data_10420.csv'
daily_data_calib_10420 = pd.read_csv(input_path, sep=';', index_col=0, parse_dates=True)
daily_data_calib_10420

This is a function for plotting discharge and precipitation data. It takes a DataFrame with precipitation and discharge data as input, and you can specify the time span by using `beg_time` and `end_time`.

In [None]:
def plot_q_pcp(df, beg_time='2001-01-01', end_time='2021-01-01'):
    """Plot discharge and precipitation of ``df`` against time in one figure.

    Discharge (column ``dis_ref``) is drawn on the left axis. Precipitation
    (column ``ppt``) is drawn on a second, inverted axis so that it hangs from
    the top. Each y-range spans twice the respective maximum, which keeps the
    two curves in separate halves of the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Table providing the columns ``dis_ref`` and ``ppt``.
    beg_time, end_time : str
        First and last index label of the window to display.

    Raises
    ------
    ValueError
        If the requested window contains no rows.

    Notes
    -----
    The current figure is cleared and reused, and ``plt.show(block=True)`` is
    called at the end.
    """
    window = df.loc[beg_time:end_time]
    if window.empty:
        # Fail before touching the figure instead of deep inside set_ylim().
        raise ValueError(f"no data between {beg_time} and {end_time}")

    stamps = window.index
    q = window['dis_ref'].values
    p = window['ppt'].values

    plt.clf()
    ax = plt.gca()

    ax.plot(stamps, q, color=[.5, .5, .5])
    # nanmax: vectorised and not thrown off by missing values in the series.
    ax.set_ylim([0, np.nanmax(q) * 2])
    ax.set_ylabel('Q [m³/s]')
    ax.set_xlabel('time')

    # Rotate x-axis labels diagonally; ha='right' keeps them aligned with their ticks.
    plt.xticks(rotation=45, ha='right')

    ax2 = ax.twinx()
    ax2.plot(stamps, p, color=[0, .14, .5], linestyle='-', linewidth=1.5, label="Precipitation (Pcp)")
    # Reversed limits flip the precipitation axis.
    ax2.set_ylim([np.nanmax(p) * 2, 0])
    ax2.set_ylabel('Pcp [mm / time_step]')

    plt.show(block=True)

You can modify the time span of displayed precipitation and runoff data.

In [None]:
# Adjust beg_time / end_time to zoom into a shorter period.
plot_q_pcp(
    daily_data_calib_10420,
    beg_time='2001-01-01',
    end_time='2021-01-01',
)

#### Set parameter bounds
Set upper and lower bounds for the model calibration (parameter optimization). To save computational time, we have fixed some of the parameter values in this exercise.

In [None]:
# Calibration bounds as (lower, upper) pairs, keyed by HBV parameter name.
# Entries whose two bounds coincide are the parameters held fixed in this exercise.
prms_buds_dict = {
    # snow storage parameters
    'snw_dth': (33.00, 33.00),  # Initial depth [L]
    'snw_ast': (-0.62, -0.62),  # Air snow TEM [K].
    'snw_amt': (-0.54, -0.54),  # Air melt TEM [K].
    'snw_amf': (1.9, 1.9),      # Air melt factor [L/TK].
    'snw_pmf': (0.65, 0.65),    # PPT melt factor [L/LTK].
    # soil storage parameters
    'sl0_mse': (5.66, 5.66),    # Soil 0 initial depth [L].
    'sl1_mse': (197.00, 197),   # Soil 1 initial depth [L].
    'sl0_fcy': (0.00, 2e+2),    # Field capacity [L].
    'sl0_bt0': (0.00, 3.00),    # Beta [-].
    'sl1_pwp': (0.00, 4e+2),    # PWP [L].
    'sl1_fcy': (0.00, 4e+2),    # Field capacity [L].
    'sl1_bt0': (2.5, 2.50),     # Beta [-].
    # upper reservoir parameters
    'urr_dth': (6.1, 6.1),      # URR initial depth [L].
    'urr_rsr': (0.00, 1.00),    # Runoff split ratio [-].
    'urr_tdh': (0.00, 1e+2),    # Threshold depth [L].
    'urr_tdr': (0.00, 1.00),    # Threshold DIS const. [1/T].
    'urr_cst': (0.00, 1.00),    # RNF const. [1/T].
    'urr_dro': (0.00, 1.00),    # DIS ratio [-].
    'urr_ulc': (0.00, 1.00),    # URR-to-LRR const. [1/T].
    # lower reservoir parameters
    'lrr_dth': (1.14, 1.14),    # LRR initial depth [L].
    'lrr_tdh': (0.00, 1e+4),    # Threshold depth [L].
    'lrr_cst': (0.00, 1.00),    # Runoff const. [1/T].
    'lrr_dro': (0.00, 1.00),    # Discharge ratio [-].
    }

# Human-readable description (with units) for each parameter key; used further
# down as the 'Long names' column of the parameter comparison table.
prms_long_names = {
    # snow storage parameters
    'snw_dth': "Initial depth (L)", 'snw_ast': "Air snow TEM (K)",
    'snw_amt': "Air melt TEM (K)", 'snw_amf': "Air melt factor (L/TK)",
    'snw_pmf': "PPT melt factor (L/LTK)",
    # soil storage parameters
    'sl0_mse': "Soil 0 initial depth (L)", 'sl1_mse': "Soil 1 initial depth (L)",
    'sl0_fcy': "Field capacity (L)",  'sl0_bt0': "Beta (-)",
    'sl1_pwp': "PWP (L)",  'sl1_fcy': "Field capacity (L)",
    'sl1_bt0': "Beta (-)",
    # upper reservoir parameters
    'urr_dth': "URR initial depth (L)", 'urr_rsr': "Runoff split ratio (-)",
    'urr_tdh': "Threshold depth (L)", 'urr_tdr': "Threshold DIS const  (1/T)",
    'urr_cst': "RNF const  (1/T)", 'urr_dro': "DIS ratio (-)",
    'urr_ulc': "URR-to-LRR const  (1/T)", 
    # lower reservoir parameters
    'lrr_dth': "LRR initial depth (L)", 'lrr_tdh': "Threshold depth (L)",
    'lrr_cst': "Runoff const  (1/T)", 'lrr_dro': "Discharge ratio (-)"
}

#### Model calibration
Feed the calibration function with:
- parameter dictionary
- input DataFrame
- catchment area
- output directory, where optimized parameters and other optimization outputs will be stored as csv files
- catchment label, string used in a name of the generated files

The following code will start the model calibration. Depending on the performance of your computer, this may take several minutes.


In [None]:
# Catchment area in [m^2] for catchment 10420.
c_area_10420 = 749117129.0

# Start the optimisation with the bounds and the daily calibration data;
# `output_dir` is the folder handed to the calibration routine for its result files.
HBVd_calib.main(prms_buds_dict, daily_data_calib_10420, cat_area=c_area_10420,
                output_dir=r'daily/calibration_hbv_daily', cat_label='10420')

The results will be stored in the folder `daily/calibration_hbv_daily` (relative to your working directory).
You will find here following files:
- "prms_{cat_label}_sr.csv" - optimized parameter values
- "prf_{cat_label}_sr.csv" - performance metrics
- "dis_sim_{cat_label}_df.csv" - simulated and reference flows
- "sim_{cat_label}_otps_df.csv" - simulated state variables

### Model performance during calibration
Let's read scores evaluating the model calibration and judge the model/calibration performance

In [None]:
# Performance scores of the calibration run. The first row of the file is
# skipped and the two columns get explicit names.
daily_prf_calib_10420 = pd.read_csv(r'daily/calibration_hbv_daily/prf_10420_sr.csv',
                                    sep=';', skiprows=1, names=['score', 'value'])

In [None]:
# Display the calibration score table.
daily_prf_calib_10420

### Question: 
- How do you judge the model performance based on the Nash-Sutcliffe (NS) score shown above?

### 2.2 Model Validation

Now let's validate the model with data from 2011-2025

### Load model parameters and input data

Load the model parameters (optimized in the previous steps):

In [None]:
# Optimised parameters written by the daily calibration: column 0 is the index,
# the first remaining column is taken as a Series.
prms_path = Path(r'daily/calibration_hbv_daily/prms_10420_sr.csv')
daily_prms_10420 = pd.read_csv(
    prms_path,
    sep=';',
    index_col=0,
).iloc[:, 0]

Load the input data for the validation period:

In [None]:
# Daily model input for the validation period, read the same way as the calibration input.
input_path = 'daily/inputs/daily_2011_2025/daily_input_data_10420.csv'
daily_data_valid_10420 = pd.read_csv(
    input_path,
    sep=';',
    index_col=0,
    parse_dates=True,
)
daily_data_valid_10420

### Run the model validation

In [None]:
# Run the model with the calibrated parameters on the validation data.
# 86400 s per step corresponds to the daily resolution of the input.
HBVd_valid.main(
    daily_prms_10420,
    daily_data_valid_10420,
    cat_area=c_area_10420,
    secs_per_step=86400,
    output_dir=r'daily/validation_hbv_daily',
    cat_label='10420',
)

### Evaluate the model performance
load the performance scores:


In [None]:
# load performance scores
daily_prf_valid_10420 = pd.read_csv(
    r'daily/validation_hbv_daily/prf_10420_sr.csv', 
    sep=';', 
    skiprows = 1, 
    names = ['score', 'value'])
daily_prf_valid_10420

Compare scores for the calibration and validation:

In [None]:
# Define colors and labels for each point
colors = ['red', 'blue', 'green', 'orange', 'purple', 'grey']
labels = daily_prf_valid_10420.score

# Create scatter plot
for i in range(1,6):
    plt.scatter(
        daily_prf_valid_10420.value[i], 
        daily_prf_calib_10420.value[i], 
        color=colors[i], label=labels[i], 
        edgecolors='k', s=100)

# Add identity line (y = x)
plt.plot([0,5], [0,5], linestyle='--', color='black', label="Identity Line")

# Set labels and title
plt.xlabel("Daily PRF Valid Values")
plt.ylabel("Daily PRF Calib Values")
plt.title("Scores for validation and calibration - catchment 10420")

# Add legend
plt.legend()

# Set axis limits to match the identity line range
plt.xlim(0.5, 1)
plt.ylim(0.5, 1)

# Show grid and plot
plt.grid(True)
plt.show()

### Question: 
- How do you judge the model performance based on the Nash-Sutcliffe (NS) score above for the validation period?
- What could be the reasons that the NS is worse?

### Compare simulated and reference flow time series
Plot the observed and simulated discharges for the validation period 2016-2022 using the function below. You can adjust the time span by passing start and end dates to the function, e.g. `beg_time='2021-01-01', end_time='2022-01-01'`.

In [None]:
def plot_sim_ref_ts(dis_df, beg_time='2021-07-01', end_time='2021-08-01'):
    """Draw reference and simulated discharge of a time window on the current axes.

    Parameters
    ----------
    dis_df : pandas.DataFrame
        Table providing the columns ``ref`` and ``sim``.
    beg_time, end_time : str
        Bounds of the label slice applied to the index of ``dis_df``.
    """
    window = dis_df.loc[beg_time:end_time, :]

    for column, legend_name in (('ref', 'REF'), ('sim', 'SIM')):
        plt.plot(window.index, window[column].values, label=legend_name, alpha=0.8)

    plt.xlabel('Time [day]')
    plt.xticks(rotation=45, ha="right")
    plt.ylabel('Discharge [m$^3$.s$^{-1}$]')

    # Keep the grid behind the curves.
    plt.grid()
    plt.gca().set_axisbelow(True)
    plt.legend()

This code loads simulated and reference data into a dataframe:

In [None]:
# Simulated and reference flows written by the daily validation run.
daily_sim_valid_10420 = pd.read_csv(r'daily/validation_hbv_daily/dis_sim_10420_df.csv',
                                    sep=';', index_col=0)
# The index is read as text; convert it to timestamps (date only).
daily_sim_valid_10420.index = pd.to_datetime(daily_sim_valid_10420.index, format='%Y-%m-%d')
daily_sim_valid_10420

## Exercise 1:
Plot the observed and simulated discharge stored in the variable `daily_sim_valid_10420` using the `plot_sim_ref_ts` function and explore the differences during peak flows. You can also look at yearly flow maxima, e.g. using this chunk of code to identify them: `peak_flow_dates = daily_sim_valid_10420.resample('YE')['ref'].apply(lambda x: x.idxmax())`


In [None]:
# enter code here...




In [None]:
# Interactive gate: typing 'Solution' at the prompt runs %load for the solution file.
# NOTE(review): input() blocks, so an unattended "Run All" will wait at this cell.
if input ("Enter 'Solution' to display solution: ") == 'Solution':
    %load solutions/1a_solution.py

In [None]:
# add your code here .. 

In [None]:
# Interactive gate: typing 'Solution' at the prompt runs %load for the solution file.
# NOTE(review): input() blocks, so an unattended "Run All" will wait at this cell.
if input ("Enter 'Solution' to display solution: ") == 'Solution':
    %load solutions/1b_solution.py

### Question: 

- The peak discharge during the flood event in July 2021 at the gauge Altenahr was ~1200 m³/s. What is the issue here?

**Hint:** Have a look at the hourly discharges between 14 and 16 July 2021 and plot them. The code below loads the hourly input data into a DataFrame; the observed discharge is stored in the column `dis_ref`.

Plot hourly and daily reference flows:

In [None]:
# Hourly input table; the zipped CSV is passed to pandas directly.
hourly_data_valid_10420 = pd.read_csv(
    r'hourly/inputs/Intermet_gauge/hbv_input_data_10420.csv.zip',
    sep=';', index_col=0, parse_dates=True)

# Reference discharge at both resolutions over the same days:
# hourly as a solid blue line, daily as a red dashed line with markers.
hourly_data_valid_10420['dis_ref'].loc['2021-07-12':'2021-07-17 00:00'].plot(
    color='blue', linestyle='-', alpha=0.7, label="Hourly Data")
daily_data_valid_10420['dis_ref'].loc['2021-07-12':'2021-07-17'].plot(
    color='red', linestyle='--', marker='o', alpha=0.8, label="Daily Data")

plt.title("Discharge Data: Daily vs. Hourly Resolution")
plt.xlabel('Time [hh:mm, date]')
plt.ylabel('Discharge [m$^3$.s$^{-1}$]')
plt.xticks(rotation=45, ha="right")  # rotated tick labels stay readable

plt.grid(True)
# Opaque white legend box with a black frame so it stands out from the grid.
plt.legend(facecolor='white', edgecolor='black')

plt.show()

Compare hydrograph volumes:

In [None]:
# Runoff volume of the event by trapezoidal integration of the discharge:
#     V = dt * sum(Q[i] + Q[i+1]) / 2
# Each sample must be paired with its successor (values[1:] with values[:-1]).
# The earlier `values[:1]` broadcast the first sample onto every term.
hourly_q = hourly_data_valid_10420.dis_ref['2021-07-12':'2021-07-17 00:00'].values
daily_q = daily_data_valid_10420.dis_ref['2021-07-12':'2021-07-17'].values

hourly_volume = 3600 * (hourly_q[1:] + hourly_q[:-1]).sum() / 2        # dt = 1 h
daily_volume = 3600 * 24 * (daily_q[1:] + daily_q[:-1]).sum() / 2      # dt = 1 d

print(f"Hourly runoff volume in Mio m³: {round(1e-6 * hourly_volume, 2)}")
print(f"Daily runoff volume in Mio m³: {round(1e-6 * daily_volume, 2)}")

## 3. HBV modeling with hourly data

As we've seen above, a daily model resolution is not sufficient to capture the flood peaks in smaller, fast-reacting catchments. Before we investigate the influence of different (OS) rainfall inputs, we will look at how the model works with standard (Intermet gauge) rainfall data at hourly resolution.

### 3.1 Model validation with hourly data

First we load the hourly data for the Altenahr catchment

In [None]:
# Hourly model input (Intermet gauge product) for catchment 10420.
input_path = 'hourly/inputs/Intermet_gauge/hbv_input_data_10420.csv.zip'
hourly_data_valid_10420 = pd.read_csv(
    input_path,
    sep=';',
    index_col=0,
    parse_dates=True,
)
hourly_data_valid_10420

In [None]:
# Maximum of the hourly reference discharge (column `dis_ref`).
hourly_data_valid_10420.dis_ref.max()

## Exercise 2
Use the `plot_q_pcp` function to plot discharge and precipitation data. Plotting all hourly data might take some time, so consider restricting the time span with `beg_time` and `end_time`, e.g. to 2021.

In [None]:
#type your code here:



In [None]:
# Interactive gate: typing 'Solution' at the prompt runs %load for the solution file.
# NOTE(review): input() blocks, so an unattended "Run All" will wait at this cell.
if input("Enter 'Solution' to display solutions: ")=='Solution':
    %load solutions/2_solution.py

### Question:
- What do you observe with respect to precipitation and discharge from May - August 2021?




## Run model with hourly data
Run model with hourly data. Use model parameters obtained in the previous exercise (calibration with daily data).   

In [None]:
# Hourly input driven with the parameters from the DAILY calibration;
# 3600 s per step matches the hourly resolution.
HBVd_valid.main(
    daily_prms_10420,
    inp_dfe=hourly_data_valid_10420,
    cat_area=c_area_10420,
    secs_per_step=3600,
    output_dir=r'hourly/validation_Intermet',
    cat_label='10420',
)

In [None]:
# Simulated and reference flows of the hourly run; the index is converted from
# text to timestamps.
hourly_sim_valid_10420 = pd.read_csv(
    r'hourly/validation_Intermet/dis_sim_10420_df.csv',
    sep=';',
    index_col=0,
)
hourly_sim_valid_10420.index = pd.to_datetime(
    hourly_sim_valid_10420.index,
    format='%Y-%m-%d %H:%M:%S',
)
plot_sim_ref_ts(hourly_sim_valid_10420, beg_time='2021-07-11', end_time='2021-07-21')

### Question
What is the issue here? Why is the peak now overestimated?

### Run model with hourly data once again
We did calibration with hourly input data for you, so just load the optimized parameters and compare them with daily-data based parameters 


In [None]:
# Parameters from the pre-computed calibration on hourly data.
prms_path = Path(r'hourly/calibration_Intermet_gauge/prms_10420_sr.csv')
hourly_prms_10420 = pd.read_csv(prms_path, sep=';', index_col=0).iloc[:, 0]

# Side-by-side table: description, daily-based value, hourly-based value.
df = pd.concat(
    [pd.Series(prms_long_names), daily_prms_10420, hourly_prms_10420],
    axis=1,
).set_axis(['Long names', 'Daily calibration', 'Hourly calibration'], axis=1)
df

### Exercise 3
Run the model again with the parameters optimized on hourly Intermet interpolated gauge data. They are now stored in `hourly_prms_10420`.

In [None]:
# Interactive gate: typing 'Solution' at the prompt runs %load for the solution file.
# NOTE(review): input() blocks, so an unattended "Run All" will wait at this cell.
if input("Enter 'Solution' to display solutions: ")=='Solution':
    %load solutions/3_solution.py

In [None]:
# Performance scores of the hourly validation run. Read with the same layout as
# the daily score tables (first row skipped, columns named 'score' / 'value')
# so that all score tables in this notebook look alike.
hourly_prf_valid_10420_new = pd.read_csv(
    r'hourly/validation_Intermet/prf_10420_sr.csv',
    sep=';',
    skiprows=1,
    names=['score', 'value'])

In [None]:
# Display the score table of the hourly validation run.
hourly_prf_valid_10420_new

### Plot simulation with hourly data

In [None]:
# store old simluation into a separate variable
hourly_sim_valid_10420_new = pd.read_csv(
    r'hourly/validation_Intermet/dis_sim_10420_df.csv', 
    sep=';', 
    index_col = 0)
hourly_sim_valid_10420_new.index = pd.to_datetime(
    hourly_sim_valid_10420_new.index, 
    format='%Y-%m-%d %H:%M:%S')

In [None]:
# Same event window from both simulations.
ts_old = hourly_sim_valid_10420.loc['2021-07-11':'2021-07-21']
ts_new = hourly_sim_valid_10420_new.loc['2021-07-11':'2021-07-21']

# Reference as a thick line, the earlier simulation dashed grey, the new one in orange.
plt.plot(ts_old.index, ts_old['ref'].values, lw=3, alpha=0.8, label='REF')
plt.plot(ts_old.index, ts_old['sim'].values, ls='--', color='grey', alpha=0.8, label='SIM_old')
plt.plot(ts_new.index, ts_new['sim'].values, lw=2, color='darkorange', alpha=0.8, label='SIM_new')

plt.xlabel('Time [day]')
plt.ylabel('Discharge [m$^3$.s$^{-1}$]')
plt.xticks(rotation=45, ha="right")

# Keep the grid behind the curves.
plt.grid()
plt.gca().set_axisbelow(True)
plt.legend()

### 3.2 Rainfall from OS Sensors

The following code will plot a map showing the locations of the PWS (CMLs are not included in this map) together with the DWD and RLP rain gauges.

In [None]:
# Catchment outlines plus all three gauge networks (RLP, PWS, DWD).
plt.plot(df_kreuzberg_shape.lon, df_kreuzberg_shape.lat, color='grey', label="catchment Kreuzberg")
plt.plot(df_altenahr_shape.lon, df_altenahr_shape.lat, color='black', label="catchment Altenahr")
plt.scatter(df_rlp.lon, df_rlp.lat, color='C2', label="RLP rain gauge", alpha=0.8)
plt.scatter(df_pws.lon, df_pws.lat, color='C1', label="PWS rain gauge", alpha=0.8)
plt.scatter(df_dwd.lon, df_dwd.lat, color='C0', label='DWD rain gauge', alpha=0.8)
plt.xlim(6.5, 7.2)
plt.ylim(50.2, 50.6)
plt.legend(loc='lower right')
# The map had no axis labels: x carries `lon`, y carries `lat`.
plt.xlabel('Longitude [°E]')
plt.ylabel('Latitude [°N]')
plt.show()

<img src="solutions/os_map.png" alt="Description" style="width:600px;">

Now we compare the different precipitation datasets (official and OS) for the event itself, which took place on 14/15 July 2021, starting with the Altenahr catchment. The following precipitation datasets are available for the two catchments {ID}:
- `Intermet`: Interpolated DWD and RLP gauge data
- `RW_cml`: CML adjusted radar data
- `RW_gauge`: gauge adjusted radar data
- `RW_gauge_cml`: CML and gauge adjusted radar data
- `dwd`: Interpolation from DWD rain gauges
- `dwd_pws`: Interpolation from DWD rain gauges and PWS
- `dwd_pws_cml`: Interpolation from DWD rain gauges, PWS and CML

Task: load and plot the different data sets for the two catchments. The data are located in the folder `hourly/OS_pcp`.

#### Altenahr (ID: 10420)

In [None]:
# Precipitation series of the different products for catchment 10420.
os_pcp_10420 = pd.read_csv(
    r'hourly/OS_pcp/os_pcp_10420.csv',
    sep=';',
    index_col=0,
    parse_dates=True,
)

In [None]:
# One line per column of the table, plotted against the time index.
os_pcp_10420.plot()

#### Kreuzberg (ID: 10460)

In [None]:
# Precipitation series of the different products for catchment 10460,
# plotted with one line per column.
os_pcp_10460 = pd.read_csv(
    r'hourly/OS_pcp/os_pcp_10460.csv',
    sep=';',
    index_col=0,
    parse_dates=True,
)
os_pcp_10460.plot()

* Question: What do you observe here?

### 3.3 Running the model with OS data

We will now run the model with different rainfall inputs. Temperature and PET are always the same. The files with modified precipitation data are in the folder `hourly/inputs`, in the following subfolders:
- `Intermet_gauge`: Interpolated DWD and RLP gauge data
- `RW_cml`: CML adjusted radar data
- `RW_gauge`: gauge adjusted radar data
- `RW_gauge_cml`: CML and gauge adjusted radar data
- `dwd`: Interpolation from DWD rain gauges
- `dwd_pws`: Interpolation from DWD rain gauges and PWS
- `dwd_pws_cml`: Interpolation from DWD rain gauges, PWS and CML

## Exercise 4
- Modify the string in the `dataset` variable to match the folder names of the different rainfall inputs (above) and run the validation. In the cells below, compare the time series.
- Run the simulation for the Kreuzberg catchment.
- Run the simulation for Kreuzberg catchment

In [None]:
# Name of the rainfall product; it selects both the input and the output folder.
dataset = 'dwd_pws'
input_dir_name = f'hourly/inputs/{dataset}'
output_dir_name = f'hourly/validation_{dataset}'

ID = '10420'
hourly_valid_10420 = pd.read_csv(
    f'{input_dir_name}/hbv_input_data_{ID}.csv.zip',
    sep=';', index_col=0, parse_dates=True)

c_area_10420 = 749117129.0  # area of the catchment in m2

# Hourly run with the parameters from the hourly calibration.
HBVd_valid.main(
    hourly_prms_10420,
    hourly_valid_10420,
    cat_area=c_area_10420,
    secs_per_step=3600,
    output_dir=output_dir_name,
    cat_label=ID,
)


In [None]:
# Result of the run for the selected dataset; note that this rebinds
# `hourly_sim_valid_10420`, which held the Intermet run before.
hourly_sim_valid_10420 = pd.read_csv(
    f'{output_dir_name}/dis_sim_10420_df.csv',
    sep=';',
    index_col=0,
)
hourly_sim_valid_10420.index = pd.to_datetime(
    hourly_sim_valid_10420.index,
    format='%Y-%m-%d %H:%M:%S',
)
plot_sim_ref_ts(hourly_sim_valid_10420, beg_time='2021-07-13', end_time='2021-07-17')

### Kreuzberg catchment simulation
Use the different OS precipitation data for the Kreuzberg Catchment as done above by changing the `dataset`

In [None]:
# Catchment area in [m^2] for catchment 10460.
# NOTE(review): the run cell below reassigns c_area_10460 to 45*1e6, so this
# value is not the one used by the model - confirm which figure is correct.
c_area_10460 = 45117129.0

# Parameters from the pre-computed hourly calibration of catchment 10460.
prms_path = Path(r'hourly/calibration_Intermet_gauge/prms_10460_sr.csv')
hourly_prms_10460 = pd.read_csv(
    prms_path,
    sep=';',
    index_col=0,
).iloc[:, 0]

In [None]:
# Name of the rainfall product; it selects both the input and the output folder.
dataset = 'Intermet_gauge'
input_dir_name = f'hourly/inputs/{dataset}'
output_dir_name = f'hourly/validation_{dataset}'

ID = '10460'
hourly_valid_10460 = pd.read_csv(
    f'{input_dir_name}/hbv_input_data_{ID}.csv.zip',
    sep=';', index_col=0, parse_dates=True)

# NOTE(review): overrides the 45117129.0 assigned in the cell above - confirm
# which area is intended.
c_area_10460 = 45*1e6  # area of the catchment in m2

# Shift the reference discharge by a small constant.
# NOTE(review): presumably to avoid zero flows in the score computation - confirm.
hourly_valid_10460['dis_ref'] = hourly_valid_10460['dis_ref'] + 0.001

HBVd_valid.main(
    hourly_prms_10460,
    hourly_valid_10460,
    cat_area=c_area_10460,
    secs_per_step=3600,
    output_dir=output_dir_name,
    cat_label=ID,
)

In [None]:
# Simulated and reference flows of the Kreuzberg run for the selected dataset.
hourly_sim_valid_10460 = pd.read_csv(
    f'{output_dir_name}/dis_sim_10460_df.csv',
    sep=';',
    index_col=0,
)
hourly_sim_valid_10460.index = pd.to_datetime(
    hourly_sim_valid_10460.index,
    format='%Y-%m-%d %H:%M:%S',
)

plot_sim_ref_ts(hourly_sim_valid_10460, beg_time='2021-07-13', end_time='2021-07-17')

## Question
- What do you observe when you use different datasets for the model?
- Why are results not always optimal?

### 3.4 Spatial patterns of different OS precipitation data sets

As a final exercise we can look at and compare the spatial rainfall data from the different precipitation datasets. You will find all the data sets as netCDF files in the folder `hourly/OS_pcp`. These datasets contain data from xxx to xxx (TODO: fill in the covered period) and are as follows:
- `dwd.nc` : Interpolated rainfall from DWD and RLP rain gauges
- `dwd_pws.nc` : Interpolated rainfall from DWD,RLP and QC'ed PWS
- `dwd_pws_cml.nc` : Interpolated rainfall from DWD,RLP and QC'ed PWS and CML
- `RW_gauge.nc` : Gauge adjusted radar data
- `RW_cml.nc` : CML adjusted radar data
- `RW_gauge_cml.nc` : Gauge and CML adjusted radar data

Here's an example how you can load and explore these data:

In [None]:
# load RW_cml data set
ds_RW_cml=xr.open_dataset(r'hourly/OS_pcp/RW_cml.nc')
ds_dwd_pws=xr.open_dataset(r'hourly/OS_pcp/dwd_pws.nc')

In [None]:
# Display the dataset summary (dimensions, coordinates, variables).
ds_RW_cml

Plot maps and explore the data and spatial patterns. The variable in the RW datasets is called `RW`, in the dwd datasets `rainfall`. You have several options:
 - plot a single time step, e.g. `isel(time=30)`
 - plot the sum over the whole period by using `sum(dim='time')`
 - by adding `plot(x='longitudes', y='latitudes')` you can plot the map with lat/lon coordinates
 - you can also plot difference maps by subtracting two datasets

In [None]:
# Plot a map of variable 'RW' for a single time step, selected by position with isel(time=30).
ds_RW_cml.isel(time=30).RW.plot(x='longitudes', y='latitudes')

In [None]:
# Difference map of accumulated rainfall: dwd_pws minus RW_cml, each summed over
# positions 24..40 of the leading dimension.
# NOTE(review): the positional slice assumes `time` is the first dimension - confirm.
rain_sum_dwd_pws = ds_dwd_pws.rainfall[24:41].sum(dim='time')
rain_sum_RW_cml = ds_RW_cml.RW[24:41].sum(dim='time')
(rain_sum_dwd_pws - rain_sum_RW_cml).plot(x='longitudes', y='latitudes')

