# Evaluation of test year GCM-driven run

In [26]:
import valenspy as vp
from valenspy._utilities import load_yml
from valenspy.preprocessing_tasks.select import select_point
from valenspy.diagnostic_functions import mean_bias, mean_absolute_error, root_mean_square_error, spearman_correlation, perkins_skill_score
from pathlib import Path


import xarray as xr
import numpy as np
import pandas as pd


# Machine name - used for the paths of the (observational) datasets
machine = "hortense"

# Input manager configured for that machine; used further down to load data
manager = vp.InputManager(machine=machine)


## 0. Predefined settings

In [27]:
# Names of the reference and test datasets, the domain, the model and the
# experiment that are evaluated in this notebook
reference, test_set = "ERA5", "EC-Earth"
domain = "EUR11"
model = "CCLM"
experiment = "CB2_CCLM_EUR11_EC-Earth_test"

# NOTE(review): plural name but holds a single variable name as a string
variables = "tas"

In [28]:
# Variable that is loaded and evaluated in the cells below
variable = 'tas'

## 1. Functions

In [29]:
def load_data_exp(variable, model, experiment):
    """
    Load one variable of a model experiment and return it as computed data.

    The daily statistic that belongs to the variable is selected and passed,
    together with the experiment name, as path identifier to the input manager.

    Parameters
    ----------
    variable : str
        Name of the variable to load. Supported values are 'tas', 'clt', 'clh',
        'clm', 'cll' (daily statistic "mean"), 'pr' (daily statistic "sum") and
        'tasmax' (daily statistic "max").
    model : str
        Dataset name handed to ``manager.load_data``.
    experiment : str
        Experiment name, used as the first path identifier.

    Returns
    -------
    The ``variable`` entry of the loaded dataset after ``.compute()``
    (presumably an ``xarray.DataArray`` - confirm against ``manager.load_data``),
    with its time coordinate floored to the day.

    Raises
    ------
    ValueError
        If no daily statistic is defined for ``variable``.
    """

    # Select the daily statistic that is part of the file path of the variable
    if variable in ['tas', 'clt', 'clh', 'clm', 'cll']:
        daily_statistic = "mean"
    elif variable == 'pr':
        daily_statistic = "sum"
    elif variable == "tasmax":
        daily_statistic = "max"
    else:
        # Without this branch daily_statistic would be unbound and the call
        # below would fail with an unrelated UnboundLocalError.
        raise ValueError(
            f"No daily statistic defined for variable '{variable}'. "
            "Supported variables: tas, clt, clh, clm, cll, pr, tasmax."
        )

    # NOTE(review): a recorded run of this notebook ended in a FileNotFoundError
    # for freq="hourly" combined with these path identifiers - verify that the
    # frequency matches the files on disk.
    ds_mod = manager.load_data(model, [variable], freq="hourly", path_identifiers=[experiment, daily_statistic])

    # Drop the time-of-day part of the time stamps so that every value is
    # labelled with its calendar day
    ds_mod['time'] = ds_mod['time'].dt.floor('D')

    # Trigger the computation and return only the requested variable
    da_exp = ds_mod[variable].compute()

    return da_exp

In [30]:
#def split_data_in_years(data):
    

## 2. Running

In [31]:
# Load the data of the selected variable for the model experiment.
# NOTE(review): the result is stored as `da_ref` although the experiment
# (not the reference) is loaded here - confirm the intended name.
da_ref = load_data_exp(variable=variable, model=model, experiment=experiment)

FileNotFoundError: No files found for dataset CCLM, variables ['tas'], period None, frequency hourly, region None and path_identifiers ['CB2_CCLM_EUR11_EC-Earth_test', 'mean'].

## 2. Calculation and visualisation of time series
In this example, the reference data are split up by year. Each year is plotted in a spaghetti plot, in which the test year is also included.
Additionally, the P5, P50 and P95 of the reference data are included for every day of every year.

In [None]:
def get_ranks_metrics(df: pd.DataFrame):
    """
    Rank the performance of different models for every metric.

    Each row of `df` (one metric) is ranked independently, with rank 1 being the
    best model, according to the following criteria:

    - 'Mean Bias' is ranked by its absolute value, with smaller values (closer to zero) ranked higher.
    - 'Spearman Correlation' and 'Perkins Skill Score' are ranked in descending order, meaning higher values are better.
    - All other metrics are ranked in ascending order, where lower values are better.

    Ties are treated identically for all metrics: tied models share the lowest
    rank of their group (``method='min'``).

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame where each row corresponds to a metric. The column 'metric' holds the
        metric name and all other columns contain the performance values of the different models.

    Returns
    -------
    pandas.DataFrame
        A DataFrame where each value is replaced by its rank based on the ranking criteria for the
        corresponding metric. The rows are indexed by the metric names; the 'metric' column itself
        is not part of the result.

    Raises
    ------
    KeyError
        If `df` has no column named 'metric'.
    """

    if 'metric' not in df.columns:
        raise KeyError("get_ranks_metrics expects a column named 'metric' holding the metric names")

    # Metrics for which a higher value means a better performance
    descending_metrics = ('Spearman Correlation', 'Perkins Skill Score')

    # Rank the model values of a single metric (one row of df)
    def rank_values(row):

        # Select the model values by label, so the position of the 'metric'
        # column within the DataFrame does not matter
        scores = row.drop(labels='metric')

        if row['metric'] == 'Mean Bias':
            # Only the distance to zero counts, not the sign of the bias
            return scores.abs().rank(ascending=True, method='min')

        higher_is_better = row['metric'] in descending_metrics
        return scores.rank(ascending=not higher_is_better, method='min')

    # Apply the ranking row by row and label the rows with the metric names
    df_ranked = df.apply(rank_values, axis=1).set_index(df['metric'])

    return df_ranked


## 3. Calculation of P5, P50, P95 for each year 
In this example, the P5, P50 and P95 for each year are calculated for the reference run and the test year. 