In [None]:
#| default_exp losses

# Losses

> Loss functions for model evaluation.
> 

> The most important train signal is the forecast error, which is the difference between the observed value $y_{\tau}$ and the prediction $\hat{y}_{\tau}$, at time $\tau$:
> 
> $$ e_{\tau} = y_{\tau}-\hat{y}_{\tau} \qquad \qquad \tau \in \{t+1,\dots,t+H \} $$
> 
> The train loss summarizes the forecast errors in different evaluation metrics.

In [None]:
#| export
from typing import TYPE_CHECKING, Callable, List, Optional, Union

if TYPE_CHECKING:
    import polars as pl

import numpy as np
import pandas as pd

from utilsforecast.compat import DataFrame, pl_DataFrame, pl_col

In [None]:
#| hide
import warnings

from nbdev import show_doc

In [None]:
#| hide
warnings.filterwarnings('ignore', message='Unknown section References')

In [None]:
import polars as pl

from utilsforecast.data import generate_series

In [None]:
# Synthetic fixtures shared by the checks below: the same generation settings
# are used for the pandas and the polars engines.
models = ['model0', 'model1']
shared_kwargs = dict(static_as_categorical=False, n_models=2, level=[80])
series = generate_series(10, **shared_kwargs)
series_pl = generate_series(10, engine='polars', **shared_kwargs)

# <span style="color:DarkOrange">1. Scale-dependent Errors </span>

## Mean Absolute Error (MAE)
> $$ \mathrm{MAE}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}_{\tau}) = \frac{1}{H} \sum^{t+H}_{\tau=t+1} |y_{\tau} - \hat{y}_{\tau}| $$

![](imgs/losses/mae_loss.png)

In [None]:
#| exporti
def _base_docstring(*args, **kwargs) -> Callable:
    base_docstring = """

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, actual values and predictions.
    models : list of str
        Columns that identify the models predictions.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (default='y')
        Column that contains the target.

    Returns
    -------
    pandas or polars Dataframe
        dataframe with one row per id and one column per model.
    """
    def docstring_decorator(f: Callable):
        if f.__doc__ is not None:
            f.__doc__ += base_docstring
        return f

    return docstring_decorator(*args, **kwargs)

In [None]:
#| exporti
def _pl_agg_expr(
    df: pl_DataFrame,
    models: List[str],
    id_col: str,
    gen_expr: Callable[[str], 'pl.Expr'],
) -> pl_DataFrame:
    """Evaluate one expression per model and average the results by id.

    `gen_expr` is called with each model name and must return the expression
    to compute for it; the id column is kept next to the computed columns and
    used as the grouping key.
    """
    projected = df.select([id_col] + [gen_expr(name) for name in models])
    try:
        return projected.group_by(id_col).mean()
    except AttributeError:
        # fall back to the `groupby` spelling when `group_by` is not available
        return projected.groupby(id_col).mean()

In [None]:
#| export
@_base_docstring
def mae(
    df: DataFrame,
    models: List[str],
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Mean Absolute Error (MAE)

    Averages, separately for each series, the absolute
    difference between the target and the predictions
    of every model. The result is expressed in the same
    units as the target."""
    if not isinstance(df, pd.DataFrame):
        # non-pandas input: one absolute-error expression per model, averaged by id
        def abs_error(name):
            return pl_col(target_col).sub(pl_col(name)).abs().alias(name)

        return _pl_agg_expr(df, models, id_col, abs_error)
    abs_errors = df[models].sub(df[target_col], axis=0).abs()
    per_id = abs_errors.groupby(df[id_col], observed=True).mean()
    per_id.index.name = id_col
    return per_id.reset_index()

In [None]:
show_doc(mae)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L63){target="_blank" style="float:right; font-size:smaller"}

### mae

>      mae
>           (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataFra
>           me], models:List[str], id_col:str='unique_id', target_col:str='y')

Mean Absolute Error (MAE)

MAE measures the relative prediction
accuracy of a forecasting method by calculating the
deviation of the prediction and the true
value at a given time and averages these devations
over the length of the series.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, actual values and predictions. |
| models | List |  | Columns that identify the models predictions. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
def pd_vs_pl(pd_df, pl_df, models):
    """Assert that the pandas and polars results hold the same values for `models`.

    The polars frame is sorted by 'unique_id' before comparing; the pandas
    frame is compared in the order it is given.
    """
    pandas_values = pd_df[models].to_numpy()
    polars_values = pl_df.sort('unique_id').select(models).to_numpy()
    np.testing.assert_allclose(pandas_values, polars_values)

In [None]:
# the pandas and polars implementations must agree on MAE
mae_pd = mae(series, models)
mae_pl = mae(series_pl, models)
pd_vs_pl(mae_pd, mae_pl, models)

## Mean Squared Error
> $$ \mathrm{MSE}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}_{\tau}) = \frac{1}{H} \sum^{t+H}_{\tau=t+1} (y_{\tau} - \hat{y}_{\tau})^{2} $$

![](imgs/losses/mse_loss.png)

In [None]:
#| export
@_base_docstring
def mse(
    df: DataFrame,
    models: List[str],
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Mean Squared Error (MSE)

    Averages, separately for each series, the squared
    difference between the target and the predictions
    of every model. Squaring makes large deviations
    weigh more than small ones."""
    if not isinstance(df, pd.DataFrame):
        # non-pandas input: one squared-error expression per model, averaged by id
        def squared_error(name):
            return pl_col(target_col).sub(pl_col(name)).pow(2).alias(name)

        return _pl_agg_expr(df, models, id_col, squared_error)
    squared_errors = df[models].sub(df[target_col], axis=0).pow(2)
    per_id = squared_errors.groupby(df[id_col], observed=True).mean()
    per_id.index.name = id_col
    return per_id.reset_index()

In [None]:
show_doc(mse)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L95){target="_blank" style="float:right; font-size:smaller"}

### mse

>      mse
>           (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataFra
>           me], models:List[str], id_col:str='unique_id', target_col:str='y')

Mean Squared Error (MSE)

MSE measures the relative prediction
accuracy of a forecasting method by calculating the 
squared deviation of the prediction and the true
value at a given time, and averages these devations
over the length of the series.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, actual values and predictions. |
| models | List |  | Columns that identify the models predictions. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# the pandas and polars implementations must agree on MSE
mse_pd = mse(series, models)
mse_pl = mse(series_pl, models)
pd_vs_pl(mse_pd, mse_pl, models)

## Root Mean Squared Error
> $$ \mathrm{RMSE}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}_{\tau}) = \sqrt{\frac{1}{H} \sum^{t+H}_{\tau=t+1} (y_{\tau} - \hat{y}_{\tau})^{2}} $$

![](imgs/losses/rmse_loss.png)

In [None]:
#| export
@_base_docstring
def rmse(
    df: DataFrame,
    models: List[str],
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Root Mean Squared Error (RMSE)

    RMSE is the square root of the mean squared error of
    each series. Taking the root brings the metric back to
    the scale of the original time series, so comparing it
    across series is only meaningful when they share a
    common scale.
    RMSE has a direct connection to the L2 norm."""
    # mse already returns one row per id; only the model columns need the root
    res = mse(df, models, id_col, target_col)
    if isinstance(res, pd.DataFrame):
        res[models] = res[models].pow(0.5)
    else:
        res = res.with_columns(*[pl_col(c).pow(0.5) for c in models])
    return res

In [None]:
show_doc(rmse)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L127){target="_blank" style="float:right; font-size:smaller"}

### rmse

>      rmse
>            (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataFr
>            ame], models:List[str], id_col:str='unique_id', target_col:str='y')

Root Mean Squared Error (RMSE)

RMSE measures the relative prediction
accuracy of a forecasting method by calculating the squared deviation
of the prediction and the observed value at a given time and
averages these devations over the length of the series.
Finally the RMSE will be in the same scale
as the original time series so its comparison with other
series is possible only if they share a common scale. 
RMSE has a direct connection to the L2 norm.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, actual values and predictions. |
| models | List |  | Columns that identify the models predictions. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# the pandas and polars implementations must agree on RMSE
rmse_pd = rmse(series, models)
rmse_pl = rmse(series_pl, models)
pd_vs_pl(rmse_pd, rmse_pl, models)

# <span style="color:DarkOrange">2. Percentage Errors </span>

## Mean Absolute Percentage Error
> $$ \mathrm{MAPE}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}_{\tau}) = \frac{1}{H} \sum^{t+H}_{\tau=t+1} \frac{|y_{\tau}-\hat{y}_{\tau}|}{|y_{\tau}|} $$

![](imgs/losses/mape_loss.png)

In [None]:
#| exporti
def _zero_to_nan(series: Union[pd.Series, 'pl.Expr']) -> Union[pd.Series, 'pl.Expr']:
    if isinstance(series, pd.Series):
        res = series.replace(0, np.nan)
    else:
        import polars as pl
        
        res = (
            pl.when(series == 0)
            .then(float('nan'))
            .otherwise(series.abs())
        )
    return res

In [None]:
#| export
@_base_docstring
def mape(
    df: DataFrame,
    models: List[str],
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Mean Absolute Percentage Error (MAPE)

    MAPE measures the relative prediction
    accuracy of a forecasting method by dividing the absolute deviation
    between the prediction and the observed value by the absolute
    observed value, and averaging these ratios over the length of the series.
    The closer to zero an observed value is, the higher penalty MAPE loss
    assigns to the corresponding error. Rows whose observed value is
    exactly zero contribute a ratio of 0."""
    if isinstance(df, pd.DataFrame):
        res = (
            df[models]
            .sub(df[target_col], axis=0)
            .abs()
            # zero targets become NaN in the denominator and 0 after fillna
            .div(_zero_to_nan(df[target_col].abs()), axis=0)
            .fillna(0)
            .groupby(df[id_col], observed=True).mean()
        )
        res.index.name = id_col
        res = res.reset_index()
    else:
        def gen_expr(model):
            abs_err = pl_col(target_col).sub(pl_col(model)).abs()
            # take the absolute value explicitly, mirroring the pandas branch
            abs_target = _zero_to_nan(pl_col(target_col).abs())
            ratio = abs_err.truediv(abs_target).alias(model)
            return ratio.fill_nan(0)

        res = _pl_agg_expr(df, models, id_col, gen_expr)
    return res

In [None]:
show_doc(mape)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L162){target="_blank" style="float:right; font-size:smaller"}

### mape

>      mape
>            (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataFr
>            ame], models:List[str], id_col:str='unique_id', target_col:str='y')

Mean Absolute Percentage Error (MAPE)

MAPE measures the relative prediction
accuracy of a forecasting method by calculating the percentual deviation
of the prediction and the observed value at a given time and
averages these devations over the length of the series.
The closer to zero an observed value is, the higher penalty MAPE loss
assigns to the corresponding error.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, actual values and predictions. |
| models | List |  | Columns that identify the models predictions. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# the pandas and polars implementations must agree on MAPE
mape_pd = mape(series, models)
mape_pl = mape(series_pl, models)
pd_vs_pl(mape_pd, mape_pl, models)

## Symmetric Mean Absolute Percentage Error
> $$ \mathrm{SMAPE}_{2}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}_{\tau}) = \frac{1}{H} \sum^{t+H}_{\tau=t+1} \frac{|y_{\tau}-\hat{y}_{\tau}|}{|y_{\tau}|+|\hat{y}_{\tau}|} $$

In [None]:
#| export
@_base_docstring
def smape(
    df: DataFrame,
    models: List[str],
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Symmetric Mean Absolute Percentage Error (SMAPE)

    SMAPE measures the relative prediction
    accuracy of a forecasting method by calculating the absolute deviation
    of the prediction and the observed value scaled by the sum of the
    absolute values for the prediction and observed value at a
    given time, then averages these deviations over the length
    of the series. With this scaling the SMAPE is bounded between
    0 and 1 (0% and 100%), which is desirable compared to normal MAPE that
    may be undetermined when the target is zero. Rows where both the
    prediction and the observed value are zero contribute 0."""
    if isinstance(df, pd.DataFrame):
        delta_y = df[models].sub(df[target_col], axis=0).abs()
        scale = df[models].abs().add(df[target_col].abs(), axis=0)
        # 0 / 0 yields NaN, which is counted as a perfect forecast
        raw = delta_y.div(scale).fillna(0)
        res = raw.groupby(df[id_col], observed=True).mean()
        res.index.name = id_col
        res = res.reset_index()
    else:
        def gen_expr(model):
            abs_err = pl_col(model).sub(pl_col(target_col)).abs()
            # |y_hat| + |y|: each term needs its own absolute value, otherwise
            # negative targets shrink (or zero out) the denominator
            denominator = _zero_to_nan(pl_col(model).abs().add(pl_col(target_col).abs()))
            ratio = abs_err.truediv(denominator).alias(model)
            return ratio.fill_nan(0)

        res = _pl_agg_expr(df, models, id_col, gen_expr)
    return res

In [None]:
show_doc(smape)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L201){target="_blank" style="float:right; font-size:smaller"}

### smape

>      smape
>             (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataF
>             rame], models:List[str], id_col:str='unique_id',
>             target_col:str='y')

Symmetric Mean Absolute Percentage Error (SMAPE)

SMAPE measures the relative prediction
accuracy of a forecasting method by calculating the relative deviation
of the prediction and the observed value scaled by the sum of the
absolute values for the prediction and observed value at a
given time, then averages these devations over the length
of the series. This allows the SMAPE to have bounds between
0% and 200% which is desireble compared to normal MAPE that
may be undetermined when the target is zero.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, actual values and predictions. |
| models | List |  | Columns that identify the models predictions. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# the pandas and polars implementations must agree on SMAPE
smape_pd = smape(series, models)
smape_pl = smape(series_pl, models)
pd_vs_pl(smape_pd, smape_pl, models)

# <span style="color:DarkOrange">3. Scale-independent Errors </span>

## Mean Absolute Scaled Error
> $$ \mathrm{MASE}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}_{\tau}, \mathbf{\hat{y}}^{season}_{\tau}) = 
        \frac{1}{H} \sum^{t+H}_{\tau=t+1} \frac{|y_{\tau}-\hat{y}_{\tau}|}{\mathrm{MAE}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}^{season}_{\tau})} $$

![](imgs/losses/mase_loss.png)

In [None]:
#| export
def mase(
    df: DataFrame,
    models: List[str],
    seasonality: int,
    train_df: DataFrame,
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Mean Absolute Scaled Error (MASE)

    MASE measures the relative prediction
    accuracy of a forecasting method by comparing the mean absolute errors
    of the prediction and the observed value against the mean
    absolute errors of the seasonal naive model, computed on the training data.
    The MASE is one of the components of the Overall Weighted Average (OWA),
    used in the M4 Competition.
    Series whose seasonal naive error is zero get a MASE of 0.

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, actuals and predictions.
    models : list of str
        Columns that identify the models predictions.
    seasonality : int
        Main frequency of the time series;
        Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.
    train_df : pandas or polars DataFrame
        Training dataframe with id and actual values. Must be sorted by time.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (default='y')
        Column that contains the target.

    Returns
    -------
    pandas or polars Dataframe
        dataframe with one row per id and one column per model.

    References
    ----------
    [1] https://robjhyndman.com/papers/mase.pdf
    """
    mean_abs_err = mae(df, models, id_col, target_col)
    if isinstance(train_df, pd.DataFrame):
        mean_abs_err = mean_abs_err.set_index(id_col)
        # assume train_df is sorted
        lagged = train_df.groupby(id_col, observed=True)[target_col].shift(seasonality)
        # in-sample absolute error of the seasonal naive forecast
        scale = train_df[target_col].sub(lagged).abs()
        scale = scale.groupby(train_df[id_col], observed=True).mean()
        res = mean_abs_err.div(_zero_to_nan(scale), axis=0).fillna(0)
        res.index.name = id_col
        res = res.reset_index()
    else:
        # assume train_df is sorted
        lagged = pl_col(target_col).shift(seasonality).over(id_col)
        scale_expr = pl_col(target_col).sub(lagged).abs().alias('scale')
        scale = train_df.select([id_col, scale_expr])
        try:
            scale = scale.group_by(id_col).mean()
        except AttributeError:
            scale = scale.groupby(id_col).mean()
        # a when/then/otherwise expression is named after its `then` branch
        # (a literal here), so alias it explicitly to really overwrite 'scale'
        scale = scale.with_columns(_zero_to_nan(pl_col('scale')).alias('scale'))

        def gen_expr(model):
            return pl_col(model).truediv(pl_col('scale')).fill_nan(0).alias(model)

        full_df = mean_abs_err.join(scale, on=id_col, how='left')
        res = _pl_agg_expr(full_df, models, id_col, gen_expr)
    return res

In [None]:
show_doc(mase)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L238){target="_blank" style="float:right; font-size:smaller"}

### mase

>      mase
>            (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataFr
>            ame], models:List[str], seasonality:int, train_df:Union[pandas.core
>            .frame.DataFrame,polars.dataframe.frame.DataFrame],
>            id_col:str='unique_id', target_col:str='y')

Mean Absolute Scaled Error (MASE)

MASE measures the relative prediction
accuracy of a forecasting method by comparinng the mean absolute errors
of the prediction and the observed value against the mean
absolute errors of the seasonal naive model.
The MASE partially composed the Overall Weighted Average (OWA), 
used in the M4 Competition.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, actuals and predictions. |
| models | List |  | Columns that identify the models predictions. |
| seasonality | int |  | Main frequency of the time series;<br>Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1. |
| train_df | Union |  | Training dataframe with id and actual values. Must be sorted by time. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# the pandas and polars implementations must agree on MASE (weekly seasonality,
# using the same frame as training data)
mase_pd = mase(series, models, 7, series)
mase_pl = mase(series_pl, models, 7, series_pl)
pd_vs_pl(mase_pd, mase_pl, models)

## Relative Mean Absolute Error
> $$ \mathrm{RMAE}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}_{\tau}, \mathbf{\hat{y}}^{base}_{\tau}) = \frac{1}{H} \sum^{t+H}_{\tau=t+1} \frac{|y_{\tau}-\hat{y}_{\tau}|}{\mathrm{MAE}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}^{base}_{\tau})} $$

![](imgs/losses/rmae_loss.png)

In [None]:
#| export
def rmae(
    df: DataFrame,
    models: List[str],
    baseline_models: List[str],
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Relative Mean Absolute Error (RMAE)

    Calculates the RMAE between two sets of forecasts (from two different forecasting methods).
    Each model is paired with the baseline in the same position and the MAE of the
    model is divided by the MAE of the baseline.
    A number smaller than one implies that the forecast in the
    numerator is better than the forecast in the denominator.
    Pairs whose baseline MAE is zero get a value of 0.

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, times, actuals and predictions.
    models : list of str
        Columns that identify the models predictions.
    baseline_models : list of str
        Columns that identify the baseline models predictions.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (default='y')
        Column that contains the target.

    Returns
    -------
    pandas or polars Dataframe
        dataframe with one row per id and one column per (model, baseline)
        pair, named '{model}_div_{baseline}'.
    """
    # compute every baseline only once, even if it is paired with several models
    unique_baselines = list(dict.fromkeys(baseline_models))
    numerator = mae(df, models, id_col, target_col)
    denominator = mae(df, unique_baselines, id_col, target_col)
    # merge/join suffixes are only applied to overlapping column names, so the
    # baseline columns are renamed explicitly to always get the suffix
    renamer = {baseline: f'{baseline}_denominator' for baseline in unique_baselines}
    if isinstance(numerator, pd.DataFrame):
        denominator = denominator.rename(columns=renamer)
        res = numerator.merge(denominator, on=id_col)
        out_cols = [id_col]
        for model, baseline in zip(models, baseline_models):
            col_name = f'{model}_div_{baseline}'
            res[col_name] = res[model].div(_zero_to_nan(res[f'{baseline}_denominator'])).fillna(0)
            out_cols.append(col_name)
        res = res[out_cols]
    else:
        def gen_expr(model, baseline):
            baseline_mae = _zero_to_nan(pl_col(f'{baseline}_denominator'))
            return pl_col(model).truediv(baseline_mae).fill_nan(0).alias(f'{model}_div_{baseline}')

        denominator = denominator.rename(renamer)
        res = numerator.join(denominator, on=id_col)
        exprs = [gen_expr(m1, m2) for m1, m2 in zip(models, baseline_models)]
        res = res.select([id_col, *exprs])
    return res

In [None]:
show_doc(rmae)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L309){target="_blank" style="float:right; font-size:smaller"}

### rmae

>      rmae
>            (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataFr
>            ame], models:List[str], baseline_models:List[str],
>            id_col:str='unique_id', target_col:str='y')

Relative Mean Absolute Error (RMAE)

Calculates the RAME between two sets of forecasts (from two different forecasting methods).
A number smaller than one implies that the forecast in the 
numerator is better than the forecast in the denominator.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, times, actuals and predictions. |
| models | List |  | Columns that identify the models predictions. |
| baseline_models | List |  | Columns that identify the baseline models predictions. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# each model is compared against the other one as baseline
baselines = list(reversed(models))
ratio_cols = [f'{m1}_div_{m2}' for m1, m2 in zip(models, baselines)]
rmae_pd = rmae(series, models, baselines)
rmae_pl = rmae(series_pl, models, baselines)
pd_vs_pl(rmae_pd, rmae_pl, ratio_cols)

# <span style="color:DarkOrange">4. Probabilistic Errors </span>

## Quantile Loss
> $$ \mathrm{QL}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}^{(q)}_{\tau}) = 
        \frac{1}{H} \sum^{t+H}_{\tau=t+1} 
        \Big( (1-q)\,( \hat{y}^{(q)}_{\tau} - y_{\tau} )_{+} 
        + q\,( y_{\tau} - \hat{y}^{(q)}_{\tau} )_{+} \Big) $$

![](imgs/losses/q_loss.png)

In [None]:
#| export
def quantile_loss(
    df: DataFrame,
    models: List[str],
    q: float = 0.5,
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Quantile Loss (QL)

    QL measures the deviation of a quantile forecast.
    By weighting the absolute deviation in a non symmetric way, the
    loss pays more attention to under or over estimation:
    under-predictions (y > y_hat) are weighted by q and
    over-predictions (y < y_hat) by 1 - q.
    A common value for q is 0.5 for the deviation from the median.

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, times, actuals and predictions.
    models : list of str
        Columns that identify the models predictions.
    q : float (default=0.5)
        Quantile for the predictions' comparison.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (default='y')
        Column that contains the target.

    Returns
    -------
    pandas or polars Dataframe
        dataframe with one row per id and one column per model.
    """
    if isinstance(df, pd.DataFrame):
        # errors are defined as y - y_hat and must keep their sign: taking the
        # absolute value would collapse the loss to q * |error| for every q
        delta_y = df[models].sub(df[target_col], axis=0).mul(-1)
        res = (
            np.maximum(q * delta_y, (q - 1) * delta_y)
            .groupby(df[id_col], observed=True)
            .mean()
        )
        res.index.name = id_col
        res = res.reset_index()
    else:
        import polars as pl

        def gen_expr(model):
            # signed error y - y_hat, same convention as the pandas branch
            delta_y = pl_col(target_col).sub(pl_col(model))
            try:
                col_max = pl.max_horizontal([q * delta_y, (q - 1) * delta_y])
            except AttributeError:
                # fallback used when `max_horizontal` is not available
                col_max = pl.max([q * delta_y, (q - 1) * delta_y])
            return col_max.alias(model)

        res = _pl_agg_expr(df, models, id_col, gen_expr)
    return res

In [None]:
show_doc(quantile_loss)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L369){target="_blank" style="float:right; font-size:smaller"}

### quantile_loss

>      quantile_loss
>                     (df:Union[pandas.core.frame.DataFrame,polars.dataframe.fra
>                     me.DataFrame], models:List[str], q:float=0.5,
>                     id_col:str='unique_id', target_col:str='y')

Quantile Loss (QL)

QL measures the deviation of a quantile forecast.
By weighting the absolute deviation in a non symmetric way, the
loss pays more attention to under or over estimation.    
A common value for q is 0.5 for the deviation from the median.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, times, actuals and predictions. |
| models | List |  | Columns that identify the models predictions. |
| q | float | 0.5 | Quantile for the predictions' comparison. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# the pandas and polars implementations must agree on the quantile loss
# (default quantile)
ql_pd = quantile_loss(series, models)
ql_pl = quantile_loss(series_pl, models)
pd_vs_pl(ql_pd, ql_pl, models)

## Multi-Quantile Loss
> $$ \mathrm{MQL}(\mathbf{y}_{\tau},
                    [\mathbf{\hat{y}}^{(q_{1})}_{\tau}, ... ,\hat{y}^{(q_{n})}_{\tau}]) = 
       \frac{1}{n} \sum_{q_{i}} \mathrm{QL}(\mathbf{y}_{\tau}, \mathbf{\hat{y}}^{(q_{i})}_{\tau}) $$

![](imgs/losses/mq_loss.png)

In [None]:
#| export
def mqloss(
    df: DataFrame,
    models: List[str],
    quantiles: np.ndarray,
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Multi-Quantile loss (MQL)

    MQL calculates the average multi-quantile Loss for
    a given set of quantiles, based on the absolute
    difference between predicted quantiles and observed values.

    The limit behavior of MQL allows to measure the accuracy
    of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with
    the continuous ranked probability score (CRPS). This can be achieved
    through a numerical integration technique, that discretizes the quantiles
    and treats the CRPS integral with a left Riemann approximation, averaging over
    uniformly distanced quantiles.

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, times, actuals and predictions.
    models : list of str
        Columns that identify the models predictions.
    quantiles : numpy array
        Quantiles to compare against.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (default='y')
        Column that contains the target.

    Returns
    -------
    pandas or polars Dataframe
        dataframe with one row per id and one column per model.
        None is returned when `models` is empty.

    References
    ----------
    [1] https://www.jstor.org/stable/2629907
    """
    quantiles = np.asarray(quantiles)

    def _row_loss(model: str) -> np.ndarray:
        # signed error y - y_hat as a column, broadcast against every quantile
        error = (df[target_col].to_numpy() - df[model].to_numpy()).reshape(-1, 1)
        return np.maximum(error * quantiles, error * (quantiles - 1)).mean(axis=1)

    res: Optional[DataFrame] = None
    if isinstance(df, pd.DataFrame):
        # every per-row loss carries df's index so that grouping by df[id_col]
        # lines up row by row, whatever index df has
        per_model = [
            pd.Series(_row_loss(model), index=df.index, name=model)
            .groupby(df[id_col], observed=True)
            .mean()
            for model in models
        ]
        if per_model:
            # all pieces are indexed by id, so the column-wise concat aligns on it
            res = pd.concat(per_model, axis=1)
            res.index.name = id_col
            res = res.reset_index()
    else:
        for model in models:
            result = type(df)({model: _row_loss(model)})
            try:
                result = result.group_by(df[id_col]).mean()
            except AttributeError:
                result = result.groupby(df[id_col]).mean()
            res = result if res is None else res.join(result, on=id_col)
    return res

In [None]:
show_doc(mqloss)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L421){target="_blank" style="float:right; font-size:smaller"}

### mqloss

>      mqloss
>              (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.Data
>              Frame], models:List[str], quantiles:numpy.ndarray,
>              id_col:str='unique_id', target_col:str='y')

Multi-Quantile loss (MQL)

MQL calculates the average multi-quantile Loss for
a given set of quantiles, based on the absolute 
difference between predicted quantiles and observed values.

The limit behavior of MQL allows to measure the accuracy 
of a full predictive distribution $\mathbf{\hat{F}}_{\tau}$ with 
the continuous ranked probability score (CRPS). This can be achieved 
through a numerical integration technique, that discretizes the quantiles 
and treats the CRPS integral with a left Riemann approximation, averaging over 
uniformly distanced quantiles.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, times, actuals and predictions. |
| models | List |  | Columns that identify the models predictions. |
| quantiles | ndarray |  | Quantiles to compare against. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# the pandas and polars implementations must agree on the multi-quantile loss
check_quantiles = [0.1, 0.3]
mql_pd = mqloss(series, models, check_quantiles)
mql_pl = mqloss(series_pl, models, check_quantiles)
pd_vs_pl(mql_pd, mql_pl, models)

## Coverage

In [None]:
#| export
def coverage(
    df: DataFrame,
    models: List[str],
    level: int,
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Coverage of y with y_hat_lo and y_hat_hi.

    For every serie, computes the fraction of rows whose target falls between
    the `{model}-lo-{level}` and `{model}-hi-{level}` columns.

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, times, actuals and predictions.
    models : list of str
        Columns that identify the models predictions.
    level : int
        Confidence level used for intervals.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (default='y')
        Column that contains the target.

    Returns
    -------
    pandas or polars Dataframe
        dataframe with one row per id and one column per model.

    References
    ----------
    [1] https://www.jstor.org/stable/2629907
    """
    if isinstance(df, pd.DataFrame):
        # One 0/1 indicator column per model, one row per input row.
        out = np.empty((df.shape[0], len(models)))
        for j, model in enumerate(models):
            out[:, j] = df[target_col].between(df[f'{model}-lo-{level}'], df[f'{model}-hi-{level}'])
        # Reuse df's index for the indicator frame: pandas aligns a Series
        # grouper to the grouped frame by index label, so a fresh RangeIndex
        # would mismatch the ids whenever df has a non-default index
        # (e.g. after filtering rows).
        res = pd.DataFrame(out, columns=models, index=df.index).groupby(df[id_col]).mean()
        res.index.name = id_col
        res = res.reset_index()
    else:
        def gen_expr(model):
            # Boolean "target inside the interval" expression, named after the model.
            return pl_col(target_col).is_between(pl_col(f'{model}-lo-{level}'), pl_col(f'{model}-hi-{level}')).alias(model)

        res = _pl_agg_expr(df, models, id_col, gen_expr)
    return res

In [None]:
# Render the signature and docstring of `coverage` (nbdev's show_doc) as the docs section shown below.
show_doc(coverage)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L486){target="_blank" style="float:right; font-size:smaller"}

### coverage

>      coverage
>                (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.Da
>                taFrame], models:List[str], level:int, id_col:str='unique_id',
>                target_col:str='y')

Coverage of y with y_hat_lo and y_hat_hi.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, times, actuals and predictions. |
| models | List |  | Columns that identify the models predictions. |
| level | int |  | Confidence level used for intervals. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# Run coverage at level 80 on the pandas and the polars series and hand both
# results to pd_vs_pl, which is defined outside this section.
# NOTE(review): presumably pd_vs_pl asserts that both engines agree on the `models` columns -- confirm.
pd_vs_pl(
    coverage(series, models, 80),
    coverage(series_pl, models, 80),
    models,
)

## Calibration

In [None]:
#| export
def calibration(
    df: DataFrame,
    models: List[str],
    level: int,
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """
    Fraction of y that is lower than or equal to y_hat_hi.

    For every serie, computes the fraction of rows whose target does not
    exceed the `{model}-hi-{level}` column.

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, times, actuals and predictions.
    models : list of str
        Columns that identify the models predictions.
    level : int
        Confidence level used for intervals.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (default='y')
        Column that contains the target.

    Returns
    -------
    pandas or polars Dataframe
        dataframe with one row per id and one column per model.

    References
    ----------
    [1] https://www.jstor.org/stable/2629907
    """
    if isinstance(df, pd.DataFrame):
        # One 0/1 indicator column per model, one row per input row.
        out = np.empty((df.shape[0], len(models)))
        for j, model in enumerate(models):
            out[:, j] = df[target_col].le(df[f'{model}-hi-{level}'])
        # Reuse df's index for the indicator frame: pandas aligns a Series
        # grouper to the grouped frame by index label, so a fresh RangeIndex
        # would mismatch the ids whenever df has a non-default index
        # (e.g. after filtering rows).
        res = pd.DataFrame(out, columns=models, index=df.index).groupby(df[id_col]).mean()
        res.index.name = id_col
        res = res.reset_index()
    else:
        def gen_expr(model):
            # Boolean "target <= upper bound" expression, named after the model.
            return pl_col(target_col).le(pl_col(f'{model}-hi-{level}')).alias(model)

        res = _pl_agg_expr(df, models, id_col, gen_expr)
    return res

In [None]:
# Render the signature and docstring of `calibration` (nbdev's show_doc) as the docs section shown below.
show_doc(calibration)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L541){target="_blank" style="float:right; font-size:smaller"}

### calibration

>      calibration
>                   (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame
>                   .DataFrame], models:List[str], level:int,
>                   id_col:str='unique_id', target_col:str='y')

Fraction of y that is lower than y_hat_hi.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, times, actuals and predictions. |
| models | List |  | Columns that identify the models predictions. |
| level | int |  | Confidence level used for intervals. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# Run calibration at level 80 on the pandas and the polars series and hand both
# results to pd_vs_pl, which is defined outside this section.
# NOTE(review): presumably pd_vs_pl asserts that both engines agree on the `models` columns -- confirm.
pd_vs_pl(
    calibration(series, models, 80),
    calibration(series_pl, models, 80),
    models,
)

## CRPS
> $$ \mathrm{sCRPS}(\hat{F}_{\tau}, \mathbf{y}_{\tau}) = \frac{2}{N} \sum_{i}
    \int^{1}_{0}
    \frac{\mathrm{QL}(\hat{F}_{i,\tau}, y_{i,\tau})_{q}}{\sum_{i} | y_{i,\tau} |} dq $$
>
> Where $\hat{F}_{\tau}$ is an estimated multivariate distribution, and $y_{i,\tau}$
    are its realizations. 

In [None]:
#| export
def scaled_crps(
    df: DataFrame,
    models: List[str],
    quantiles: np.ndarray,
    id_col: str = 'unique_id',
    target_col: str = 'y',
) -> DataFrame:
    """Scaled Continuous Ranked Probability Score

    Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),
    to measure the accuracy of predicted quantiles `y_hat` compared to the observation `y`.
    This metric averages percentual weighted absolute deviations as
    defined by the quantile losses.

    The multi-quantile loss of every serie is multiplied by `2 * n * (n + 1) / 2`
    (with `n` the number of rows of the serie) and divided by the sum of the
    absolute target values of that serie (plus machine epsilon, to avoid
    dividing by zero).

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, times, actuals and predictions.
    models : list of str
        Columns that identify the models predictions.
    quantiles : numpy array
        Quantiles to compare against.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (default='y')
        Column that contains the target.

    Returns
    -------
    pandas or polars Dataframe
        dataframe with one row per id and one column per model.

    References
    ----------
    [1] https://proceedings.mlr.press/v139/rangapuram21a.html
    """
    eps = np.finfo(float).eps
    quantiles = np.asarray(quantiles)
    loss = mqloss(df, models, quantiles, id_col, target_col)
    if isinstance(loss, pd.DataFrame):
        # Index everything by id so the arithmetic below aligns on the serie label.
        loss = loss.set_index(id_col)
        assert isinstance(df, pd.DataFrame)
        norm = df[target_col].abs().groupby(df[id_col]).sum()
        sizes = df[id_col].value_counts()
        scales = sizes * (sizes + 1) / 2
        res = 2 * loss.mul(scales, axis=0).div(norm + eps, axis=0)
        res.index.name = id_col
        res = res.reset_index()
    else:
        def gen_expr(model):
            # 'counts' holds the per-serie scale n * (n + 1) / 2 computed below.
            return (2 * pl_col(model) * pl_col('counts') / (pl_col('norm') + eps)).alias(model)

        # polars renamed `groupby` to `group_by`; support both spellings.
        try:
            grouped_df = df.group_by(id_col)
        except AttributeError:
            grouped_df = df.groupby(id_col)
        norm = grouped_df.agg(pl_col(target_col).abs().sum().alias('norm'))
        sizes = df[id_col].value_counts()
        if 'counts' not in sizes.columns:
            # Newer polars versions name the value_counts column 'count'
            # instead of 'counts'; normalise it so the expressions below work
            # regardless of the installed version.
            sizes = sizes.rename({'count': 'counts'})
        sizes = sizes.with_columns(pl_col('counts') * (pl_col('counts') + 1) / 2)
        res = _pl_agg_expr(loss.join(sizes, on=id_col).join(norm, on=id_col), models, id_col, gen_expr)
    return res

In [None]:
# Render the signature and docstring of `scaled_crps` (nbdev's show_doc) as the docs section shown below.
show_doc(scaled_crps)

---

[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/losses.py#L589){target="_blank" style="float:right; font-size:smaller"}

### scaled_crps

>      scaled_crps
>                   (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame
>                   .DataFrame], models:List[str], quantiles:numpy.ndarray,
>                   id_col:str='unique_id', target_col:str='y')

Scaled Continuous Ranked Probability Score

Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),
to measure the accuracy of predicted quantiles `y_hat` compared to the observation `y`.
This metric averages percentual weighted absolute deviations as 
defined by the quantile losses.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Input dataframe with id, times, actuals and predictions. |
| models | List |  | Columns that identify the models predictions. |
| quantiles | ndarray |  | Quantiles to compare against. |
| id_col | str | unique_id | Column that identifies each serie. |
| target_col | str | y | Column that contains the target. |
| **Returns** | **Union** |  | **dataframe with one row per id and one column per model.** |

In [None]:
# Run scaled_crps on the pandas and the polars series and hand both results to
# pd_vs_pl, which is defined outside this section.
# NOTE(review): presumably pd_vs_pl asserts that both engines agree on the `models` columns -- confirm.
pd_vs_pl(
    scaled_crps(series, models, [0.3, 0.5]),
    scaled_crps(series_pl, models, [0.3, 0.5]),
    models,
)