In [271]:
#|default_exp losses

#|export
from abc import ABC, abstractmethod
import torch
import numpy as np
import pandas as pd
from tsai.basics import *

# Custom Loss Functions

---
## Index
#### <a href="#loss-measures-implementation">1. Loss Measures Implementation</a>
  - <a href="#mean-squared-error-loss-mse">1.1 Mean Squared Error Loss (MSE)</a>
  - <a href="#mean-absolute-error-loss-mae">1.2 Mean Absolute Error Loss (MAE)</a>
  - <a href="#mean-squared-logarithmic-error-loss-msle">1.3 Mean Squared Logarithmic Error Loss (MSLE)</a>
  - <a href="#root-mean-squared-logarithmic-error-loss-msle">1.4 Root Mean Squared Logarithmic Error Loss (RMSLE)</a>
  - <a href="#huber-loss-hl">1.5 Huber Loss (HL)</a>
  - <a href="#quantile-loss-ql">1.6 Quantile Loss (QL)</a>
#### <a href="#weighted-losses">2. Weighted Losses</a>
  - <a href="#weighted-losses-using-the-pre-defined-loss-functions">2.1 Weighted Losses Using the Pre-defined Loss Functions</a>
  - <a href="#special-weighted-losses">2.2 Special Weighted Losses</a>
    - <a href="#classification-loss">2.2.1 Classification Loss</a>
    - <a href="#trended-loss">2.2.2 Trended Loss</a>
#### <a href="#loss-factory">3. Loss Factory</a>
#### <a href="#tests">4. Tests</a>
---

This document contains the implementation of all custom loss functions that can be used during the training process. We opted not to use the pre-existing functions available in `PyTorch`, as they are not specifically tailored to our needs. Implementing these loss functions from scratch allowed us to better control the weighting process and customize them according to our requirements. 

As a starting point, we implemented an abstract class for the loss function to work as an `nn.Module`. This class provides an option for loss reduction and includes an abstract method for loss computation, which is then used in the `forward()` method.

In [272]:
#| export
class Loss(nn.Module, ABC):
    """
    <p>Base class for loss functions, providing a common interface for different types of losses.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>reduction (str): Method for reducing the loss value across batches ('mean' or 'sum'; any other value keeps the element-wise loss) | <i><u>Default</u></i>: None.</li>
    </ul>
    """
    def __init__(self, reduction:str=None):
        super().__init__()
        self.reduction = reduction

    def _reduce(self, loss: torch.Tensor) -> torch.Tensor:
        """Collapse the element-wise loss according to `self.reduction`."""
        if self.reduction == 'mean': return loss.mean()
        if self.reduction == 'sum': return loss.sum()
        # Any other value (None, 'none', ...) leaves the tensor unreduced.
        return loss

    @abstractmethod
    def _compute_loss(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Return the element-wise (unreduced) loss; must be provided by subclasses."""
        # Raise (not return) the exception so a subclass that delegates to
        # super() fails loudly instead of propagating an exception class.
        raise NotImplementedError

    def forward(self, input: torch.Tensor, target: torch.Tensor, reduction:str=None) -> torch.Tensor:
        """
        Compute the loss between `input` (predictions) and `target`.

        When `reduction` is given it replaces `self.reduction`; note that the
        override is stored on the module, so it also applies to later calls
        that do not pass `reduction`.
        """
        if reduction is not None:
            self.reduction = reduction
        loss = self._compute_loss(input, target)
        return self._reduce(loss)

## Loss Measures Implementation

In this section, we provide the implementation of the unweighted loss functions. We have separated the implementations of the unweighted and weighted loss functions **to allow for a direct comparison of their performance**. This separation also enhances the versatility of our evaluation process. The decisions we made regarding which loss functions to implement are based on the work of Jadon, A. et al. (2024).

<details>
  <summary><u>References</u></summary>

Jadon, A., Patil, A. & Jadon, S. A Comprehensive Survey of Regression-Based Loss Functions for Time Series Forecasting. arXiv: [2211.02989 [cs]](https://arxiv.org/abs/2211.02989). (2024). Preprint.

</details>


### Mean Squared Error Loss (MSE)

$$
\text{MSE} = \frac{1}{N} \sum_{i=1}^{N} (y_i - \hat{y}_i)^2
$$

The Mean Squared Error (MSE) is a common loss function for regression problems, including time series forecasting. It calculates the average of the squared differences between predicted and actual values. MSE is popular because it is computationally efficient, differentiable, and works well with various optimization algorithms. However, MSE is highly sensitive to outliers, meaning that large errors can disproportionately influence the model's learning process. This sensitivity can be problematic in time series forecasting, where outliers might represent anomalies or unusual events rather than the underlying data pattern. Therefore, this loss function could be particularly well-suited for **periods where volatility is usual**, such as during high solar activity levels.

In [273]:
#| export

class MSELoss(Loss):
    """
    <p>Mean Squared Error Loss (MSELoss): squared difference between actual and predicted values, averaged when the 'mean' reduction is selected.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>reduction (str): Method for reducing the loss value across batches | <i><u>Default</u></i>: None.</li>
    </ul>
    """

    def __init__(self, reduction:str=None):
        super().__init__(reduction=reduction)

    def _compute_loss(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        # Element-wise squared residual; the reduction is applied by the base class.
        residual = target - input
        return torch.square(residual)

### Mean Absolute Error Loss (MAE)

$$
\text{MAE} = \frac{1}{N} \sum_{i=1}^{N} |y_i - \hat{y}_i|
$$

The Mean Absolute Error (MAE) is a widely used loss function in regression tasks, including time series forecasting. It quantifies the average of the absolute differences between predicted and actual values. Unlike Mean Squared Error (MSE), which squares the errors, MAE treats all errors linearly, making it more robust to outliers. However, MAE's linear scoring method can lead to less efficient convergence during optimization, especially when dealing with smaller errors. Despite this potential drawback, MAE remains a valuable loss function, particularly in situations where **minimizing the impact of outliers is crucial**, such as in cases with low activity levels where values are more stable but outliers may still occur.


In [274]:
#| export

class MAELoss(Loss):
    """
    <p>Mean Absolute Error Loss (MAELoss): absolute difference between actual and predicted values, averaged when the 'mean' reduction is selected.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>reduction (str): Method for reducing the loss value across batches | <i><u>Default</u></i>: None.</li>
    </ul>
    """
    def __init__(self, reduction:str=None):
        super().__init__(reduction=reduction)

    def _compute_loss(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        # Element-wise absolute residual; the reduction is applied by the base class.
        residual = target - input
        return residual.abs()
    

### Mean Squared Logarithmic Error Loss (MSLE)

$$
\text{MSLE} = \frac{1}{N} \sum_{i=1}^{N} \left(\log(y_i' + 1) - \log(\hat{y}_i' + 1)\right)^2 \qquad \text{where: } y_i' = 
\begin{cases} 
y_i & \text{if } y_i > -1 \\
-1 + \epsilon & \text{if } y_i \leq -1 
\end{cases}
$$

The Mean Squared Logarithmic Error (MSLE) assesses the **relative difference between predicted and actual values** and is particularly useful when dealing with unscaled quantities. It mitigates the impact of large discrepancies in predictions for large values while remaining sensitive to smaller differences in predictions for smaller values. This characteristic stems from applying a logarithmic transformation to both the actual and predicted values before calculating the squared difference. MSLE is especially well-suited for situations where **underestimating values carries a higher penalty than overestimating them**.

Note that we have clipped the value to not be lower than -1 to avoid errors with the $log(1+x)$ function, which tends to infinity as $x$ approaches -1. However, the value is scaled to be closer to -1 if it is more negative.

In [275]:
#| export

class MSLELoss(Loss):
    """
    <p>Mean Squared Logarithmic Error Loss (MSLELoss) compares the logarithms of actual and predicted values, so underestimations are penalized more than overestimations.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>reduction (str): Method for reducing the loss value across batches | <i><u>Default</u></i>: None.</li>
    </ul>
    """
    def __init__(self, reduction:str=None):
        super().__init__(reduction)

    @staticmethod
    def inverse_scale_values_below_threshold(tensor, threshold, lower_bound, upper_bound, inclusive=False):
        """
        Linearly remap the entries of `tensor` that lie below `threshold`.

        Among the selected entries the smallest one is mapped to `upper_bound`
        and the largest one to `lower_bound` (hence "inverse"); entries that
        are not selected are returned unchanged. When all selected entries are
        equal they are all mapped to `upper_bound`.

        `inclusive=True` also selects entries equal to `threshold`; the default
        keeps the strict comparison.
        """
        mask = (tensor <= threshold) if inclusive else (tensor < threshold)

        if not mask.any():
            # Nothing to remap: hand back the original tensor.
            return tensor

        values_to_scale = tensor[mask]
        min_orig = values_to_scale.min()
        max_orig = values_to_scale.max()

        if min_orig == max_orig:
            # Degenerate span: avoid the 0/0 of the linear map below.
            scaled_values = torch.full_like(tensor, upper_bound)
        else:
            scaled_values = upper_bound - (tensor - min_orig) * (upper_bound - lower_bound) / (max_orig - min_orig)

        return torch.where(mask, scaled_values, tensor)

    def _compute_loss(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        epsilon = torch.finfo(torch.float32).eps

        # log1p(x) is -inf at x = -1 and NaN below it, so every value <= -1 is
        # moved into (-1, -0.9]: the more negative the value, the closer to -1.
        # The helper must select exactly the entries replaced below
        # (inclusive of -1); otherwise a value equal to -1 would become
        # -1 + (-1) = -2 and produce NaN.
        target_scaled = MSLELoss.inverse_scale_values_below_threshold(target, -1, 0.1, epsilon, inclusive=True)
        input_scaled = MSLELoss.inverse_scale_values_below_threshold(input, -1, 0.1, epsilon, inclusive=True)

        target = torch.where(target <= -1, -1 + target_scaled, target)
        input = torch.where(input <= -1, -1 + input_scaled, input)

        return (torch.log1p(target) - torch.log1p(input))**2

<a id="root-mean-squared-logarithmic-error-loss-msle"></a>
### Root Mean Squared Logarithmic Error Loss (RMSLE)

$$
\text{RMSLE} = \sqrt{\frac{1}{N} \sum_{i=1}^{N} \left(\log(y_i' + 1) - \log(\hat{y}_i' + 1)\right)^2} \qquad \text{where: } y_i' = 
\begin{cases} 
y_i & \text{if } y_i > -1 \\
-1 + \epsilon & \text{if } y_i \leq -1 
\end{cases}
$$

The Root Mean Squared Logarithmic Error Loss (RMSLE) is similar to MSLE but is **scale-invariant** and **less sensitive to outliers**. This loss is preferred when overestimation is acceptable, but underestimation is undesirable. For instance, in the case of DST and AP geomagnetic indices, a slight overestimation might be beneficial, as it can help better predict the significant outliers caused by solar storms.


In [276]:
#| export

class RMSLELoss(nn.Module):
    """
    <p>Root Mean Squared Logarithmic Error Loss (RMSLELoss): the square root of the reduced MSLE.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>reduction (str): Method for reducing the MSLE value across batches before the square root is taken | <i><u>Default</u></i>: 'mean'.</li>
    </ul>
    """

    def __init__(self, reduction:str='mean'):
        super().__init__()
        self.msle_loss = MSLELoss(reduction=reduction)

    def forward(self, input: torch.Tensor, target: torch.Tensor, reduction='mean') -> torch.Tensor:
        # The per-call `reduction` (default 'mean') is forwarded to the wrapped
        # MSLE loss, which uses it in place of the value given at construction.
        msle_value = self.msle_loss(input, target, reduction)
        return msle_value.sqrt()

### Huber Loss (HL)

$$
\text{HuberLoss} = \frac{1}{N} \sum_{i=1}^{N}
\begin{cases} 
\frac{1}{2} (y_i - \hat{y}_i)^2 & \text{if } |y_i - \hat{y}_i| < \delta \\
\delta \times \left(|y_i - \hat{y}_i| - \frac{1}{2} \delta\right) & \text{if } |y_i - \hat{y}_i| \geq \delta
\end{cases}
$$

Huber Loss (HL) combines the characteristics of both MSE and MAE, aiming to benefit from their respective strengths while mitigating their limitations. This loss function introduces a parameter called delta (δ) that acts as a threshold to determine the appropriate method for calculating the loss. For errors smaller than delta, Huber Loss behaves like MSE, employing a quadratic scoring function to facilitate efficient convergence. However, for errors exceeding delta, it transitions to a linear scoring approach akin to MAE, effectively reducing the influence of outliers. This adaptive behavior makes Huber Loss a versatile choice for time series forecasting, as it can handle datasets with **varying degrees of noise and outlier presence**.


In [277]:
#| export

class HubberLoss(Loss):
    """
    <p>Huber Loss (HL) combines the characteristics of both MSE and MAE, aiming to benefit from their respective strengths while mitigating their limitations.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>reduction (str): Method for reducing the loss value across batches | <i><u>Default</u></i>: None.</li>
        <li>delta (float): Threshold on the absolute error: below it the loss is quadratic (MSE-like), from it onwards linear (MAE-like) | <i><u>Default</u></i>: 1.</li>
    </ul>
    """
    def __init__(self, reduction:str=None, delta:float=1.):
        super().__init__(reduction)
        self.delta = delta

    def _compute_loss(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        error = target - input
        abs_error = torch.abs(error)

        # The branch is chosen on the magnitude of the error; comparing the
        # signed error would treat every large over-prediction as "small"
        # and penalise it quadratically.
        is_small_error = abs_error < self.delta
        small_error_loss = 0.5 * (error ** 2)
        large_error_loss = self.delta * (abs_error - 0.5 * self.delta)

        return torch.where(is_small_error, small_error_loss, large_error_loss)

### Quantile Loss (QL)

$$
\text{Quantile Loss} = \frac{1}{N} \sum_{i=1}^{N}
\begin{cases} 
(\gamma - 1) \cdot (y_i - \hat{y}_i) & \text{if  } y_i < \hat{y}_i \\
\gamma \cdot (y_i - \hat{y}_i) & \text{if  } y_i \geq \hat{y}_i
\end{cases}
$$

Quantile Loss (QL) is particularly useful when the goal is to predict not just a single point estimate but rather a range of possible outcomes with associated probabilities. This loss function is used in quantile regression, a type of regression analysis that estimates the conditional quantiles of the target variable given a set of predictor variables. Quantile Loss is defined based on the desired quantile ($\gamma$) and penalizes overestimations and underestimations differently depending on the value of $\gamma$.

For instance, when $\gamma = 0.5$, the loss function aims to estimate the median, penalizing both overestimations and underestimations equally. However, for other values of $\gamma$, the penalties are adjusted to reflect the desired quantile. In our case, by **adjusting the quantile to higher percentiles**, we can focus more on **outliers**, such as solar storms.


In [278]:
#| export

class QuantileLoss(Loss):
    """
    <p>Quantile Loss penalizes under- and over-estimations asymmetrically so that the prediction targets a specific quantile.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>reduction (str): Method for reducing the loss value across batches | <i><u>Default</u></i>: None.</li>
        <li>quantile (float): The quantile to be predicted, usually a value between 0 and 1.</li>
    </ul>
    """
    def __init__(self, quantile: float, reduction: str = None):
        super().__init__(reduction=reduction)
        self.quantile = quantile

    def _compute_loss(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        residual = target - input
        # Under-estimation (target >= prediction) is scaled by the quantile,
        # over-estimation by (quantile - 1).
        under_estimation_term = self.quantile * residual
        over_estimation_term = (self.quantile - 1) * residual
        return torch.where(residual >= 0, under_estimation_term, over_estimation_term)

## Weighted Losses

Here we implement the addition of a **weights tensor** to the loss functions. As discussed in our paper, we introduce **weighted loss functions** to assign more importance to levels that appear to be **underrepresented** in the training data, thereby giving them greater relevance.

As shown below, we have created a **weighted loss superclass** where the weights are calculated and applied. The core method of this class involves calculating the weight tensor. In this superclass, we take the thresholds, reshape the target tensor and thresholds so they can be directly compared, classify the target tensor, and then apply the weights using the `torch.einsum()` function. The final part of the function applies the weights, whether they are **equal for all variables** or **different for each variable**.

Additionally, a **data preprocessing method** is used because some thresholds and weights may have different shapes, which can result in errors due to unaligned shapes that do not fit properly with the weight calculation method. To address this, padding is added to variables that have fewer categories. **Reduction is recalculated** as the weights are applied directly to the error tensor, not to the reduced one.

In [279]:
#|export
class WeightedLoss(nn.Module, ABC):
    """
    <p>Base class for weighted loss functions: every element of the loss is scaled by the weight of the range its target value falls in.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>ranges (Tensor): [lower, upper] bounds of every range of every variable, NaN-padded to a common number of ranges.</li>
        <li>weights (Tensor): Weight of each range, either shared by all variables or defined per variable (zero-padded).</li>
        <li>all_variables_have_same_weights (bool): True when the weights dictionary has a single entry that is shared by every variable.</li>
    </ul>
    """
    def __init__(self, thresholds:dict, weights:dict):
        super().__init__()

        # Activity levels' weights can be equal across all variables or different,
        # and this should be taken into account during preprocessing.
        self.all_variables_have_same_weights = len(weights.keys()) == 1
        ranges, weights = self._preprocess_data(thresholds, weights)

        # Buffers follow the module across devices; values are stored as float32.
        self.register_buffer('ranges', torch.tensor(ranges, dtype=torch.float32))
        self.register_buffer('weights', torch.tensor(weights, dtype=torch.float32))


    def weighted_loss_tensor(self, target: torch.Tensor) -> torch.Tensor:
        """
        Return, for every element of `target` (batch, variables, horizon), the
        weight of the range that contains it. Elements outside every range get
        a weight of 0.
        """
        batch, variables, horizon = target.shape  # Example shape (32, 4, 6)
        variable, max_range, interval = self.ranges.shape  # Example shape (4, 4, 2)

        target_shaped = torch.reshape(target, (batch, variables, 1, horizon))  # Example shape (32, 4, 6) -> (32, 4, 1, 6)
        ranges_shaped = torch.reshape(self.ranges, (variable, max_range, 1, interval))  # Example shape (4, 4, 2) -> (4, 4, 1, 2)

        # Broadcasts to (batch, variables, ranges, horizon). NaN-padded ranges
        # never match because every comparison with NaN is False.
        in_range = (ranges_shaped[..., 0] <= target_shaped) & (target_shaped <= ranges_shaped[..., 1])

        # Both bounds are inclusive, so a value sitting on a boundary shared by
        # two consecutive ranges matches both. Keep only the first match so the
        # weights of the two ranges are not summed.
        in_range = in_range & (in_range.cumsum(dim=2) == 1)
        weights_tensor = in_range.float()

        if self.all_variables_have_same_weights:
            equation = 'r,bvrh->bvh'
        else:
            equation = 'vr,bvrh->bvh'

        return torch.einsum(equation, self.weights, weights_tensor)


    @abstractmethod
    def loss_measure(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
        """Return the element-wise (unreduced) error; must be provided by subclasses."""
        raise NotImplementedError

    def _preprocess_data(self, thresholds, weights):
        """
        Turn the thresholds/weights dictionaries into rectangular arrays.

        Ranges are padded with NaN (never matched) and per-variable weights
        with 0 so that variables with fewer ranges fit the same array.
        """
        def add_padding(x, padding_value, shape):
            result = np.full(shape, padding_value)
            for i, r in enumerate(x):
                if len(r):
                    result[i, :len(r)] = r
            return result

        ranges_raw = list(thresholds.values())
        max_size = max([len(array) for array in ranges_raw])
        ranges = add_padding(ranges_raw, np.nan, (len(ranges_raw), max_size, 2))

        if self.all_variables_have_same_weights:
            # One weight vector shared by every variable.
            weights = np.array(next(iter(weights.values())))
        else:
            # One weight vector per variable, aligned with the thresholds order.
            weights_raw = [weights[key] for key in thresholds.keys()]
            weights = add_padding(weights_raw, 0.0, (len(weights_raw), max_size))

        return ranges, weights


    def forward(self, y_pred, y_true, reduction='mean'):
        """
        Weight the element-wise error by the range weights of `y_true` and
        reduce it with 'mean' or 'sum'; any other `reduction` returns the
        weighted element-wise tensor.
        """
        error = self.loss_measure(y_pred, y_true)
        weights = self.weighted_loss_tensor(y_true)

        # The reduction is applied after weighting, on the element-wise tensor.
        weighted_error = error * weights

        if reduction == 'mean':
            return weighted_error.mean()
        if reduction == 'sum':
            return weighted_error.sum()
        return weighted_error

### Weighted Losses Using the Pre-defined Loss Functions

Here you can find the weighted versions of the loss functions implemented in this document. All functions follow the structure below when applying the weights:

$$ 
\text{Weighted Loss} = \frac{1}{N} \sum_{i=1}^{N} w^{Bs \times V \times H} \cdot \mathcal{L}^{Bs \times V \times H} \qquad \text{where}: 
\begin{cases}
    w: \text{Weight tensor} \\
    Bs: \text{Batch size} \\
    V: \text{Number of variables} \\
    H: \text{Horizon length} \\
    \mathcal{L}: \text{Loss measure}
\end{cases}
$$


In [280]:
#|export

class wMSELoss(WeightedLoss):
    """
    <p>Weighted Mean Squared Error Loss (wMSELoss): the squared error of every element is scaled by the weight of the range its target value belongs to.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>thresholds (dict): Ranges of each variable used to classify the target values.</li>
        <li>weights (dict): Weights assigned to each range.</li>
    </ul>
    """
    def __init__(self, thresholds, weights):
        super().__init__(thresholds=thresholds, weights=weights)

    def loss_measure(self, input, target):
        # Unreduced MSE: weighting and reduction happen in WeightedLoss.forward.
        elementwise_loss = MSELoss()
        return elementwise_loss(input, target)
    

    
class wMAELoss(WeightedLoss):
    """
    <p>Weighted Mean Absolute Error Loss (wMAELoss): the absolute error of every element is scaled by the weight of the range its target value belongs to.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>thresholds (dict): Ranges of each variable used to classify the target values.</li>
        <li>weights (dict): Weights assigned to each range.</li>
    </ul>
    """
    def __init__(self, thresholds, weights):
        super().__init__(thresholds=thresholds, weights=weights)

    def loss_measure(self, input, target):
        # Unreduced MAE: weighting and reduction happen in WeightedLoss.forward.
        elementwise_loss = MAELoss()
        return elementwise_loss(input, target)


    
class wMSLELoss(WeightedLoss):
    """
    <p>Weighted Mean Squared Logarithmic Error Loss (wMSLELoss): the squared logarithmic error of every element is scaled by the weight of the range its target value belongs to.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>thresholds (dict): Ranges of each variable used to classify the target values.</li>
        <li>weights (dict): Weights assigned to each range.</li>
    </ul>
    """
    def __init__(self, thresholds, weights):
        super().__init__(thresholds=thresholds, weights=weights)

    def loss_measure(self, input, target):
        # Unreduced MSLE: weighting and reduction happen in WeightedLoss.forward.
        elementwise_loss = MSLELoss()
        return elementwise_loss(input, target)
    


class wRMSLELoss(nn.Module):
    """
    <p>Weighted Root Mean Squared Logarithmic Error Loss (wRMSLELoss): the square root of the reduced weighted MSLE.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>thresholds (dict): Ranges of each variable used to classify the target values.</li>
        <li>weights (dict): Weights assigned to each range.</li>
    </ul>
    """
    def __init__(self, thresholds, weights):
        super().__init__()
        self.msle_loss = wMSLELoss(thresholds, weights)

    def forward(self, input, target, reduction='mean'):
        # The square root is taken after the weighted MSLE has been reduced.
        weighted_msle = self.msle_loss(input, target, reduction)
        return weighted_msle.sqrt()
    


class wHubberLoss(WeightedLoss):
    """
    <p>Weighted Huber Loss (wHubberLoss): the Huber loss of every element is scaled by the weight of the range its target value belongs to.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>thresholds (dict): Ranges of each variable used to classify the target values.</li>
        <li>weights (dict): Weights assigned to each range.</li>
        <li>delta (float): Threshold passed to the underlying Huber loss | <i><u>Default</u></i>: 2.0.</li>
    </ul>
    """
    def __init__(self, thresholds, weights, delta=2.0):
        super().__init__(thresholds=thresholds, weights=weights)
        self.delta = delta

    def loss_measure(self, y_pred, y_true):
        # Unreduced Huber loss: weighting and reduction happen in WeightedLoss.forward.
        elementwise_loss = HubberLoss(delta=self.delta)
        return elementwise_loss(y_pred, y_true)
    


class wQuantileLoss(WeightedLoss):
    """
    <p>Weighted Quantile Loss: the quantile loss of every element is scaled by the weight of the range its target value belongs to.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>thresholds (dict): Ranges of each variable used to classify the target values.</li>
        <li>weights (dict): Weights assigned to each range.</li>
        <li>quantile (float): The quantile to be predicted, usually a value between 0 and 1 | <i><u>Default</u></i>: 0.5.</li>
    </ul>
    """
    def __init__(self, thresholds, weights, quantile=0.5):
        super().__init__(thresholds=thresholds, weights=weights)
        self.quantile = quantile

    def loss_measure(self, y_pred, y_true):
        # Unreduced quantile loss: weighting and reduction happen in WeightedLoss.forward.
        elementwise_loss = QuantileLoss(quantile=self.quantile)
        return elementwise_loss(y_pred, y_true)

### Special Weighted Losses

Here we add two special losses that wrap a configurable primary loss instead of a single predefined loss function. They either do not fully use the `WeightedLoss()` class or have their own implementation of the weight calculation.


#### Classification Loss

$$
\text{Classification Loss} = \frac{1}{N} \sum_{i=1}^{N} \; (1 - \alpha) \cdot \mathcal{L}_i^{Bs \times V \times H} + \alpha \cdot \mathcal{C}_i^{Bs \times V}\qquad \text{where: } \mathcal{C}_i = \frac{1}{H} \sum_{h=1}^{H} \left| w_{y_{vh}}^{Bs \times V \times H} - w_{\hat{y}_{vh}}^{Bs \times V \times H} \right|
$$

This loss function classifies both the input and target tensors to determine how the model has misclassified the category of the forecasted values, penalizing predictions with larger discrepancies between the actual and predicted categories. The purpose of this loss is to penalize the model when it **fails to correctly identify the category of the values based on the available context**. This is particularly important in cases where the volatility of the data, such as FSMY solar indices, varies depending on the activity level (with higher volatility at higher activity levels).

The category weights are defined as a simple series of consecutive integers (0, 1, 2, …), one per category, ensuring that the <u>steps between the designated categories are evenly spaced</u>.


In [281]:
#| export

class ClassificationLoss(WeightedLoss):
    """
    <p>Classification Loss blends a primary loss with a penalty on the distance between the category (range) of the predicted values and the category of the actual values.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>loss (Loss): The primary loss function, evaluated element-wise.</li>
        <li>alpha (float): Weight of the categorical penalty against the primary loss, between 0 and 1 | <i><u>Default</u></i>: 0.5.</li>
    </ul>
    """
    def __init__(self, thresholds, primary_loss, alpha=0.5):
        if alpha < 0 or alpha > 1:
            raise ValueError('Alpha must be between 0 and 1, as it is the weight of the categorical loss against the other loss.')

        # The "weights" are the category indices 0..n-1, shared by every
        # variable. There must be one index per range (category), not per
        # variable, otherwise the weight lookup fails whenever the number of
        # variables differs from the number of ranges.
        n_categories = max((len(variable_ranges) for variable_ranges in thresholds.values()), default=0)
        weights = {'All': np.arange(n_categories)}
        super().__init__(thresholds, weights)

        self.loss = primary_loss
        self.alpha = alpha

    def loss_measure(self, input, target):
        """Element-wise blend of the primary loss and the category-distance penalty."""
        primary_loss_value = self.loss(input, target, reduction=None)

        # Category index of every actual/predicted value; values outside every
        # range map to 0.
        categorical_error = torch.abs(self.weighted_loss_tensor(target) - self.weighted_loss_tensor(input))
        # Average category distance over the horizon, broadcast back over it.
        categorical_loss_value = torch.mean(categorical_error, dim=2, keepdim=True)

        return (1 - self.alpha) * primary_loss_value + self.alpha * categorical_loss_value


    def forward(self, input, target, reduction='mean'):
        """
        Reduce the blended loss with 'mean' or 'sum'; any other `reduction`
        returns the element-wise tensor. No range weighting is applied here.
        """
        error = self.loss_measure(input, target)

        if reduction == 'mean':
            return error.mean()
        if reduction == 'sum':
            return error.sum()
        return error

#### Trended Loss

$$
\text{Trended Loss} = \frac{1}{n} \sum_{i=1}^{n} (1 + |\tau(y) - \tau(\hat{y})|) \cdot \mathcal{L} \qquad \text{where}\ \tau() \text{ is the trend calculation function}
$$

The aim of the Trended Loss function is to penalize the model when it **incorrectly detects the trend** of the data. To achieve this, it calculates the trend for both the input and target values, and then measures the difference between these trends. The performance of this loss function can be somewhat limited because the context available to the model is restricted to the horizon, as the batch is generated randomly and <u>cannot capture a larger context</u>.

In [282]:
#| export

class TrendedLoss(nn.Module):
    """
    <p>Trended Loss scales a primary loss by how much the linear trend of the prediction differs from the linear trend of the target.</p>
    <h3>Attributes:</h3>
    <ul>
        <li>loss (Loss): The primary loss function used in combination with the trend adjustment.</li>
    </ul>
    """
    def __init__(self, primary_loss: "Loss"):
        super().__init__()
        self.loss = primary_loss

    @staticmethod
    def _slope(y):
        """NumPy least-squares slope of a 1-D series (kept as a reference helper)."""
        x = np.arange(len(y))
        slope, _ = np.polyfit(x, y, deg=1)
        return slope

    @staticmethod
    def _calculate_trends(tensor):
        """
        Least-squares slope along the last axis of a (batch, variables, horizon)
        tensor, returned as a float32 (batch, variables) tensor.

        The computation stays on the tensor's device and is detached from the
        autograd graph, so the trends act as constant weights.
        """
        values = tensor.detach().float()
        horizon = values.shape[2]

        if horizon < 2:
            # A single point has no trend; avoid the 0/0 of the formula below.
            return torch.zeros(values.shape[:2], dtype=values.dtype, device=values.device)

        # Closed form of the degree-1 fit over x = 0..horizon-1:
        # slope = sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))^2)
        steps = torch.arange(horizon, dtype=values.dtype, device=values.device)
        centred_steps = steps - steps.mean()
        centred_values = values - values.mean(dim=2, keepdim=True)

        return (centred_values * centred_steps).sum(dim=2) / centred_steps.square().sum()

    def forward(self, input, target):
        """Mean of the primary loss weighted by 1 + |trend(input) - trend(target)|."""
        batch, variables, _ = input.shape

        input_trend = TrendedLoss._calculate_trends(input)
        target_trend = TrendedLoss._calculate_trends(target)

        trend_diff = 1 + torch.abs(input_trend - target_trend)

        error = self.loss(input, target)
        # One weight per (batch, variable), broadcast over the horizon.
        weights = trend_diff.reshape(batch, variables, 1)
        loss = (error * weights).mean()

        return loss

## Loss Factory
This class groups and simplifies the creation of the different loss functions.

In [283]:
#| export

class LossFactory:
    """
    Registry and builder for the loss functions defined in this module.

    The factory is created with the thresholds and weights shared by the
    weighted losses; `create()` then builds a loss from its registry name.
    """
    losses = {
        'MSE': MSELoss,
        'MAE': MAELoss,
        'MSLE': MSLELoss,
        'RMSLE': RMSLELoss,
        'Hubber': HubberLoss,
        'Quantile': QuantileLoss,
        'wMSE': wMSELoss,
        'wMAE': wMAELoss,
        'wMSLE': wMSLELoss,
        'wRMSLE': wRMSLELoss,
        'wHubber': wHubberLoss,
        'wQuantile': wQuantileLoss,
        'Classification': ClassificationLoss,
        'Trended': TrendedLoss
    }

    def __init__(self, thresholds, weights):
        self.thresholds = thresholds
        self.weights = weights

    @classmethod
    def list(cls):
        """Display an HTML table with every registered loss and its docstring."""
        # Imported lazily so the factory can be used outside of IPython.
        from IPython.display import HTML, display

        table_rows = []

        # Generate rows for the table
        for key, value in cls.losses.items():
            doc_html = value.__doc__.strip().replace("\n", " ")
            table_rows.append(f"<tr><td style='text-align: left;'><strong>{key}</strong></td><td style='text-align: left;'>{doc_html}</td></tr>")

        # Create the HTML for the table with left-aligned text
        table_html = f"""
        <table>
            <thead>
                <tr>
                    <th style='text-align: left;'>Loss Name</th>
                    <th style='text-align: left;'>Description</th>
                </tr>
            </thead>
            <tbody>
                {''.join(table_rows)}
            </tbody>
        </table>
        """

        display(HTML(table_html))



    def create(self, loss_name:str='MSE', **kwargs) -> nn.Module:
        """
        Build the loss registered under `loss_name`.

        Optional keyword arguments, used only by the losses that accept them:
        `delta` (Hubber, wHubber), `quantile` (Quantile, wQuantile),
        `alpha` and `primary_loss` (Classification), `primary_loss` (Trended).

        Raises:
            ValueError: if `loss_name` is not a registered loss.
        """
        if loss_name not in LossFactory.losses:
            raise ValueError(f'Loss {loss_name} not found. Available losses are: {[*LossFactory.losses]}')

        loss_class = LossFactory.losses[loss_name]

        # Losses built around a configurable primary loss.
        if loss_name == 'Classification':
            return ClassificationLoss(
                        thresholds=self.thresholds,
                        primary_loss=kwargs.get('primary_loss', MSELoss()),
                        alpha=kwargs.get('alpha', 0.5)
                    )

        if loss_name == 'Trended':
            return TrendedLoss(primary_loss=kwargs.get('primary_loss', MSELoss()))

        # Weighted losses: all of them need the thresholds and weights.
        if loss_name == 'wHubber':
            return wHubberLoss(
                        thresholds=self.thresholds,
                        weights=self.weights,
                        delta=kwargs.get('delta', 2.0)
                    )

        if loss_name == 'wQuantile':
            return wQuantileLoss(
                        thresholds=self.thresholds,
                        weights=self.weights,
                        quantile=kwargs.get('quantile', 0.5)
                    )

        if loss_name.startswith('w'):
            return loss_class(
                        thresholds=self.thresholds,
                        weights=self.weights
                    )

        # Unweighted losses.
        if loss_name == 'Quantile':
            # QuantileLoss has no default quantile; use the median like wQuantile.
            return QuantileLoss(quantile=kwargs.get('quantile', 0.5))

        if loss_name == 'Hubber':
            return HubberLoss(delta=kwargs.get('delta', 1.0))

        return loss_class()
    

## Tests

This section contains the tests applied to all tensor loss calculations:

In [284]:
# Test fixtures shared by the weighted-loss checks below.
device = 'cpu'

# Four variables, each split into the same four consecutive ranges.
ranges = {name: np.array([[0, 1], [1, 2], [2, 3], [3, 4]]) for name in ('A', 'B', 'C', 'D')}

# A single entry: every variable shares these per-range weights.
weights = {'A': np.array([1, 2, 3, 4])}

# Shape (batch=2, variables=4, horizon=6); 4.5 and 5.5 lie outside every range.
target = torch.tensor([[[0.5, 1.5, 2.5, 3.5, 4.5, 5.5]] * 4] * 2, device=device, dtype=torch.float32)

input = target + 1

# Shape (1, 4, 6): one weight per range, 0 for the out-of-range values.
expected_weights = torch.tensor([[[1, 2, 3, 4, 0, 0]] * 4], device=device, dtype=torch.float32)

solact_levels = ['low', 'moderate', 'elevated', 'high']

class DummyLoss(WeightedLoss):
    """Minimal `WeightedLoss` subclass used by the tests to call `weighted_loss_tensor`.

    It only supplies a placeholder `loss_measure`; no loss value is computed.
    """

    def loss_measure(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
        # Placeholder implementation: nothing is computed and None is returned.
        return None

def test_LossWeightsTensor():
    """Check the weight tensor that `weighted_loss_tensor` builds for `target`.

    Raises:
        AssertionError: if the produced weights differ from `expected_weights`.
    """
    loss = DummyLoss(ranges, weights).to(device)
    result = loss.weighted_loss_tensor(target)

    # `expected_weights` has a leading dimension of 1 while `target` has 2, and
    # `torch.equal` returns False whenever the two sizes differ. Broadcast the
    # expectation to the result's shape first; this is a no-op when the shapes
    # already match.
    # NOTE(review): presumes the result holds one weight per target element - confirm.
    expected = expected_weights.expand_as(result)

    assert torch.equal(result, expected), f"Expected {expected}, but got {result}"
    print("Loss Tensor test passed!")

In [285]:
# Test

# Fixtures where the variables have a different number of ranges and weights:
# 'var1' has three, 'var2' has two.
thresholds_ne = dict(
    var1=[[0, 1], [1, 2], [2, 3]],
    var2=[[4, 5], [5, 6]],
)

weights_ne = dict(
    var1=[1, 2, 3],
    var2=[3, 4],
)

# Shape (1, 2, 4). The first row's values fall inside var1's ranges,
# the second row's values inside var2's ranges.
target_ne = torch.tensor([
    [
        [0.5, 0.5, 0.5, 1.5],
        [4.5, 4.5, 5.5, 4.5],
    ],
])

# Weight expected for each element of `target_ne`.
expected_weights_ne = torch.tensor([
    [
        [1, 1, 1, 2],
        [3, 3, 4, 3],
    ],
])

def test_LossWeightsTensor_different_weights():
    """Check `weighted_loss_tensor` when variables have different numbers of ranges.

    Raises:
        AssertionError: if the produced weights differ from `expected_weights_ne`.
    """
    loss = DummyLoss(thresholds_ne, weights_ne)
    loss_tensor = loss.weighted_loss_tensor(target_ne)

    # Report the expectation this test actually compares against
    # (`expected_weights_ne`), not the unrelated `expected_weights` fixture.
    assert torch.equal(loss_tensor, expected_weights_ne), (
        f"Expected {expected_weights_ne}, but got {loss_tensor}"
    )
    print("Test for different weights per variable passed!")

In [286]:
# Test

def check_loss_function(loss_class, expected_value, loss_func=None):
    """Instantiate a loss, evaluate it on the shared fixtures and compare the result.

    Args:
        loss_class: Callable with a `__name__` attribute that builds the loss.
            Constructor arguments are chosen from that name:
            "ClassificationLoss" receives `(ranges, loss_func)`, "TrendedLoss"
            receives `(loss_func,)`, anything else receives `(ranges, weights)`.
        expected_value: Tensor that `loss(input, target)` must be close to.
        loss_func: Loss passed to the "ClassificationLoss" and "TrendedLoss"
            constructors; unused for any other loss.

    Raises:
        AssertionError: if the computed loss is not close to `expected_value`.
    """
    class_name = loss_class.__name__
    if class_name == "ClassificationLoss":
        ctor_args = (ranges, loss_func)
    elif class_name == "TrendedLoss":
        ctor_args = (loss_func,)
    else:
        ctor_args = (ranges, weights)

    loss = loss_class(*ctor_args).to(device)
    result = loss(input, target)

    assert torch.isclose(result, expected_value), f"Expected {expected_value}, but got {result}"
    print(f"{type(loss).__name__} test passed!")

In [287]:
# Test

def test_wMSELoss():
    """wMSELoss must equal the mean of the weighted squared errors."""
    squared_error = (target - input).pow(2)
    expected = (expected_weights * squared_error).mean()
    check_loss_function(wMSELoss, expected)

def test_wMAELoss():
    """wMAELoss must equal the mean of the weighted absolute errors."""
    absolute_error = (target - input).abs()
    expected = (expected_weights * absolute_error).mean()
    check_loss_function(wMAELoss, expected)

def test_wMSLELoss():
    """wMSLELoss must equal the mean of the weighted squared log1p differences."""
    log_difference = torch.log1p(target) - torch.log1p(input)
    expected = (expected_weights * log_difference.pow(2)).mean()
    check_loss_function(wMSLELoss, expected)

def test_wRMSLELoss():
    """wRMSLELoss must equal the square root of the weighted mean squared log1p difference."""
    log_difference = torch.log1p(target) - torch.log1p(input)
    weighted_mean = (expected_weights * log_difference.pow(2)).mean()
    expected = weighted_mean.sqrt()
    check_loss_function(wRMSLELoss, expected)

def test_wHuberLoss():
    """wHubberLoss must equal the weighted mean of the element-wise Huber terms (delta = 1)."""
    delta = 1
    absolute_error = torch.abs(input - target)

    # Quadratic term below delta, linear term otherwise.
    quadratic_term = 0.5 * (input - target) ** 2
    linear_term = delta * (absolute_error - 0.5 * delta)
    huber_terms = torch.where(absolute_error < delta, quadratic_term, linear_term)

    expected = (expected_weights * huber_terms).mean()
    check_loss_function(wHubberLoss, expected)



def test_ClassificationLoss():
    """ClassificationLoss wrapping MSELoss is expected to match the mean-reduced MSELoss here."""
    reference_loss = MSELoss('mean')
    expected = reference_loss(input, target)
    check_loss_function(ClassificationLoss, expected, loss_func=MSELoss())

def test_TrendedLoss():
    """TrendedLoss wrapping MSELoss is expected to match the plain mean squared error here."""
    # The trend will be the same so the weights will be all 1
    squared_error = (target - input).pow(2)
    expected = squared_error.mean()
    check_loss_function(TrendedLoss, expected, loss_func=MSELoss())

def test_wQuantileLoss():
    """wQuantileLoss at quantile 0.5 must equal the weighted mean of 0.5 * |error|."""
    quantile = 0.5

    # Use torch.abs rather than np.abs: applying a NumPy function to a torch tensor
    # goes through a NumPy conversion, which breaks once `device` is not the CPU.
    errors = torch.abs(target - input)

    # NOTE(review): `errors` is non-negative, so the `errors >= 0` branch is always
    # taken and the expectation reduces to quantile * |error|. That is independent of
    # the error sign only at quantile = 0.5; confirm wQuantileLoss's sign convention
    # before testing another quantile.
    quantile_terms = torch.where(errors >= 0, quantile * errors, (quantile - 1) * errors)
    expected_quantile_loss = torch.mean(expected_weights * quantile_terms)

    # check_loss_function builds the loss as loss_class(ranges, weights), so the
    # extra `quantile` argument is bound through a lambda.
    check_loss_function(lambda r, w: wQuantileLoss(r, w, quantile), expected_quantile_loss)


In [288]:
#| Test
# The two weight-tensor tests are currently disabled:
# test_LossWeightsTensor()
# test_LossWeightsTensor_different_weights()

# Run the loss tests in a fixed order; each one prints a confirmation when it passes.
for run_test in (
    test_wMSELoss,
    test_wMAELoss,
    test_wMSLELoss,
    test_wRMSLELoss,
    test_wHuberLoss,
    test_wQuantileLoss,
    test_ClassificationLoss,
    test_TrendedLoss,
):
    run_test()

wMSELoss test passed!
wMAELoss test passed!
wMSLELoss test passed!
wRMSLELoss test passed!
wHubberLoss test passed!
wQuantileLoss test passed!
ClassificationLoss test passed!
TrendedLoss test passed!


In [289]:
#|eval: false
#|hide
# Import only the exporter instead of star-importing the whole nbdev namespace,
# so it is clear where `nbdev_export` comes from.
from nbdev import nbdev_export

# Export the cells marked with `#|export` to the module named by `#|default_exp`.
nbdev_export()