In [None]:
#default_exp criteria

In [None]:
#export
import torch
import numpy as np
from torch.nn.functional import mse_loss, binary_cross_entropy

from dl4to.criteria import Criterion

In [None]:
#hide
from nbdev.showdoc import show_doc

# Supervised criteria

In [None]:
#export
class SupervisedCriterion(Criterion):
    """
    Base class from which all supervised criteria inherit, for both classical and learned methods.

    Besides forwarding its arguments to `Criterion` (with `supervised=True`), it offers helpers for
    input normalization/validation and for "soft" confusion-matrix statistics. The statistics are
    computed as products of densities (and the design space mask) that are summed along `dim=1`.
    """
    def __init__(
        self,
        name:str, # The name of this criterion which will be monitored in logging.
        differentiable:bool=True, # Whether the criterion is differentiable or not. Only differentiable criteria can be used as loss/objective functions.
        lower_is_better:bool=True, # Whether lower values of the criterion correspond to better scores.
        compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
    ):
        super().__init__(name=name,
                         supervised=True,
                         differentiable=differentiable,
                         lower_is_better=lower_is_better,
                         compute_only_on_design_space=compute_only_on_design_space
                        )


    def _convert_to_list(self, solutions, gt_solutions):
        """
        Normalizes both arguments to lists and returns them as a `(solutions, gt_solutions)` pair.

        Lists are returned unchanged (same object), tuples are converted to lists, and any other
        object (including `None`) is wrapped into a one-element list.
        """
        def as_list(items):
            # `isinstance` (instead of an exact type check) keeps list subclasses intact.
            if isinstance(items, list):
                return items
            # A tuple of solutions is a collection, not a single solution.
            if isinstance(items, tuple):
                return list(items)
            return [items]

        return as_list(solutions), as_list(gt_solutions)


    def _check_inputs(self, solutions, gt_solutions):
        """
        Validates that ground truth data is available.

        Raises an `AttributeError` if `gt_solutions` itself is `None` and a `ValueError` if any of
        its entries is `None`. `solutions` is accepted for interface symmetry and is not inspected.
        """
        if gt_solutions is None:
            raise AttributeError('The criterion is supervised and needs ground thruth data. Therefore, gt_solution should not be None.')

        for gt_solution in gt_solutions:
            if gt_solution is None:
                raise ValueError('The criterion requires gt_solutions, which are currently None.')


    def _get_number_of_true_positives(self, θ, θ_true, design_space_mask):
        """Soft true positive count: `θ_true * θ`, masked and summed along `dim=1`."""
        return (θ_true * θ * design_space_mask).sum(dim=1)


    def _get_number_of_true_negatives(self, θ, θ_true, design_space_mask):
        """Soft true negative count: `(1 - θ_true) * (1 - θ)`, masked and summed along `dim=1`."""
        return ((1 - θ_true) * (1 - θ) * design_space_mask).sum(dim=1)


    def _get_number_of_false_positives(self, θ, θ_true, design_space_mask):
        """Soft false positive count: `(1 - θ_true) * θ`, masked and summed along `dim=1`."""
        return ((1 - θ_true) * θ * design_space_mask).sum(dim=1)


    def _get_number_of_false_negatives(self, θ, θ_true, design_space_mask):
        """Soft false negative count: `θ_true * (1 - θ)`, masked and summed along `dim=1`."""
        return (θ_true * (1 - θ) * design_space_mask).sum(dim=1)


    def _get_sensitivity(self, θ, θ_true, design_space_mask, ε=1e-6): # same as recall
        """Sensitivity `tp / (tp + fn + ε)`; `ε` guards against division by zero."""
        tp = self._get_number_of_true_positives(θ, θ_true, design_space_mask)
        fn = self._get_number_of_false_negatives(θ, θ_true, design_space_mask)
        return tp / (tp + fn + ε)


    def _get_specificity(self, θ, θ_true, design_space_mask, ε=1e-6):
        """Specificity `tn / (tn + fp + ε)`; `ε` guards against division by zero."""
        tn = self._get_number_of_true_negatives(θ, θ_true, design_space_mask)
        fp = self._get_number_of_false_positives(θ, θ_true, design_space_mask)
        return tn / (tn + fp + ε)


    def _get_precision(self, θ, θ_true, design_space_mask, ε=1e-6):
        """Precision `tp / (tp + fp + ε)`; `ε` guards against division by zero."""
        tp = self._get_number_of_true_positives(θ, θ_true, design_space_mask)
        fp = self._get_number_of_false_positives(θ, θ_true, design_space_mask)
        return tp / (tp + fp + ε)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        raise NotImplementedError("Must be overridden.")

In [None]:
show_doc(SupervisedCriterion.__call__)

<h4 id="SupervisedCriterion.__call__" class="doc_header"><code>SupervisedCriterion.__call__</code><a href="__main__.py#L64" class="source_link" style="float:right">[source]</a></h4>

> <code>SupervisedCriterion.__call__</code>(**`solutions`**:`list`, **`gt_solutions`**:`list`=*`None`*, **`binary`**:`bool`=*`False`*)

Calculates the output of the criterion for all solutions.

||Type|Default|Details|
|---|---|---|---|
|**`solutions`**|`list`||The solutions that should be evaluated with the criterion.|
|**`gt_solutions`**|`list`|`None`|Ground truth solutions that are compared element-wise with the `solutions`.|
|**`binary`**|`bool`|`False`|Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.|


In [None]:
#export
class WeightedBCE(SupervisedCriterion):
    """
    Weighted binary cross entropy [1] is a variant of binary cross entropy. The weight value can be used to tune false negatives and false positives.
    E.g., if you want to reduce the number of false negatives then set weight > 1; similarly, to decrease the number of false positives, set weight < 1.
    The criterion reaches its best value at 0 and higher values correspond to worse scores.
    """
    def __init__(self,
                 weight:float=.5, # The weight of the weighted binary cross entropy function which is used to take class imbalance into account.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        self.weight = weight
        super().__init__(
            # `float(...)` is required because the `.2` precision spec raises a ValueError for ints (e.g. `weight=1`).
            name=f'BCE({float(weight):.2})',
            compute_only_on_design_space=compute_only_on_design_space
        )


    def set_optimal_weight(self,
                           dataset:"dl4to.dataset.TopoDataset", # The dataset based on which the optimal weight is determined.
                           binary:bool=False # Whether the densities in the solutions are thresholded at 0.5 before the weight is determined.
                          ):
        """
        Calculates the optimal BCE weight based on the solutions in the dataset.
        """
        gt_solutions = dataset.get_gt_solutions()
        θ = torch.stack([gt_solution.get_θ(binary=binary).flatten() for gt_solution in gt_solutions])
        design_space_mask = self.get_design_space_mask(gt_solutions)
        # NOTE(review): the numerator sums θ over all voxels while the denominator only counts masked voxels — confirm this is intended.
        # Store a plain Python float (as annotated) instead of a 0-dim tensor.
        self.weight = float(1. - θ.sum() / design_space_mask.sum())
        print(f"Setting criterion weight to {self.weight}.")
        self._name = f'BCE({self.weight:.2})'


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)
        θ = self.get_θ_flat(solutions, binary=binary)
        θ_true = self.get_θ_flat(gt_solutions, binary=binary)
        design_space_mask = self.get_design_space_mask(solutions)
        # Masking both arguments zeroes prediction and target outside the design space.
        loss = binary_cross_entropy(
            θ * design_space_mask, θ_true * design_space_mask,
            weight=torch.tensor([self.weight], device=θ.device),
            reduction='none'
        )

        # Average the element-wise loss over the masked voxels (sum along `dim=1`).
        return loss.sum(dim=1) / design_space_mask.sum(dim=1)

In [None]:
show_doc(WeightedBCE.set_optimal_weight)

<h4 id="WeightedBCE.set_optimal_weight" class="doc_header"><code>WeightedBCE.set_optimal_weight</code><a href="__main__.py#L19" class="source_link" style="float:right">[source]</a></h4>

> <code>WeightedBCE.set_optimal_weight</code>(**`dataset`**:`dl4to.dataset.TopoDataset`, **`binary`**:`bool`=*`False`*)

Calculates the optimal BCE weight based on the solutions in the dataset.

||Type|Default|Details|
|---|---|---|---|
|**`dataset`**|`dl4to.dataset.TopoDataset`||The dataset based on which the optimal weight is determined.|
|**`binary`**|`bool`|`False`|Whether the densities in the solutions are thresholded at 0.5 before the weight is determined.|


In [None]:
#export
class WeightedFocal(SupervisedCriterion):
    """
    Focal loss [2] can be seen as a variation of binary cross entropy. It down-weights the contribution of easy examples and enables the model to focus more on learning hard examples.
    It works well for highly imbalanced class scenarios. The criterion reaches its best value at 0 and higher values correspond to worse scores.
    """
    def __init__(self,
                 weight:float=.5, # The weight of the weighted focal function which is used to take class imbalance into account.
                 γ:float=3, # $γ\geq0$ is the tunable focusing parameter. Setting $γ>0$ reduces the relative loss for well-classified examples, putting more focus on hard, misclassified examples.
                 ε:float=1e-6, # A small value $>0$ that avoids division by $0$ and therefore improves numerical stability.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        self.weight = weight
        self.γ = γ
        self.ε = ε
        super().__init__(
            # `float(...)` is required because the `.2` precision spec raises a ValueError for ints (e.g. `weight=1`).
            name=f'Focal({float(weight):.2})',
            compute_only_on_design_space=compute_only_on_design_space
        )


    def set_optimal_weight(self,
                           dataset:"dl4to.dataset.TopoDataset", # The dataset based on which the optimal weight is determined.
                           binary:bool=False # Whether the densities in the solutions are thresholded at 0.5 before the weight is determined.
                          ):
        """
        Calculates the optimal focal loss weight based on the solutions in the dataset.
        """
        gt_solutions = dataset.get_gt_solutions()
        θ = torch.stack([gt_solution.get_θ(binary=binary).flatten() for gt_solution in gt_solutions])
        design_space_mask = self.get_design_space_mask(gt_solutions)
        # NOTE(review): the numerator sums θ over all voxels while the denominator only counts masked voxels — confirm this is intended.
        # Store a plain Python float (as annotated) instead of a 0-dim tensor.
        self.weight = float(1. - θ.sum() / design_space_mask.sum())
        print(f"Setting criterion weight to {self.weight}.")
        # Keep the 'Focal' prefix used by `__init__`; it was previously renamed to 'BCE(...)' here.
        self._name = f'Focal({self.weight:.2})'


    def _get_loss(self, θ, θ_true, design_space_mask):
        """
        Computes the focal loss `weight * (1 - pt)**γ * bce`, averaged over the masked voxels along `dim=1`.
        """
        bce_loss = binary_cross_entropy(θ * design_space_mask, θ_true * design_space_mask, reduction='none')
        # pt = exp(-bce) is clamped to [ε, 1-ε].
        pt = torch.clamp(torch.exp(- bce_loss * design_space_mask), min=self.ε, max=1-self.ε)
        loss = self.weight * (1 - pt)**self.γ * bce_loss

        return loss.sum(dim=1) / design_space_mask.sum(dim=1)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)
        θ = self.get_θ_flat(solutions, binary=binary)
        θ_true = self.get_θ_flat(gt_solutions, binary=binary)
        design_space_mask = self.get_design_space_mask(solutions)

        return self._get_loss(θ, θ_true, design_space_mask)

In [None]:
show_doc(WeightedFocal.set_optimal_weight)

<h4 id="WeightedFocal.set_optimal_weight" class="doc_header"><code>WeightedFocal.set_optimal_weight</code><a href="__main__.py#L22" class="source_link" style="float:right">[source]</a></h4>

> <code>WeightedFocal.set_optimal_weight</code>(**`dataset`**:`dl4to.dataset.TopoDataset`, **`binary`**:`bool`=*`False`*)

Calculates the optimal BCE weight based on the solutions in the dataset.

||Type|Default|Details|
|---|---|---|---|
|**`dataset`**|`dl4to.dataset.TopoDataset`||The dataset based on which the optimal weight is determined.|
|**`binary`**|`bool`|`False`|Whether the densities in the solutions are thresholded at 0.5 before the weight is determined.|


In [None]:
#export
class Dice(SupervisedCriterion):
    """
    The Dice coefficient is a widely used metric in the computer vision community for measuring the similarity between two images.
    In 2016 it was also adapted as a loss function, known as Dice loss [3]. It is sometimes referred to as the F1 score [4].
    Dice reaches its best value at 0 and its worst value at 1.
    """
    def __init__(self,
                 ε:float=1e-6, # A small value $>0$ that avoids division by $0$ and therefore improves numerical stability.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        self.ε = ε
        super().__init__(name='Dice', compute_only_on_design_space=compute_only_on_design_space)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)

        mask = self.get_design_space_mask(solutions)
        stats_args = (
            self.get_θ_flat(solutions, binary=binary),
            self.get_θ_flat(gt_solutions, binary=binary),
            mask,
        )
        true_pos = self._get_number_of_true_positives(*stats_args)
        false_pos = self._get_number_of_false_positives(*stats_args)
        false_neg = self._get_number_of_false_negatives(*stats_args)

        # Dice score = 2 tp / (2 tp + fp + fn + ε); the criterion is its complement.
        overlap = 2. * true_pos
        return 1 - overlap / (overlap + false_pos + false_neg + self.ε)

In [None]:
#export
class Tversky(SupervisedCriterion):
    """
    The Tversky index [5] can be seen as a generalization of the Dice coefficient. It adds a weight to false positives and false negatives. Setting α > 0.5 penalises false negatives more.
    This is useful for highly imbalanced datasets, where the additional control over the loss function yields better small scale segmentations than the plain Dice coefficient.
    Just like Dice, this criterion reaches its best value at 0 and its worst value at 1.
    """
    def __init__(self,
                 α:float=.5, # The Tversky weight. When $α=0.5$, it can be solved into the regular Dice coefficient.
                 ε:float=1e-6, # A small value $>0$ that avoids division by $0$ and therefore improves numerical stability.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        # α weights the false negatives, β = 1 - α weights the false positives.
        self.α, self.β = α, 1 - α
        self.ε = ε
        super().__init__(name=f'Tversky({α})', compute_only_on_design_space=compute_only_on_design_space)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)

        mask = self.get_design_space_mask(solutions)
        stats_args = (
            self.get_θ_flat(solutions, binary=binary),
            self.get_θ_flat(gt_solutions, binary=binary),
            mask,
        )
        true_pos = self._get_number_of_true_positives(*stats_args)
        false_pos = self._get_number_of_false_positives(*stats_args)
        false_neg = self._get_number_of_false_negatives(*stats_args)

        # Tversky index = tp / (tp + α fn + β fp + ε); the criterion is its complement.
        denominator = true_pos + self.α * false_neg + self.β * false_pos + self.ε
        return 1 - true_pos / denominator

In [None]:
#export
class FocalTversky(SupervisedCriterion):
    """
    The Focal Tversky loss [6] is a generalisation of the Tversky loss. Its non-linearity gives control over how the loss behaves at different values of the Tversky index.
    Similar to the focal loss, which focuses on hard examples by down-weighting easy ones, the Focal Tversky loss attempts to learn hard examples with the help of γ, which controls the non-linearity of the loss.
    This criterion reaches its best value at 0, while higher values correspond to worse scores.
    """
    def __init__(self,
                 α:float=.5, # The Tversky weight. When $α=0.5$, it can be solved into the regular Dice coefficient.
                 γ:float=3, # $γ\geq0$ is the Focal loss focusing parameter. Setting $γ>0$ reduces the relative loss for well-classified examples, putting more focus on hard, misclassified examples.
                 ε:float=1e-6, # A small value $>0$ that avoids division by $0$ and therefore improves numerical stability.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        # α weights the false negatives, β = 1 - α weights the false positives.
        self.α, self.β = α, 1 - α
        self.γ = γ
        self.ε = ε
        super().__init__(name=f'focal_Tversky({α})', compute_only_on_design_space=compute_only_on_design_space)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)

        mask = self.get_design_space_mask(solutions)
        stats_args = (
            self.get_θ_flat(solutions, binary=binary),
            self.get_θ_flat(gt_solutions, binary=binary),
            mask,
        )
        true_pos = self._get_number_of_true_positives(*stats_args)
        false_pos = self._get_number_of_false_positives(*stats_args)
        false_neg = self._get_number_of_false_negatives(*stats_args)

        # Tversky index = tp / (tp + α fn + β fp + ε); its complement is raised to the power γ.
        denominator = true_pos + self.α * false_neg + self.β * false_pos + self.ε
        tversky_loss = 1 - true_pos / denominator
        return tversky_loss**self.γ

In [None]:
#export
class IoU(SupervisedCriterion):
    """
    The Intersection over Union (IoU) metric, also referred to as the Jaccard index, quantifies the percent overlap between the target mask and the prediction output. It is closely related to the Dice coefficient, which is often used as a loss function during training.
    The IoU metric measures the number of pixels common to the target and prediction masks divided by the total number of pixels present across both masks.
    IoU reaches its best value at 1 and its worst value at 0, i.e., higher values are better.
    """
    def __init__(self,
                 ε:float=1e-6, # A small value $>0$ that avoids division by $0$ and therefore improves numerical stability.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        self.ε = ε
        super().__init__(name='IoU',
                         lower_is_better=False,
                         compute_only_on_design_space=compute_only_on_design_space)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)

        mask = self.get_design_space_mask(solutions)
        stats_args = (
            self.get_θ_flat(solutions, binary=binary),
            self.get_θ_flat(gt_solutions, binary=binary),
            mask,
        )
        intersection = self._get_number_of_true_positives(*stats_args)
        false_pos = self._get_number_of_false_positives(*stats_args)
        false_neg = self._get_number_of_false_negatives(*stats_args)

        # Union = tp + fn + fp; ε keeps the quotient finite when the union is empty.
        union = intersection + false_neg + false_pos + self.ε
        return intersection / union

In [None]:
#export
class VoxelAccuracy(SupervisedCriterion):
    """
    The voxel accuracy is a three-dimensional version of the pixel accuracy [7]. It reports the percentage of voxels that are correctly classified. This metric can be misleading when the class representation within the image is small, since the measure is then biased towards reporting how well the negative case (i.e., where the class is not present) is identified.
    Voxel accuracy reaches its best value at 1 and its worst value at 0, i.e., higher values are better.
    """
    def __init__(self,
                 ε:float=1e-6, # A small value $>0$ that avoids division by $0$ and therefore improves numerical stability.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        self.ε = ε
        super().__init__(name='voxel_accuracy',
                         lower_is_better=False,
                         compute_only_on_design_space=compute_only_on_design_space)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)

        mask = self.get_design_space_mask(solutions)
        stats_args = (
            self.get_θ_flat(solutions, binary=binary),
            self.get_θ_flat(gt_solutions, binary=binary),
            mask,
        )
        true_pos = self._get_number_of_true_positives(*stats_args)
        true_neg = self._get_number_of_true_negatives(*stats_args)
        false_pos = self._get_number_of_false_positives(*stats_args)
        false_neg = self._get_number_of_false_negatives(*stats_args)

        # Accuracy = (tp + tn) / (tp + tn + fp + fn + ε).
        correct = true_pos + true_neg
        return correct / (correct + false_pos + false_neg + self.ε)

In [None]:
#export
class BalancedVoxelAccuracy(SupervisedCriterion):
    """
    The balanced voxel accuracy [9] is a balanced version of the voxel accuracy criterion and can also be interpreted as a rescaled version of the "Youden index" [10]. This makes it a better metric for imbalanced data. It is defined as the average of the recall obtained on each class.
    The criterion reaches its best value at 1 and its worst value at 0, i.e., higher values are better.
    """
    def __init__(self,
                 ε:float=1e-6, # A small value $>0$ that avoids division by $0$ and therefore improves numerical stability.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        self.ε = ε
        super().__init__(name='balanced_voxel_accuracy',
                         lower_is_better=False,
                         compute_only_on_design_space=compute_only_on_design_space)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)

        mask = self.get_design_space_mask(solutions)
        stats_args = (
            self.get_θ_flat(solutions, binary=binary),
            self.get_θ_flat(gt_solutions, binary=binary),
            mask,
        )
        # Mean of the recall on the positive class (sensitivity) and on the negative class (specificity).
        recall_positive = self._get_sensitivity(*stats_args, ε=self.ε)
        recall_negative = self._get_specificity(*stats_args, ε=self.ε)
        return (recall_positive + recall_negative) / 2.

In [None]:
#export
class L2Accuracy(SupervisedCriterion):
    """
    The L2 accuracy [8] reports one minus the root mean squared error of the predictions. The criterion reaches its best value at 1 and its worst value at 0, i.e., higher values are better.
    """
    def __init__(self,
                 ε:float=1e-6, # A small value $>0$ that avoids division by $0$ and therefore improves numerical stability.
                 compute_only_on_design_space:bool=True # Whether the criterion should be evaluated on voxels that have a design space information of -1, i.e., the voxels can be freely optimized. This parameter does not affect all criteria.
                ):
        self.ε = ε
        super().__init__(name='L2_accuracy',
                         lower_is_better=False,
                         compute_only_on_design_space=compute_only_on_design_space)


    def __call__(self,
                 solutions:list, # The solutions that should be evaluated with the criterion.
                 gt_solutions:list=None, # Ground truth solutions that are compared element-wise with the `solutions`.
                 binary:bool=False # Whether the criterion should be evaluated on binarized densities. Does not have an effect on some criteria.
                  ):
        """
        Calculates the output of the criterion for all solutions.
        """
        solutions, gt_solutions = self._convert_to_list(solutions, gt_solutions)
        self._check_inputs(solutions, gt_solutions)

        prediction = self.get_θ_flat(solutions, binary=binary)
        target = self.get_θ_flat(gt_solutions, binary=binary)
        mask = self.get_design_space_mask(solutions)

        # Element-wise squared error on the masked densities, averaged over the masked voxels.
        squared_error = mse_loss(prediction * mask, target * mask, reduction='none')
        mean_squared_error = squared_error.sum(dim=1) / mask.sum(dim=1)
        # ε is added under the square root.
        return 1 - torch.sqrt(mean_squared_error + self.ε)

# References

[1] Pihur, Vasyl, Susmita Datta, and Somnath Datta. "Weighted rank aggregation of cluster validation measures: a monte carlo cross-entropy approach." Bioinformatics 23.13 (2007): 1607-1615

[2] Lin, Tsung-Yi, et al. "Focal loss for dense object detection." Proceedings of the IEEE international conference on computer vision. 2017.

[3] Sudre, Carole H., et al. "Generalised dice overlap as a deep learning loss function for highly unbalanced segmentations." Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support: Third International Workshop, DLMIA 2017, and 7th International Workshop, ML-CDS 2017, Held in Conjunction with MICCAI 2017, Québec City, QC, Canada, September 14, Proceedings 3. Springer International Publishing, 2017.

[4] Taha, Abdel Aziz, and Allan Hanbury. "Metrics for evaluating 3D medical image segmentation: analysis, selection, and tool." BMC medical imaging 15.1 (2015): 1-28.

[5] Salehi, Seyed Sadegh Mohseni, Deniz Erdogmus, and Ali Gholipour. "Tversky loss function for image segmentation using 3D fully convolutional deep networks." Machine Learning in Medical Imaging: 8th International Workshop, MLMI 2017, Held in Conjunction with MICCAI 2017, Quebec City, QC, Canada, September 10, 2017, Proceedings 8. Springer International Publishing, 2017.

[6] Abraham, Nabila, and Naimul Mefraz Khan. "A novel focal tversky loss function with improved attention u-net for lesion segmentation." 2019 IEEE 16th international symposium on biomedical imaging (ISBI 2019). IEEE, 2019.

[7] Long, Jonathan, Evan Shelhamer, and Trevor Darrell. "Fully convolutional networks for semantic segmentation." Proceedings of the IEEE conference on computer vision and pattern recognition. 2015.

[8] Banga, Saurabh, et al. "3d topology optimization using convolutional neural networks." arXiv preprint arXiv:1808.07440 (2018).

[9] Brodersen, K.H.; Ong, C.S.; Stephan, K.E.; Buhmann, J.M. (2010). The balanced accuracy and its posterior distribution. Proceedings of the 20th International Conference on Pattern Recognition, 3121-24.

[10] Youden, William J. "Index for rating diagnostic tests." Cancer 3.1 (1950): 32-35.

In [None]:
#hide
import math
from dl4to.datasets import BasicDataset
from dl4to.problem import Problem
from dl4to.solution import Solution

In [None]:
#hide
def get_solution(problem, enforce_θ_on_Ω_design=True):
    """
    Returns the trivial solution of one of the `BasicDataset` problems.

    `problem` must be one of "ledge", "cantilever" or "wheel"; any other value raises a
    `ValueError` (previously this surfaced as an `UnboundLocalError` on `solution`).
    """
    known_problems = ("ledge", "cantilever", "wheel")
    if problem not in known_problems:
        raise ValueError(f"Unknown problem {problem!r}; expected one of {known_problems}.")

    # Each problem name matches the `BasicDataset` factory method of the same name.
    solution = getattr(BasicDataset(), problem)().trivial_solution

    # NOTE(review): the `enforce_θ_on_Ω_design` argument is ignored and the flag is always set to
    # False, exactly as before. Honouring the argument would change results for callers that rely
    # on the default — confirm the intended behavior before changing this.
    solution.enforce_θ_on_Ω_design = False
    return solution

In [None]:
#hide
def test_it_is_supervised(criterion):
    assert criterion.supervised

In [None]:
%%time
#hide
# Check that a default-constructed WeightedBCE has a truthy `supervised` attribute.
test_it_is_supervised(WeightedBCE())

CPU times: user 36 µs, sys: 25 µs, total: 61 µs
Wall time: 65.3 µs


In [None]:
%%time
#hide

def test_that_we_can_apply_it_and_some_basic_properties_hold(weight=.5, ε=1e-9):
    """
    Check that `WeightedBCE` agrees with `torch.nn.BCELoss` on several pairs of binary densities.

    `weight` is passed to both losses. `ε` is kept for backward compatibility of the signature but is not used.
    The random density is drawn from a locally seeded generator, so the test is reproducible and does not
    touch the global random state.
    """
    criterion = WeightedBCE(weight=weight)
    torch_criterion = torch.nn.BCELoss(weight=torch.tensor([weight]))

    ledge = BasicDataset().ledge()
    # A design space value of -1 marks voxels that can be freely optimized; set it everywhere.
    ledge._Ω_design = -1 * torch.ones_like(ledge.Ω_design)

    rng = torch.Generator().manual_seed(0)
    θ_rand = torch.zeros(1, *ledge.shape)
    θ_rand[.5 <= torch.rand(1, *ledge.shape, generator=rng)] = 1.
    densities = {
        "ones": torch.ones(1, *ledge.shape),
        "zeros": torch.zeros(1, *ledge.shape),
        "rand": θ_rand,
    }
    solutions = {name: Solution(ledge, θ) for name, θ in densities.items()}

    # (prediction, ground truth) combinations on which both implementations are compared.
    pairs = [
        ("ones", "ones"),
        ("zeros", "zeros"),
        ("rand", "rand"),
        ("ones", "zeros"),
        ("ones", "rand"),
    ]
    for prediction, ground_truth in pairs:
        a = criterion([solutions[prediction]], [solutions[ground_truth]])
        b = torch_criterion(densities[prediction], densities[ground_truth])
        assert torch.allclose(a, b), f"{prediction} vs {ground_truth}: {a}, {b}"


test_that_we_can_apply_it_and_some_basic_properties_hold()

CPU times: user 5.51 ms, sys: 9.03 ms, total: 14.5 ms
Wall time: 33.3 ms


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_the_same():
    """
    Check that `WeightedBCE` attains its best value on two separately built trivial ledge solutions.

    The best value is encoded as `not lower_is_better`, i.e., it compares equal to 0 if lower is better
    and to 1 otherwise.
    """
    prediction, target = (
        get_solution(problem="ledge", enforce_θ_on_Ω_design=False) for _ in range(2)
    )

    bce = WeightedBCE()
    value = bce([prediction], [target])
    best_value = not bce.lower_is_better
    assert value == best_value, value


test_that_criterion_value_is_correct_if_both_arguments_are_the_same()

CPU times: user 4.03 ms, sys: 0 ns, total: 4.03 ms
Wall time: 3.18 ms


In [None]:
%%time
#hide

def test_that_shape_is():
    """
    Check that `WeightedBCE` returns a one-dimensional tensor with one entry per solution pair.

    Evaluated for batches of one and of two (identical) pairs.
    """
    prediction = get_solution(problem="ledge")
    target = get_solution(problem="ledge")
    bce = WeightedBCE()

    for batch_size in (1, 2):
        value = bce([prediction] * batch_size, [target] * batch_size)
        assert value.shape == torch.Size([batch_size]), f"{value.shape}"


test_that_shape_is()

CPU times: user 2.08 ms, sys: 1.29 ms, total: 3.37 ms
Wall time: 3.08 ms


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_opposites():
    """
    Check the `WeightedBCE` value for an all-ones prediction against an all-zeros ground truth.

    The value is expected to equal exactly 50.
    """
    prediction = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    target = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    bce = WeightedBCE()

    # NOTE(review): 50 presumably stems from a log clamp at -100 times a default weight of 0.5 — confirm.
    θ_shape = prediction.get_θ().shape
    prediction.θ = torch.ones(*θ_shape)
    target.θ = torch.zeros(*θ_shape)

    value = bce([prediction], [target])
    assert value == 50, value


test_that_criterion_value_is_correct_if_both_arguments_are_opposites()

CPU times: user 2.47 ms, sys: 0 ns, total: 2.47 ms
Wall time: 2.27 ms


In [None]:
%%time
#hide
test_it_is_supervised(WeightedFocal())

CPU times: user 17 µs, sys: 12 µs, total: 29 µs
Wall time: 32.4 µs


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_the_same():
    """
    Check that `WeightedFocal` attains its best value on two separately built trivial ledge solutions.

    The best value is encoded as `not lower_is_better`, i.e., it compares equal to 0 if lower is better
    and to 1 otherwise.
    """
    prediction, target = (
        get_solution(problem="ledge", enforce_θ_on_Ω_design=False) for _ in range(2)
    )

    focal = WeightedFocal()
    value = focal([prediction], [target])
    best_value = not focal.lower_is_better
    assert value == best_value, value


test_that_criterion_value_is_correct_if_both_arguments_are_the_same()

CPU times: user 2.39 ms, sys: 1.37 ms, total: 3.77 ms
Wall time: 3.42 ms


In [None]:
%%time
#hide

def test_that_wbce_and_focal_loss_do_the_same_for_γ_0():
    """
    Check that `WeightedFocal` with γ=0 returns exactly the same value as `WeightedBCE` with the same weight.

    Both losses are evaluated on random densities for the weights 0.5 and 0.9. The densities are drawn from
    a locally seeded generator, so the exact comparison is reproducible and the global random state is untouched.
    """
    problem = BasicDataset().ledge()
    # NOTE(review): if `trivial_solution` returns the same object on every access, both names alias one
    # solution and the comparison below becomes trivial — confirm that each access builds a new solution.
    gt_solution = problem.trivial_solution
    solution = problem.trivial_solution

    rng = torch.Generator().manual_seed(0)
    shape = solution.get_θ().shape
    gt_solution.θ = torch.rand(*shape, generator=rng)
    solution.θ = torch.rand(*shape, generator=rng)

    for weight in (.5, .9):
        loss_focal = WeightedFocal(weight=weight, γ=0.)
        loss_wbce = WeightedBCE(weight=weight)
        focal_value = loss_focal([solution], [gt_solution])
        wbce_value = loss_wbce([solution], [gt_solution])
        assert focal_value == wbce_value, f"weight={weight}: {focal_value}, {wbce_value}"


test_that_wbce_and_focal_loss_do_the_same_for_γ_0()

CPU times: user 1.64 ms, sys: 1.1 ms, total: 2.74 ms
Wall time: 2.5 ms


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_opposites():
    """
    Check the `WeightedFocal` value (built with ε=0) for an all-ones prediction against an all-zeros ground truth.

    The value is expected to equal exactly 50.
    """
    prediction = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    target = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    focal = WeightedFocal(ε=0.)

    θ_shape = prediction.get_θ().shape
    prediction.θ = torch.ones(*θ_shape)
    target.θ = torch.zeros(*θ_shape)

    value = focal([prediction], [target])
    assert value == 50, value


test_that_criterion_value_is_correct_if_both_arguments_are_opposites()

CPU times: user 2.61 ms, sys: 0 ns, total: 2.61 ms
Wall time: 2.39 ms


In [None]:
%%time
#hide

def test_backprop_in_weighted_focal():
    """
    Check that `WeightedFocal._get_loss` produces NaN-free losses and NaN-free gradients.

    For several values of γ (with ε=1e-6) the loss is evaluated on six hand-picked voxels, and every
    single loss entry is backpropagated to the predictions. A NaN in the loss or in any gradient fails
    the test; previously NaN gradients were only printed.
    """
    design_space_mask = torch.tensor([1, 1, 1, 1, 1, 1]).view(-1,1).type(torch.bool)
    labels = torch.tensor([1, 1, 1, 0, 0, 0]).view(-1,1).type(torch.float32)
    # For each label value the predictions cover 1, 0 and the undecided value 0.5.
    predictions = torch.tensor([[1], [0], [.5], [1], [0], [.5]], requires_grad=True)

    for γ in [0, .5, 1, 1.5, 2, 3]:
        criterion = WeightedFocal(γ=γ, ε=1e-6)
        loss = criterion._get_loss(predictions, labels, design_space_mask)
        assert not torch.any(torch.isnan(loss)), f'γ: {γ}, loss: {loss}'

        for i in range(len(predictions)):
            # Recompute the loss so that every backward pass runs on a fresh graph,
            # and reset the gradient so that entries do not accumulate.
            loss = criterion._get_loss(predictions, labels, design_space_mask)
            predictions.grad = None
            loss[i].backward()
            assert not torch.isnan(predictions.grad).any(), f'γ: {γ}, i: {i}, grad: {predictions.grad}'


test_backprop_in_weighted_focal()

CPU times: user 147 ms, sys: 27.8 ms, total: 175 ms
Wall time: 174 ms


In [None]:
%%time
#hide
test_it_is_supervised(IoU())

CPU times: user 22 µs, sys: 0 ns, total: 22 µs
Wall time: 24.3 µs


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_the_same():
    """
    Check that `IoU` attains its best value on two separately built trivial ledge solutions.

    The best value is encoded as `not lower_is_better`, i.e., it compares equal to 0 if lower is better
    and to 1 otherwise.
    """
    prediction, target = (
        get_solution(problem="ledge", enforce_θ_on_Ω_design=False) for _ in range(2)
    )

    iou = IoU()
    value = iou([prediction], [target])
    best_value = not iou.lower_is_better
    assert value == best_value, value


test_that_criterion_value_is_correct_if_both_arguments_are_the_same()

CPU times: user 3.39 ms, sys: 0 ns, total: 3.39 ms
Wall time: 3.08 ms


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_opposites():
    """
    Check the `IoU` value for an all-ones prediction against an all-zeros ground truth.

    The value is expected to be 0 up to an absolute tolerance of 0.1.
    """
    prediction = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    target = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    iou = IoU()

    θ_shape = prediction.get_θ().shape
    prediction.θ = torch.ones(*θ_shape)
    target.θ = torch.zeros(*θ_shape)

    value = iou([prediction], [target])
    is_close_to_expected = math.isclose(value, 0, abs_tol=1e-1)
    assert is_close_to_expected, value


test_that_criterion_value_is_correct_if_both_arguments_are_opposites()

CPU times: user 2.46 ms, sys: 0 ns, total: 2.46 ms
Wall time: 2.25 ms


In [None]:
%%time
#hide
test_it_is_supervised(Dice())

CPU times: user 14 µs, sys: 9 µs, total: 23 µs
Wall time: 25 µs


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_the_same():
    """
    Check that `Dice` attains its best value on two separately built trivial ledge solutions.

    The best value is encoded as `not lower_is_better`, i.e., it compares equal to 0 if lower is better
    and to 1 otherwise.
    """
    prediction, target = (
        get_solution(problem="ledge", enforce_θ_on_Ω_design=False) for _ in range(2)
    )

    dice = Dice()
    value = dice([prediction], [target])
    best_value = not dice.lower_is_better
    assert value == best_value, value


test_that_criterion_value_is_correct_if_both_arguments_are_the_same()

CPU times: user 3.37 ms, sys: 0 ns, total: 3.37 ms
Wall time: 3.05 ms


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_opposites():
    """
    Check the `Dice` value for an all-ones prediction against an all-zeros ground truth.

    The value is expected to be 1 up to an absolute tolerance of 0.1.
    """
    prediction = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    target = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    dice = Dice()

    θ_shape = prediction.get_θ().shape
    prediction.θ = torch.ones(*θ_shape)
    target.θ = torch.zeros(*θ_shape)

    value = dice([prediction], [target])
    is_close_to_expected = math.isclose(value, 1, abs_tol=1e-1)
    assert is_close_to_expected, value


test_that_criterion_value_is_correct_if_both_arguments_are_opposites()

CPU times: user 2.53 ms, sys: 0 ns, total: 2.53 ms
Wall time: 2.28 ms


In [None]:
%%time
#hide
test_it_is_supervised(Tversky())

CPU times: user 29 µs, sys: 0 ns, total: 29 µs
Wall time: 32.2 µs


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_the_same():
    """
    Check that `Tversky` attains its best value on two separately built trivial ledge solutions.

    The best value is encoded as `not lower_is_better`, i.e., it compares equal to 0 if lower is better
    and to 1 otherwise.
    """
    prediction, target = (
        get_solution(problem="ledge", enforce_θ_on_Ω_design=False) for _ in range(2)
    )

    tversky = Tversky()
    value = tversky([prediction], [target])
    best_value = not tversky.lower_is_better
    assert value == best_value, value


test_that_criterion_value_is_correct_if_both_arguments_are_the_same()

CPU times: user 3.03 ms, sys: 0 ns, total: 3.03 ms
Wall time: 2.74 ms


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_opposites():
    """
    Check the `Tversky` value for an all-ones prediction against an all-zeros ground truth.

    The value is expected to be 1 up to an absolute tolerance of 0.1.
    """
    prediction = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    target = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    tversky = Tversky()

    θ_shape = prediction.get_θ().shape
    prediction.θ = torch.ones(*θ_shape)
    target.θ = torch.zeros(*θ_shape)

    value = tversky([prediction], [target])
    is_close_to_expected = math.isclose(value, 1, abs_tol=1e-1)
    assert is_close_to_expected, value


test_that_criterion_value_is_correct_if_both_arguments_are_opposites()

CPU times: user 3.1 ms, sys: 0 ns, total: 3.1 ms
Wall time: 2.8 ms


In [None]:
%%time
#hide
test_it_is_supervised(FocalTversky())

CPU times: user 19 µs, sys: 13 µs, total: 32 µs
Wall time: 35 µs


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_the_same():
    """
    Check that `FocalTversky` attains its best value on two separately built trivial ledge solutions.

    The best value is encoded as `not lower_is_better`, i.e., it compares equal to 0 if lower is better
    and to 1 otherwise.
    """
    prediction, target = (
        get_solution(problem="ledge", enforce_θ_on_Ω_design=False) for _ in range(2)
    )

    focal_tversky = FocalTversky()
    value = focal_tversky([prediction], [target])
    best_value = not focal_tversky.lower_is_better
    assert value == best_value, value


test_that_criterion_value_is_correct_if_both_arguments_are_the_same()

CPU times: user 1.87 ms, sys: 1.2 ms, total: 3.08 ms
Wall time: 2.79 ms


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_opposites():
    """
    Check the `FocalTversky` value for an all-ones prediction against an all-zeros ground truth.

    The value is expected to be 1 up to an absolute tolerance of 0.1.
    """
    prediction = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    target = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    focal_tversky = FocalTversky()

    θ_shape = prediction.get_θ().shape
    prediction.θ = torch.ones(*θ_shape)
    target.θ = torch.zeros(*θ_shape)

    value = focal_tversky([prediction], [target])
    is_close_to_expected = math.isclose(value, 1, abs_tol=1e-1)
    assert is_close_to_expected, value


test_that_criterion_value_is_correct_if_both_arguments_are_opposites()

CPU times: user 2.45 ms, sys: 163 µs, total: 2.61 ms
Wall time: 2.37 ms


In [None]:
%%time
#hide
test_it_is_supervised(VoxelAccuracy())

CPU times: user 16 µs, sys: 0 ns, total: 16 µs
Wall time: 18.6 µs


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_the_same():
    """
    Check that `VoxelAccuracy` is 1 for every pair in a batch of two identical trivial ledge solution pairs.
    """
    prediction = get_solution(problem="ledge")
    target = get_solution(problem="ledge")
    accuracy = VoxelAccuracy()

    batch_size = 2
    value = accuracy([prediction] * batch_size, [target] * batch_size)
    assert torch.allclose(value, torch.ones(batch_size)), value


test_that_criterion_value_is_correct_if_both_arguments_are_the_same()

CPU times: user 3.55 ms, sys: 0 ns, total: 3.55 ms
Wall time: 3.17 ms


In [None]:
%%time
#hide

def test_that_criterion_value_is_correct_if_both_arguments_are_opposites():
    """
    Check that `VoxelAccuracy` is 0 for every pair in a batch of two all-ones vs. all-zeros pairs.
    """
    prediction = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    target = get_solution(problem="ledge", enforce_θ_on_Ω_design=False)
    accuracy = VoxelAccuracy()

    # Both densities are shaped after the prediction's current density.
    prediction.θ = torch.ones_like(prediction.get_θ())
    target.θ = torch.zeros_like(prediction.get_θ())

    batch_size = 2
    value = accuracy([prediction] * batch_size, [target] * batch_size)
    assert torch.allclose(value, torch.zeros(batch_size)), value


test_that_criterion_value_is_correct_if_both_arguments_are_opposites()

CPU times: user 2.66 ms, sys: 0 ns, total: 2.66 ms
Wall time: 2.45 ms
