# QuickTorch Playground
https://neptune.ai/blog/pytorch-loss-functions

In [2]:
import torch
import torch.nn as nn

In [3]:
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)

## The Mean Absolute Error
- Regression problems, especially when the distribution of the target variable has outliers, 
  - such as small or big values that are a great distance from the mean value. It is considered to be more robust to outliers.

In [4]:
mae_loss = nn.L1Loss()
output = mae_loss(input, target)
output.backward()

print('input: ', input)
print('target: ', target)
print('output: ', output)

input:  tensor([[-2.6039, -1.2204,  0.6570,  0.0422,  0.3718],
        [-0.6171,  1.1668,  0.6107, -1.2699,  0.0993],
        [-1.5890, -1.7168,  0.5738,  1.3962,  0.1433]], requires_grad=True)
target:  tensor([[ 0.1537, -0.9205,  1.2392, -0.0545,  0.3506],
        [ 0.2091,  1.1160,  2.1931,  1.5657,  1.9054],
        [-2.0823, -0.8792, -0.5747, -1.5154,  1.3977]])
output:  tensor(1.1669, grad_fn=<L1LossBackward>)


## Mean Squared Error Loss Function
- MSE is the default loss function for most Pytorch regression problems.

In [5]:
mse_loss = nn.MSELoss()
output = mse_loss(input, target)
output.backward()

print('input: ', input)
print('target: ', target)
print('output: ', output)

input:  tensor([[-2.6039, -1.2204,  0.6570,  0.0422,  0.3718],
        [-0.6171,  1.1668,  0.6107, -1.2699,  0.0993],
        [-1.5890, -1.7168,  0.5738,  1.3962,  0.1433]], requires_grad=True)
target:  tensor([[ 0.1537, -0.9205,  1.2392, -0.0545,  0.3506],
        [ 0.2091,  1.1160,  2.1931,  1.5657,  1.9054],
        [-2.0823, -0.8792, -0.5747, -1.5154,  1.3977]])
output:  tensor(2.3233, grad_fn=<MseLossBackward>)


## Negative Log-Likelihood Loss Function
https://ljvmiranda921.github.io/notebook/2017/08/13/softmax-and-the-negative-log-likelihood/

- Multi-class classification problems

<img src="https://i.imgur.com/hU252jE.jpg" width="500">

In [8]:
# size of input (N x C) is = 3 x 5
# every element in target should have 0 <= value < C
# target_2 = torch.tensor([1, 0, 4])
target_2 = torch.tensor([2, 4, 4])

# (NLL) is applied only on models with the softmax function as an output activation layer. 
# Softmax refers to an activation function that calculates the normalized exponential function of every unit in the layer.
m = nn.LogSoftmax(dim=1)
nll_loss = nn.NLLLoss()
output = nll_loss(m(input), target_2)
output.backward()

print('input: ', input)
print('target: ', target_2)
print('output: ', output)

input:  tensor([[-2.6039, -1.2204,  0.6570,  0.0422,  0.3718],
        [-0.6171,  1.1668,  0.6107, -1.2699,  0.0993],
        [-1.5890, -1.7168,  0.5738,  1.3962,  0.1433]], requires_grad=True)
target:  tensor([2, 4, 4])
output:  tensor(1.5350, grad_fn=<NllLossBackward>)


## Cross-Entropy Loss Function
- Common type is the Binary Cross-Entropy (BCE)
  - The BCE Loss is mainly used for binary classification models
- Creating confident models—the prediction will be accurate and with a higher probability

In [9]:
target_2 = torch.empty(3, dtype=torch.long).random_(5)

cross_entropy_loss = nn.CrossEntropyLoss()
output = cross_entropy_loss(input, target_2)
output.backward()

print('input: ', input)
print('target: ', target_2)
print('output: ', output)

input:  tensor([[-2.6039, -1.2204,  0.6570,  0.0422,  0.3718],
        [-0.6171,  1.1668,  0.6107, -1.2699,  0.0993],
        [-1.5890, -1.7168,  0.5738,  1.3962,  0.1433]], requires_grad=True)
target:  tensor([3, 3, 0])
output:  tensor(2.7738, grad_fn=<NllLossBackward>)


## Hinge Embedding Loss Function
- Classification problems, especially when determining if two inputs are dissimilar or similar. 
- Learning nonlinear embeddings or semi-supervised learning tasks.

In [10]:
hinge_loss = nn.HingeEmbeddingLoss()
output = hinge_loss(input, target)
output.backward()

print('input: ', input)
print('target: ', target)
print('output: ', output)

input:  tensor([[-2.6039, -1.2204,  0.6570,  0.0422,  0.3718],
        [-0.6171,  1.1668,  0.6107, -1.2699,  0.0993],
        [-1.5890, -1.7168,  0.5738,  1.3962,  0.1433]], requires_grad=True)
target:  tensor([[ 0.1537, -0.9205,  1.2392, -0.0545,  0.3506],
        [ 0.2091,  1.1160,  2.1931,  1.5657,  1.9054],
        [-2.0823, -0.8792, -0.5747, -1.5154,  1.3977]])
output:  tensor(1.0375, grad_fn=<MeanBackward0>)


## Margin Ranking Loss Function
- Ranking problems

In [11]:
import torch
import torch.nn as nn

input_one = torch.randn(3, requires_grad=True)
input_two = torch.randn(3, requires_grad=True)
target_3 = torch.randn(3).sign()

ranking_loss = nn.MarginRankingLoss()
output = ranking_loss(input_one, input_two, target_3)
output.backward()

print('input one: ', input_one)
print('input two: ', input_two)
print('target: ', target_3)
print('output: ', output)

input one:  tensor([-2.3125, -0.8794, -0.1077], requires_grad=True)
input two:  tensor([ 0.7138,  0.8160, -2.4854], requires_grad=True)
target:  tensor([ 1., -1.,  1.])
output:  tensor(1.0088, grad_fn=<MeanBackward0>)


## Triplet Margin Loss Function
- Determining the relative similarity existing between samples. 
- It is used in content-based retrieval problems 

In [12]:
anchor = torch.randn(100, 128, requires_grad=True)
positive = torch.randn(100, 128, requires_grad=True)
negative = torch.randn(100, 128, requires_grad=True)

triplet_margin_loss = nn.TripletMarginLoss(margin=1.0, p=2)
output = triplet_margin_loss(anchor, positive, negative)
output.backward()

print('anchor: ', anchor)
print('positive: ', positive)
print('negative: ', negative)
print('output: ', output)

anchor:  tensor([[-0.7325, -1.7829, -0.4846,  ..., -0.0598, -0.1298, -0.1603],
        [ 0.0557, -0.5592, -1.6508,  ...,  0.9345, -0.9865,  1.0287],
        [-0.4330, -0.8617,  0.9119,  ..., -0.5017,  1.0565, -1.7936],
        ...,
        [ 0.9324, -0.6961, -1.2872,  ...,  0.9515, -0.9232,  1.5767],
        [-0.5145,  1.6347, -0.4723,  ...,  0.4561, -1.0959, -0.5120],
        [ 1.4085,  0.1191,  0.7920,  ..., -0.0760,  0.6158,  0.9418]],
       requires_grad=True)
positive:  tensor([[ 0.3188,  0.1526, -2.4767,  ...,  1.2028, -1.2440,  1.5630],
        [ 0.9972, -0.5618,  0.7896,  ...,  0.6141, -2.1275, -0.0886],
        [-0.8415, -0.6662,  1.8936,  ...,  0.1795,  0.1603, -0.1437],
        ...,
        [ 1.1995, -0.5566,  1.1635,  ..., -0.5103, -0.2374,  0.0360],
        [ 1.2933,  0.9803, -1.3911,  ...,  0.9058,  1.0857, -0.8077],
        [-0.7215,  0.4894, -1.4079,  ...,  0.9180,  1.2208, -0.5951]],
       requires_grad=True)
negative:  tensor([[-0.0462,  0.9133, -0.4002,  ...,  0.73

## Kullback-Leibler Divergence Loss Function
- Approximating complex functions
- Multi-class classification tasks
- If you want to make sure that the distribution of predictions is similar to that of training data

In [13]:
input = torch.randn(2, 3, requires_grad=True)
target_4 = torch.randn(2, 3)

kl_loss = nn.KLDivLoss(reduction = 'batchmean')
output = kl_loss(input, target_4)
output.backward()

print('input: ', input)
print('target: ', target_4)
print('output: ', output)

input:  tensor([[-0.9284,  0.3639, -0.2313],
        [ 0.5626, -1.0062, -0.9481]], requires_grad=True)
target:  tensor([[-0.8218, -0.8638,  0.9639],
        [-0.6536, -1.5656,  0.7465]])
output:  tensor(0.3385, grad_fn=<DivBackward0>)


## Custom Loss Function

In [None]:
# type 1
def myCustomLoss(my_outputs, my_labels):
    #specifying the batch size
    my_batch_size = my_outputs.size()[0] 
    #calculating the log of softmax values           
    my_outputs = F.log_softmax(my_outputs, dim=1)  
    #selecting the values that correspond to labels
    my_outputs = my_outputs[range(my_batch_size), my_labels] 
    #returning the results
    return -torch.sum(my_outputs)/number_examples

# type 2
# in binary classification problem
class DiceLoss(nn.Module):
    def __init__(self, weight=None, size_average=True):
        super(DiceLoss, self).__init__()
 
    def forward(self, inputs, targets, smooth=1):        
        inputs = F.sigmoid(inputs)       
        
        inputs = inputs.view(-1)
        targets = targets.view(-1)
        
        intersection = (inputs * targets).sum()                            
        dice = (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth)  
        
        return 1 - dice