# PyTorch Loss Functions Summary

In [1]:
import random

import torch.nn as nn
import torch.nn.functional as F
import torch

import numpy as np

## nn.L1Loss

In [2]:
# nn.L1Loss(size_average=None, recude=None, reduction='elementwise_mean')
l1loss = nn.L1Loss(reduction='elementwise_mean')

In [3]:
input_x = torch.ones(10, 5, requires_grad=True)
target = torch.zeros(10, 5)

loss = l1loss(input_x, target)
print(loss)
loss.backward()

print(input_x.grad)

print(target.grad)

tensor(1., grad_fn=<L1LossBackward>)
tensor([[0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200]])
None


In [4]:
# Custom L1Loss
class CustomL1Loss(nn.Module):
    def __init__(self, reduce=True, size_average=True):
        super(CustomL1Loss, self).__init__()
        self.reduce = reduce
        self.size_average = size_average
        
    def forward(self, _input, target):
        abs_metric = torch.abs(_input - target)
        if self.reduce:
            return torch.mean(abs_metric) if self.size_average else torch.sum(abs_metric)
        else:
            return abs_metric
    
l1loss = CustomL1Loss(size_average=True)
loss = l1loss(input_x, target)

print(loss)
input_x.grad = None
loss.backward()

print(input_x.grad)

tensor(1., grad_fn=<MeanBackward1>)
tensor([[0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200],
        [0.0200, 0.0200, 0.0200, 0.0200, 0.0200]])


## nn.MSELoss

In [5]:
# nn.MSELoss(size_average=None, reduce=None, reduction='elementwise_mean')
mseloss = nn.MSELoss(reduction='elementwise_mean')

# input_x = torch.ones(10, 5, requires_grad=True)
# target = torch.zeros(10, 5)

loss = mseloss(input_x, target)

print(loss)
input_x.grad = None
loss.backward()

print(input_x.grad)

tensor(1., grad_fn=<MseLossBackward>)
tensor([[0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400]])


In [6]:
# Custom MSELoss
class CustomMSELoss(nn.Module):
    def __init__(self, reduce=True, size_average=True):
        super(CustomMSELoss, self).__init__()
        self.reduce = reduce
        self.size_average = size_average
        
    def forward(self, _input, target):
        mse_metric = torch.pow(_input - target, 2)
        if self.reduce:
            return torch.mean(mse_metric) if self.size_average else torch.sum(mse_metric)
        else:
            return mse_metric
    
mseloss = CustomMSELoss(size_average=True)

loss = mseloss(input_x, target)

print(loss)
input_x.grad = None
loss.backward()

print(input_x.grad)

tensor(1., grad_fn=<MeanBackward1>)
tensor([[0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400],
        [0.0400, 0.0400, 0.0400, 0.0400, 0.0400]])


## nn.CrossEntropyLoss

In [7]:
from IPython.display import Math
Math(r'loss(x, class) = -\log(e^{x[class]} / \sum(e^{x[j]})) = -x[class] + \log(\sum(e^{x[j]}))')

<IPython.core.display.Math object>

In [20]:
# nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='elementwise_mean')
'''This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class.
Parameters:
    weight-A manual rescaling weight given to each class. If given, has to be a Tensor of size C. this is
        particularly useful when you have an unbalanced training set.
    ignore_index-Specifies a target value that is ignored and does not contribute to the input gradient.
        When size_average is True, the loss is averaged over non-ignored tragets.
'''
CrossEntropyLoss = nn.CrossEntropyLoss(reduce=True, size_average=True)

# Set RANDOM_SEED
RANDOM_SEED = 1000
torch.manual_seed(RANDOM_SEED) # CPU
torch.cuda.manual_seed(RANDOM_SEED) # GPU
np.random.seed(RANDOM_SEED) # Numpy
random.seed(RANDOM_SEED) # Random
torch.backends.cudnn.deterministic = True

input_x = torch.randn(10, 5, requires_grad=True)
target = torch.empty(10, dtype=torch.long).random_(5)

loss = CrossEntropyLoss(input_x, target)
print(loss)

loss.backward()
print(input_x.grad)

# CrossEntropyLoss with weight
weights = torch.FloatTensor([0.1, 0.2, 0.4, 0.8, 0.1])
CrossEntropyLoss = nn.CrossEntropyLoss(weight=weights, reduce=True, size_average=True)
loss = CrossEntropyLoss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(target)
print(input_x.grad)

tensor(1.9935, grad_fn=<NllLossBackward>)
tensor([[ 0.0247,  0.0136,  0.0214, -0.0805,  0.0208],
        [-0.0673,  0.0078,  0.0297,  0.0244,  0.0055],
        [ 0.0292,  0.0138,  0.0183, -0.0660,  0.0047],
        [ 0.0368,  0.0081,  0.0197,  0.0140, -0.0786],
        [ 0.0146, -0.0920,  0.0186,  0.0171,  0.0417],
        [-0.0719,  0.0096,  0.0280,  0.0288,  0.0054],
        [ 0.0029,  0.0066, -0.0988,  0.0671,  0.0223],
        [ 0.0542,  0.0024, -0.0912,  0.0072,  0.0274],
        [ 0.0483,  0.0133,  0.0046, -0.0843,  0.0181],
        [ 0.0559,  0.0135, -0.0870,  0.0103,  0.0073]])
tensor(1.9793, grad_fn=<NllLossBackward>)
tensor([3, 0, 3, 4, 1, 0, 2, 2, 3, 2])
tensor([[ 0.0482,  0.0265,  0.0418, -0.1571,  0.0406],
        [-0.0164,  0.0019,  0.0072,  0.0060,  0.0013],
        [ 0.0570,  0.0269,  0.0357, -0.1289,  0.0092],
        [ 0.0090,  0.0020,  0.0048,  0.0034, -0.0192],
        [ 0.0071, -0.0449,  0.0091,  0.0083,  0.0203],
        [-0.0175,  0.0023,  0.0068,  0.0070,  0.001

In [9]:
# Custom CrossEntropyLoss
class CustomCrossEntropyLoss(nn.Module):
    def __init__(self, weight=None, reduce=True, size_average=True):
        super(CustomCrossEntropyLoss, self).__init__()
        self.weight = weight
        self.reduce = reduce
        self.size_average = size_average
        
    def forward(self, input_x, target):
        '''Parameters:
            input_x: (minibatch, C)
            target: (minibatch) where each value is 0 <= target[i] <= C-1
        '''
        row_idxs = torch.arange(input_x.size(0))
        log_sum = torch.log(torch.sum(torch.exp(input_x), 1))
        
        cross_entropy_metric = -1 * input_x[row_idxs, target] + log_sum
        if self.weight is not None:
            # TODO: assert
            cross_entropy_metric = self.weight[target] * cross_entropy_metric
        if self.reduce:
            if self.size_average:
                if self.weight is None:
                    return torch.mean(cross_entropy_metric)
                return torch.sum((1.0 / self.weight[target].sum()) * cross_entropy_metric)
            else:
                return torch.sum(cross_entropy_metric)
        else:
            return cross_entropy_metric
        
CrossEntropyLoss = CustomCrossEntropyLoss(size_average=True)
loss = CrossEntropyLoss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

weights = torch.FloatTensor([0.1, 0.2, 0.4, 0.8, 0.1])
CrossEntropyLoss = CustomCrossEntropyLoss(weight=weights, reduce=True, size_average=True)
loss = CrossEntropyLoss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(target)
print(input_x.grad)

tensor(1.9935, grad_fn=<MeanBackward1>)
tensor([[ 0.0247,  0.0136,  0.0214, -0.0805,  0.0208],
        [-0.0673,  0.0078,  0.0297,  0.0244,  0.0055],
        [ 0.0292,  0.0138,  0.0183, -0.0660,  0.0047],
        [ 0.0368,  0.0081,  0.0197,  0.0140, -0.0786],
        [ 0.0146, -0.0920,  0.0186,  0.0171,  0.0417],
        [-0.0719,  0.0096,  0.0280,  0.0288,  0.0054],
        [ 0.0029,  0.0066, -0.0988,  0.0671,  0.0223],
        [ 0.0542,  0.0024, -0.0912,  0.0072,  0.0274],
        [ 0.0483,  0.0133,  0.0046, -0.0843,  0.0181],
        [ 0.0559,  0.0135, -0.0870,  0.0103,  0.0073]])
tensor(1.9793, grad_fn=<SumBackward0>)
tensor([3, 0, 3, 4, 1, 0, 2, 2, 3, 2])
tensor([[ 0.0482,  0.0265,  0.0418, -0.1571,  0.0406],
        [-0.0164,  0.0019,  0.0072,  0.0060,  0.0013],
        [ 0.0570,  0.0269,  0.0357, -0.1289,  0.0092],
        [ 0.0090,  0.0020,  0.0048,  0.0034, -0.0192],
        [ 0.0071, -0.0449,  0.0091,  0.0083,  0.0203],
        [-0.0175,  0.0023,  0.0068,  0.0070,  0.0013],
 

## nn.NLLLoss

In [22]:
# nn.NLLLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='elementwise_mean')
'''The input given through a forward call is expected to contain log-probabilities of each class.
    Parameters: see nn.CrossEntropyLoss's parameters for more details.
'''

"The input given through a forward call is expected to contain log-probabilities of each class.\n    Parameters: see nn.CrossEntropyLoss's parameters for more details.\n"

## ![image.png](https://s1.ax1x.com/2018/10/21/iBdNTJ.png) 

In [11]:
# nllloss = nn.NLLLoss(reduce=True, size_average=True)
nllloss = nn.NLLLoss(reduction='elementwise_mean')

# Need to contain log-probabilities of each of class.
log_softmax = nn.LogSoftmax(dim=1)

loss = nllloss(log_softmax(input_x), target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

# nn.NLLLoss with weiht
# nllloss = nn.NLLLoss(weight=weights, reduce=True, size_average=True)
nllloss = nn.NLLLoss(weight=weights, reduction='elementwise_mean')
loss = nllloss(log_softmax(input_x), target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

tensor(1.9935, grad_fn=<NllLossBackward>)
tensor([[ 0.0247,  0.0136,  0.0214, -0.0805,  0.0208],
        [-0.0673,  0.0078,  0.0297,  0.0244,  0.0055],
        [ 0.0292,  0.0138,  0.0183, -0.0660,  0.0047],
        [ 0.0368,  0.0081,  0.0197,  0.0140, -0.0786],
        [ 0.0146, -0.0920,  0.0186,  0.0171,  0.0417],
        [-0.0719,  0.0096,  0.0280,  0.0288,  0.0054],
        [ 0.0029,  0.0066, -0.0988,  0.0671,  0.0223],
        [ 0.0542,  0.0024, -0.0912,  0.0072,  0.0274],
        [ 0.0483,  0.0133,  0.0046, -0.0843,  0.0181],
        [ 0.0559,  0.0135, -0.0870,  0.0103,  0.0073]])
tensor(1.9793, grad_fn=<NllLossBackward>)
tensor([[ 0.0482,  0.0265,  0.0418, -0.1571,  0.0406],
        [-0.0164,  0.0019,  0.0072,  0.0060,  0.0013],
        [ 0.0570,  0.0269,  0.0357, -0.1289,  0.0092],
        [ 0.0090,  0.0020,  0.0048,  0.0034, -0.0192],
        [ 0.0071, -0.0449,  0.0091,  0.0083,  0.0203],
        [-0.0175,  0.0023,  0.0068,  0.0070,  0.0013],
        [ 0.0028,  0.0064, -0.0964,

In [12]:
# CustomNLLLoss
class CustomNLLLoss(nn.Module):
    def __init__(self, weight=None, reduce=True, size_average=True):
        super(CustomNLLLoss, self).__init__()
        self.weight = weight
        self.reduce = reduce
        self.size_average = size_average
        
    def forward(self, input_x, target):
        '''Parameters:
            input_x: (minibatch, C) where each value is log-probabilities
            target: (minibatch) where each value is 0 <= target[i] <= C-1
        '''
        row_idxs = torch.arange(input_x.size(0))
        nllloss_metric = -input_x[row_idxs, target]
        if self.weight is not None:
            # TODO: assert
            nllloss_metric = self.weight[target] * nllloss_metric
            
        if self.reduce:
            if self.size_average:
                if self.weight is not None:
                    return torch.sum((1.0 / self.weight[target].sum()) * nllloss_metric)
                else:
                    return torch.mean(nllloss_metric)
            else:
                return torch.sum(nllloss_metric)
        else:
            return nllloss_metric
        
# CustomNLLLoss without weight
nllloss = CustomNLLLoss(reduce=True, size_average=True)
loss = nllloss(log_softmax(input_x), target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

# CustomNLLLoss with weight
nllloss = CustomNLLLoss(weight=weights, reduce=True, size_average=True)
loss = nllloss(log_softmax(input_x), target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

tensor(1.9935, grad_fn=<MeanBackward1>)
tensor([[ 0.0247,  0.0136,  0.0214, -0.0805,  0.0208],
        [-0.0673,  0.0078,  0.0297,  0.0244,  0.0055],
        [ 0.0292,  0.0138,  0.0183, -0.0660,  0.0047],
        [ 0.0368,  0.0081,  0.0197,  0.0140, -0.0786],
        [ 0.0146, -0.0920,  0.0186,  0.0171,  0.0417],
        [-0.0719,  0.0096,  0.0280,  0.0288,  0.0054],
        [ 0.0029,  0.0066, -0.0988,  0.0671,  0.0223],
        [ 0.0542,  0.0024, -0.0912,  0.0072,  0.0274],
        [ 0.0483,  0.0133,  0.0046, -0.0843,  0.0181],
        [ 0.0559,  0.0135, -0.0870,  0.0103,  0.0073]])
tensor(1.9793, grad_fn=<SumBackward0>)
tensor([[ 0.0482,  0.0265,  0.0418, -0.1571,  0.0406],
        [-0.0164,  0.0019,  0.0072,  0.0060,  0.0013],
        [ 0.0570,  0.0269,  0.0357, -0.1289,  0.0092],
        [ 0.0090,  0.0020,  0.0048,  0.0034, -0.0192],
        [ 0.0071, -0.0449,  0.0091,  0.0083,  0.0203],
        [-0.0175,  0.0023,  0.0068,  0.0070,  0.0013],
        [ 0.0028,  0.0064, -0.0964,  0.0

## nn.BCELoss 

In [13]:
# nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='elementwise_mean')
'''Creates a criterion that measures the Binary Cross Entropy between the target and the output.
    Parameters: to see nn.CrossEntropyLoss's parameters for more details.
        weight-A manual rescaling weight given to the loss of each batch element. If given, has to be a Tensor of size "nbatch". 
Note that the targets y should be numbers between 0 and 1.
'''

'Creates a criterion that measures the Binary Cross Entropy between the target and the output.\n    Parameters: to see nn.CrossEntropyLoss\'s parameters for more details.\n        weight-A manual rescaling weight given to the loss of each batch element. If given, has to be a Tensor of size "nbatch". \nNote that the targets y should be numbers between 0 and 1.\n'

[![iBs861.png](https://s1.ax1x.com/2018/10/21/iBs861.png)](https://imgchr.com/i/iBs861)

In [14]:
# Need to get a probability value 
sigmoid = nn.Sigmoid()

# nn.BCELoss without weight
bceloss = nn.BCELoss(weight=None, reduction='elementwise_mean')

input_x = torch.randn(10, requires_grad=True)
target = torch.empty(10).random_(2)

loss = bceloss(sigmoid(input_x), target)
print(loss)
loss.backward()
print(input_x.grad)

# nn.BCELoss with weight
weights = torch.FloatTensor([0.4, 0.6]) # weight of each class
weights = weights[torch.empty(10, dtype=torch.long).random_(2)]

bceloss = nn.BCELoss(weight=weights, reduction='elementwise_mean')
loss = bceloss(sigmoid(input_x), target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

tensor(0.9453, grad_fn=<BinaryCrossEntropyBackward>)
tensor([ 0.0262, -0.0762, -0.0492,  0.0358,  0.0891, -0.0288,  0.0289, -0.0656,
        -0.0840,  0.0549])
tensor(0.4822, grad_fn=<BinaryCrossEntropyBackward>)
tensor([ 0.0105, -0.0305, -0.0197,  0.0215,  0.0534, -0.0173,  0.0173, -0.0394,
        -0.0336,  0.0330])


In [15]:
# CustomBCELoss
class CustomBCELoss(nn.Module):
    def __init__(self, weight=None, reduce=True, size_average=True):
        super(CustomBCELoss, self).__init__()
        self.weight = weight
        self.reduce = reduce
        self.size_average = size_average
        
    def forward(self, input_x, target):
        '''Parameters:
            input_x = sigmoid(input_x) with shape (N, )
            target with shape (N, )
        '''
        bce_metric = target * torch.log(input_x) + (1 - target) * torch.log(1 - input_x)
        bce_metric = -1.0 * bce_metric
        if self.weight is not None:
            bce_metric = self.weight * bce_metric
            
        if self.reduce:
            return torch.mean(bce_metric) if self.size_average else torch.sum(bce_metric)
        else:
            return bce_metric
        
# CustomBCELoss without weight
bceloss = CustomBCELoss(weight=None, reduce=True, size_average=True)
loss = bceloss(sigmoid(input_x), target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

# CustomBCELoss with weight
bceloss = CustomBCELoss(weight=weights, reduce=True, size_average=True)
loss = bceloss(sigmoid(input_x), target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

tensor(0.9453, grad_fn=<MeanBackward1>)
tensor([ 0.0262, -0.0762, -0.0492,  0.0358,  0.0891, -0.0288,  0.0289, -0.0656,
        -0.0840,  0.0549])
tensor(0.4822, grad_fn=<MeanBackward1>)
tensor([ 0.0105, -0.0305, -0.0197,  0.0215,  0.0534, -0.0173,  0.0173, -0.0394,
        -0.0336,  0.0330])


## nn.BCEWithLogitsLoss 

In [16]:
# nn.BCEWithLogitsLoss(weight=None, size_average=None, reduce=None, reduction='elementwise_mean', pos_weight=None)
'''This loss conmbines a Sigmoid layer and the BCELoss in one single class. This version is more numerically stable
than using a plain Sigmoid followed by a BCELoss as, by combining the operations into one layer, we take advantage
of the log-sum-exp trick for numerical stability.
    Parameters:
        pos_weight-It's possible to trade off recall and precision by adding weights to positive examples.
    must be a vector with length equal to the number of classes.
'''

"This loss conmbines a Sigmoid layer and the BCELoss in one single class. This version is more numerically stable\nthan using a plain Sigmoid followed by a BCELoss as, by combining the operations into one layer, we take advantage\nof the log-sum-exp trick for numerical stability.\n    Parameters:\n        pos_weight-It's possible to trade off recall and precision by adding weights to positive examples.\n    must be a vector with length equal to the number of classes.\n"

[![iBgYgH.png](https://s1.ax1x.com/2018/10/21/iBgYgH.png)](https://imgchr.com/i/iBgYgH)

In [17]:
# nn.BCEWithLogitsLoss without weight
bcewithlogitsloss = nn.BCEWithLogitsLoss(weight=None, reduction='elementwise_mean')
loss = bcewithlogitsloss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

# nn.BCEWithLogitsLoss with weight
bcewithlogitsloss = nn.BCEWithLogitsLoss(weight=weights, reduction='elementwise_mean')
loss = bcewithlogitsloss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

# nn.BCEWithLogitsLoss with pos_weight
pos_weights = torch.FloatTensor([0.7, 0.3])
pos_weights = pos_weights[torch.empty(10, dtype=torch.long).random_(2)]
print(pos_weights)
bcewithlogitsloss = nn.BCEWithLogitsLoss(pos_weight=pos_weights, reduction='elementwise_mean')
loss = bcewithlogitsloss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

tensor(0.9453, grad_fn=<MeanBackward1>)
tensor([ 0.0262, -0.0762, -0.0492,  0.0358,  0.0891, -0.0288,  0.0289, -0.0656,
        -0.0840,  0.0549])
tensor(0.4822, grad_fn=<MeanBackward1>)
tensor([ 0.0105, -0.0305, -0.0197,  0.0215,  0.0534, -0.0173,  0.0173, -0.0394,
        -0.0336,  0.0330])
tensor([0.7000, 0.3000, 0.7000, 0.7000, 0.3000, 0.3000, 0.7000, 0.3000, 0.3000,
        0.7000])
tensor(0.5976, grad_fn=<MeanBackward1>)
tensor([ 0.0262, -0.0229, -0.0345,  0.0358,  0.0891, -0.0086,  0.0289, -0.0197,
        -0.0252,  0.0549])


In [18]:
# CustomBCEWithLogitsLoss
class CustomBCEWithLogitsLoss(nn.Module):
    def __init__(self, weight=None, reduce=True, size_average=True, pos_weight=None):
        super(CustomBCEWithLogitsLoss, self).__init__()
        self.weight = weight
        self.reduce = reduce
        self.size_average = size_average
        self.pos_weight = pos_weight
        
    def forward(self, input_x, target):
        if self.pos_weight is not None:
            bce_metric = (target - 1) * input_x + (target * (1 - self.pos_weight) - 1) * torch.log((1 + torch.exp(-input_x)))
        else:
            bce_metric = (target - 1) * input_x - torch.log((1 + torch.exp(-input_x)))
        
        bce_metric = -1.0 * bce_metric
        if self.weight is not None:
            bce_metric = self.weight * bce_metric
            
        if self.reduce:
            return torch.mean(bce_metric) if self.size_average else torch.sum(bce_metric)
        else:
            return bce_metric
        
# CustomBCEWithLogitsLoss without weight
bcewithlogitsloss = CustomBCEWithLogitsLoss(weight=None, reduce=True, size_average=True)
loss = bcewithlogitsloss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

# CustomBCEWithLogitsLoss with weight
bcewithlogitsloss = CustomBCEWithLogitsLoss(weight=weights, reduce=True, size_average=True)
loss = bcewithlogitsloss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

# CustomBCEWithLogitsLoss with pos_weight
pos_weights = torch.FloatTensor([0.7, 0.3])
pos_weights = pos_weights[torch.empty(10, dtype=torch.long).random_(2)]
print(pos_weights)
bcewithlogitsloss = CustomBCEWithLogitsLoss(pos_weight=pos_weights, reduce=True, size_average=True)
loss = bcewithlogitsloss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

tensor(0.9453, grad_fn=<MeanBackward1>)
tensor([ 0.0262, -0.0762, -0.0492,  0.0358,  0.0891, -0.0288,  0.0289, -0.0656,
        -0.0840,  0.0549])
tensor(0.4822, grad_fn=<MeanBackward1>)
tensor([ 0.0105, -0.0305, -0.0197,  0.0215,  0.0534, -0.0173,  0.0173, -0.0394,
        -0.0336,  0.0330])
tensor([0.3000, 0.7000, 0.7000, 0.3000, 0.3000, 0.3000, 0.7000, 0.3000, 0.7000,
        0.3000])
tensor(0.7284, grad_fn=<MeanBackward1>)
tensor([ 0.0262, -0.0534, -0.0345,  0.0358,  0.0891, -0.0086,  0.0289, -0.0197,
        -0.0588,  0.0549])


## nn.SmoothL1Loss 

In [19]:
# nn.SmoothL1Loss(size_average=None, reduce=None, reduction='elementwise_mean')
'''Creates a criterion that uses a squared term if the absolute element-wise error falls below 1 and an L1 term
otherwise. It is less sensitive to outliers than the MSELoss and in some case prevents exploding graients.
'''

'Creates a criterion that uses a squared term if the absolute element-wise error falls below 1 and an L1 term\notherwise. It is less sensitive to outliers than the MSELoss and in some case prevents exploding graients.\n'

[![iB7KAJ.png](https://s1.ax1x.com/2018/10/21/iB7KAJ.png)](https://imgchr.com/i/iB7KAJ)

In [32]:
# Generate input and target
input_x = torch.randn(10, 4, requires_grad=True)
target = torch.randn(10, 4)

# nn.SmoothL1Loss
smoothl1loss = nn.SmoothL1Loss(reduction='elementwise_mean')
loss = smoothl1loss(input_x, target)
print(loss)

loss.backward()
print(input_x.grad)

tensor(0.7673, grad_fn=<SmoothL1LossBackward>)
tensor([[ 0.0098, -0.0232,  0.0016, -0.0079],
        [-0.0145, -0.0250,  0.0187, -0.0250],
        [ 0.0250,  0.0250, -0.0153,  0.0250],
        [ 0.0250, -0.0250,  0.0211, -0.0250],
        [-0.0250,  0.0250,  0.0033,  0.0121],
        [-0.0250,  0.0184,  0.0250,  0.0250],
        [-0.0039,  0.0086,  0.0250,  0.0135],
        [-0.0250,  0.0131, -0.0149,  0.0250],
        [-0.0247, -0.0250, -0.0250,  0.0054],
        [ 0.0201,  0.0092,  0.0147, -0.0250]])


In [33]:
# CustomSmoothL1Loss
class CustomSmoothL1Loss(nn.Module):
    def __init__(self, reduce=True, size_average=True):
        super(CustomSmoothL1Loss, self).__init__()
        self.reduce = reduce
        self.size_average = size_average
        
    def forward(self, input_x, target):
        lt_idxs = torch.sum(torch.abs(input_x - target), dim=1) < 1
        gq_idxs = lt_idxs == 0
        
        l1loss_metric1 = 0.5 * torch.pow(input_x[lt_idxs] - target[lt_idxs], 2)
        l1loss_metric2 = torch.abs(input_x[gq_idxs] - target[gq_idxs]) - 0.5
        
        l1loss_metric = torch.sum(l1loss_metric1) + torch.sum(l1loss_metric2)
        if self.reduce:
            return (l1loss_metric / input_x.size(0)) if self.size_average else torch.sum(l1loss_metric)
        else:
            return l1loss_metric
        
# CustomSmoothL1Loss
smoothl1loss = CustomSmoothL1Loss(reduce=True, size_average=True)
loss = smoothl1loss(input_x, target)
print(loss)

input_x.grad = None
loss.backward()
print(input_x.grad)

tensor(2.7596, grad_fn=<DivBackward0>)
tensor([[ 0.1000, -0.1000,  0.1000, -0.1000],
        [-0.1000, -0.1000,  0.1000, -0.1000],
        [ 0.1000,  0.1000, -0.1000,  0.1000],
        [ 0.1000, -0.1000,  0.1000, -0.1000],
        [-0.1000,  0.1000,  0.1000,  0.1000],
        [-0.1000,  0.1000,  0.1000,  0.1000],
        [-0.1000,  0.1000,  0.1000,  0.1000],
        [-0.1000,  0.1000, -0.1000,  0.1000],
        [-0.1000, -0.1000, -0.1000,  0.1000],
        [ 0.1000,  0.1000,  0.1000, -0.1000]])
