In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
import torch
from torch import nn


class LabelSmoothingLoss(nn.Module):
    """Label-smoothed KL-divergence loss over a padded batch of sequences.

    For every non-padding position the target distribution assigns
    ``1 - smoothing`` to the reference class and spreads ``smoothing`` evenly
    over the remaining ``size - 1`` classes. The loss is the summed KL
    divergence between that distribution and ``softmax(x)``, divided either
    by the batch size or by the number of non-padding positions.

    Args:
        size: Number of classes (length of the last dimension of ``x``).
        smoothing: Probability mass moved away from the reference class.
        padding_idx: Target value marking positions that are excluded from
            the loss. With the default of 0, positions labelled 0 never
            contribute.
        normalize_length: If True, divide by the number of non-padding
            positions; otherwise divide by the batch size.
    """

    def __init__(self,
                 size: int,
                 smoothing: float = 0.1,
                 padding_idx: int = 0,
                 normalize_length: bool = False):
        super(LabelSmoothingLoss, self).__init__()
        # Element-wise KL terms are kept so padded positions can be masked
        # out before the reduction.
        self.criterion = nn.KLDivLoss(reduction="none")
        self.padding_idx = padding_idx
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.size = size
        self.normalize_length = normalize_length

    def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Compute the smoothed loss.

        Args:
            x: Unnormalized scores of shape (batch, seqlen, class).
            target: Class indices of shape (batch, seqlen); entries equal to
                ``padding_idx`` are ignored.

        Returns:
            Scalar tensor holding the summed KL divergence divided by the
            chosen normalizer. When there is nothing to average over (every
            position is padding, or the batch is empty) the result is 0
            rather than NaN.

        Raises:
            AssertionError: If the last dimension of ``x`` is not ``size``.
            ValueError: If ``target`` does not hold exactly one index per
                (batch, seqlen) position of ``x``.
        """
        assert x.size(2) == self.size
        batch_size = x.size(0)
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed tensors.
        x = x.reshape(-1, self.size)
        target = target.reshape(-1)
        if target.size(0) != x.size(0):
            raise ValueError(
                "target must hold exactly one class index per position of x")
        # use zeros_like instead of torch.no_grad() for true_dist,
        # since no_grad() can not be exported by JIT
        true_dist = torch.zeros_like(x)
        true_dist.fill_(self.smoothing / (self.size - 1))
        ignore = target == self.padding_idx  # (batch * seqlen,)
        total = len(target) - ignore.sum().item()
        # Padded positions may carry an index that is invalid for scatter_
        # (e.g. -1); point them at class 0, they are masked out below anyway.
        target = target.masked_fill(ignore, 0)
        true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        kl = self.criterion(torch.log_softmax(x, dim=1), true_dist)
        loss_sum = kl.masked_fill(ignore.unsqueeze(1), 0).sum()
        denom = total if self.normalize_length else batch_size
        if denom == 0:
            # Nothing contributes, so the masked sum is already exactly 0;
            # returning it avoids a 0 / 0 = NaN loss.
            return loss_sum
        return loss_sum / denom


In [None]:
# Scores for 2 sequences x 2 positions x 2 classes.
x = torch.tensor([[[0.4, 0.6], [0.5, 0.5]], [[0.3, 0.7], [0.8, 0.2]]])
# One class id per position; every id must lie in [0, size) because it is
# used as a scatter index over the class axis. Id 0 is also the default
# padding_idx, so positions labelled 0 are excluded from the loss.
targets = torch.tensor([[1, 1], [0, 1]])

loss = LabelSmoothingLoss(2, 0.1)
loss(x, targets)

In [None]:
class CELoss(nn.Module):
    """Cross-entropy loss with optional label smoothing.

    When ``label_smooth`` is set, the target distribution puts
    ``1 - label_smooth`` on the true class and ``label_smooth / (class_num - 1)``
    on each of the other classes. This is not the same convention as the
    commonly used ``(1 - s) * one_hot + s / class_num`` mixture, so the two
    give different loss values for the same smoothing factor.

    Args:
        label_smooth: Smoothing factor, or None for plain cross entropy.
        class_num: Number of classes; only used when smoothing is enabled.
    """

    def __init__(self, label_smooth=None, class_num=137):
        super().__init__()
        self.label_smooth = label_smooth
        self.class_num = class_num

    def forward(self, pred, target):
        """Return the mean loss over the batch.

        Args:
            pred: Unnormalized model outputs of shape [N, M].
            target: Ground-truth class indices of shape [N].

        Returns:
            Scalar tensor with the batch-mean loss.
        """
        # log_softmax subtracts the row maximum internally, so large logits
        # do not overflow the way an explicit log(sum(exp(pred))) does.
        logprobs = F.log_softmax(pred, dim=1)

        if self.label_smooth is not None:
            # cross entropy loss with label smoothing
            one_hot = F.one_hot(target, self.class_num)  # convert to one-hot

            # Clamping the 0/1 entries turns every 1 into (1 - label_smooth)
            # and every 0 into label_smooth / (class_num - 1).
            soft_target = torch.clamp(one_hot.float(),
                                      min=self.label_smooth / (self.class_num - 1),
                                      max=1.0 - self.label_smooth)
            loss = -1 * torch.sum(soft_target * logprobs, 1)
        else:
            # standard cross entropy loss: pick the log-probability of the
            # true class. squeeze keeps the per-sample loss at shape [N]
            # instead of letting [N, 1] broadcast against [N] into [N, N].
            loss = -logprobs.gather(1, target.unsqueeze(-1)).squeeze(-1)

        return loss.mean()


In [None]:
loss1 = nn.CrossEntropyLoss()
loss2 = CELoss(label_smooth=None, class_num=3)

x = torch.tensor([[5, 6, 7], [4, 4, 4]], dtype=torch.float)
y = torch.tensor([1, 2])

# Without label smoothing CELoss must reproduce nn.CrossEntropyLoss.
assert torch.allclose(loss1(x, y), loss2(x, y))
print(loss1(x, y), loss2(x, y))
# tensor(1.2531) tensor(1.2531)


In [None]:
# Compare plain cross entropy (loss1) with the label-smoothed CELoss (loss2)
# on two confidently correct predictions.
loss1 = nn.CrossEntropyLoss()
loss2 = CELoss(label_smooth=0.05, class_num=3)

x = torch.tensor([[1, 8, 1], [1, 1, 8]], dtype=torch.float)
y = torch.tensor([1, 2])

print(loss1(x, y), loss2(x, y))
# tensor(0.0018) tensor(0.3518)
# The smoothed loss is larger because 0.05 of the target mass sits on the two
# wrong classes (0.025 each). A value of 0.2352 corresponds to the alternative
# smoothing formula (1 - s) * one_hot + s / class_num, not to the clamp-based
# one that CELoss applies.


In [None]:
# Scores of shape (batch=2, seqlen=2, class=2). They must be floating point:
# log_softmax and the smoothed target distribution are not defined for
# integer tensors.
x = torch.tensor([[[2, 3], [4, 5]], [[8, 9], [10, 11]]], dtype=torch.float)
# One class id per (batch, seqlen) position, each within [0, 2).
targets = torch.tensor([[1, 1], [0, 1]])

In [None]:
# Two-class criterion with 10% smoothing, evaluated on the `x` and `targets`
# currently bound in the notebook namespace.
loss = LabelSmoothingLoss(size=2, smoothing=0.1)
loss(x, targets)

In [18]:
# Random (3, 5) scores normalized to log-probabilities along the last axis,
# plus one class index per row.
logits = torch.log_softmax(torch.randn(3, 5), dim=-1)
targets = torch.tensor([0, 2, 3])

(logits, targets)


(tensor([[-1.6336, -1.6944, -1.0283, -3.1764, -1.5064],
         [-1.6933, -0.8209, -2.0381, -3.3705, -1.5540],
         [-2.8446, -0.5310, -2.8125, -3.5543, -1.3274]]),
 tensor([0, 2, 3]))

In [23]:
# Built-in smoothed cross entropy; rows whose target equals 2 are skipped.
F.cross_entropy(
    logits,
    targets,
    ignore_index=2,
    label_smoothing=0.1,
)


tensor(2.5357)