In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [33]:
y_true = torch.randint(0, 5, (10,))
y_true

tensor([2, 3, 4, 3, 4, 2, 4, 0, 0, 2])

In [34]:
y_pred = torch.rand(10, 5) * torch.rand(10, 5)
y_pred

tensor([[0.0425, 0.0749, 0.0449, 0.6374, 0.1391],
        [0.2501, 0.1247, 0.1282, 0.1452, 0.3988],
        [0.3246, 0.1203, 0.4313, 0.3277, 0.2178],
        [0.1636, 0.2433, 0.0581, 0.0532, 0.1914],
        [0.6481, 0.1551, 0.3308, 0.1160, 0.1311],
        [0.1976, 0.1371, 0.0551, 0.2703, 0.0354],
        [0.1589, 0.0804, 0.1883, 0.0041, 0.5144],
        [0.0485, 0.2014, 0.3141, 0.2138, 0.0239],
        [0.0620, 0.0690, 0.0153, 0.0071, 0.0535],
        [0.4265, 0.2986, 0.1358, 0.1418, 0.3854]])

# CrossEntropyLoss

In [35]:
nn.CrossEntropyLoss()(y_pred, y_true)

tensor(1.6692)

# LogSoftmax + NLLLoss = CrossEntropyLoss

In [36]:
log_softmax = nn.LogSoftmax(dim=-1)

In [37]:
y_pred_ = log_softmax(y_pred)
y_pred_

tensor([[-1.7833, -1.7510, -1.7809, -1.1884, -1.6868],
        [-1.5745, -1.6998, -1.6964, -1.6794, -1.4257],
        [-1.5747, -1.7791, -1.4681, -1.5717, -1.6815],
        [-1.5905, -1.5109, -1.6960, -1.7010, -1.5628],
        [-1.2592, -1.7522, -1.5765, -1.7913, -1.7762],
        [-1.5548, -1.6153, -1.6973, -1.4821, -1.7170],
        [-1.6559, -1.7344, -1.6265, -1.8108, -1.3004],
        [-1.7272, -1.5743, -1.4616, -1.5619, -1.7518],
        [-1.5891, -1.5821, -1.6359, -1.6441, -1.5977],
        [-1.4677, -1.5957, -1.7585, -1.7525, -1.5089]])

In [38]:
nn.NLLLoss()(y_pred_, y_true)

tensor(1.6692)

## LogSoftmax = Log(Softmax(x))

In [39]:
torch.log(torch.softmax(y_pred, dim=-1))

tensor([[-1.7833, -1.7510, -1.7809, -1.1884, -1.6868],
        [-1.5745, -1.6998, -1.6964, -1.6794, -1.4257],
        [-1.5747, -1.7791, -1.4681, -1.5717, -1.6815],
        [-1.5905, -1.5109, -1.6960, -1.7010, -1.5628],
        [-1.2592, -1.7522, -1.5765, -1.7913, -1.7762],
        [-1.5548, -1.6153, -1.6973, -1.4821, -1.7170],
        [-1.6559, -1.7344, -1.6265, -1.8108, -1.3004],
        [-1.7272, -1.5743, -1.4616, -1.5619, -1.7518],
        [-1.5891, -1.5821, -1.6359, -1.6441, -1.5977],
        [-1.4677, -1.5957, -1.7585, -1.7525, -1.5089]])

In [40]:
nn.NLLLoss()(torch.log(torch.softmax(y_pred, dim=-1)), y_true)

tensor(1.6692)

# label_smoothing

In [41]:
label_smoothing = 0.1

In [42]:
nn.CrossEntropyLoss(label_smoothing=label_smoothing)(y_pred, y_true)

tensor(1.6642)

## 尝试复现label_smoothing

In [43]:
y_true_ont_hot = F.one_hot(y_true)
y_true_ont_hot = y_true_ont_hot.type(torch.float32)
y_true_ont_hot

tensor([[0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.]])

In [44]:
y_true_ont_hot == 1

tensor([[False, False,  True, False, False],
        [False, False, False,  True, False],
        [False, False, False, False,  True],
        [False, False, False,  True, False],
        [False, False, False, False,  True],
        [False, False,  True, False, False],
        [False, False, False, False,  True],
        [ True, False, False, False, False],
        [ True, False, False, False, False],
        [False, False,  True, False, False]])

In [45]:
y_true_ont_hot[y_true_ont_hot == 1] = 1 - label_smoothing
y_true_ont_hot[y_true_ont_hot == 0] = label_smoothing / (5 - 1)

In [46]:
y_true_ont_hot

tensor([[0.0250, 0.0250, 0.9000, 0.0250, 0.0250],
        [0.0250, 0.0250, 0.0250, 0.9000, 0.0250],
        [0.0250, 0.0250, 0.0250, 0.0250, 0.9000],
        [0.0250, 0.0250, 0.0250, 0.9000, 0.0250],
        [0.0250, 0.0250, 0.0250, 0.0250, 0.9000],
        [0.0250, 0.0250, 0.9000, 0.0250, 0.0250],
        [0.0250, 0.0250, 0.0250, 0.0250, 0.9000],
        [0.9000, 0.0250, 0.0250, 0.0250, 0.0250],
        [0.9000, 0.0250, 0.0250, 0.0250, 0.0250],
        [0.0250, 0.0250, 0.9000, 0.0250, 0.0250]])

In [49]:
nn.CrossEntropyLoss()(y_pred, y_true_ont_hot)

tensor(1.6629)

# 多类别分类(一个目标有多个标签)

In [50]:
torch.sigmoid(y_pred)

tensor([[0.5106, 0.5187, 0.5112, 0.6542, 0.5347],
        [0.5622, 0.5311, 0.5320, 0.5362, 0.5984],
        [0.5805, 0.5300, 0.6062, 0.5812, 0.5542],
        [0.5408, 0.5605, 0.5145, 0.5133, 0.5477],
        [0.6566, 0.5387, 0.5819, 0.5290, 0.5327],
        [0.5492, 0.5342, 0.5138, 0.5672, 0.5088],
        [0.5396, 0.5201, 0.5469, 0.5010, 0.6258],
        [0.5121, 0.5502, 0.5779, 0.5532, 0.5060],
        [0.5155, 0.5172, 0.5038, 0.5018, 0.5134],
        [0.6050, 0.5741, 0.5339, 0.5354, 0.5952]])

In [51]:
torch.log(torch.sigmoid(y_pred))

tensor([[-0.6721, -0.6564, -0.6709, -0.4244, -0.6260],
        [-0.5759, -0.6327, -0.6311, -0.6232, -0.5135],
        [-0.5439, -0.6348, -0.5006, -0.5427, -0.5902],
        [-0.6147, -0.5789, -0.6645, -0.6669, -0.6020],
        [-0.4207, -0.6186, -0.5414, -0.6368, -0.6298],
        [-0.5992, -0.6270, -0.6660, -0.5671, -0.6756],
        [-0.6168, -0.6537, -0.6034, -0.6911, -0.4687],
        [-0.6692, -0.5975, -0.5484, -0.5920, -0.6812],
        [-0.6626, -0.6593, -0.6855, -0.6896, -0.6668],
        [-0.5024, -0.5550, -0.6276, -0.6248, -0.5189]])

In [72]:
y_true_ont_hot_ = F.one_hot(y_true)
y_true_ont_hot_

tensor([[0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0]])

In [73]:
y_true_ont_hot_[0, 1] = 1
y_true_ont_hot_[1, 1:3] = 1
y_true_ont_hot_[2, 2] = 1
y_true_ont_hot_[3, 0] = 1
y_true_ont_hot_[4, 3] = 1
y_true_ont_hot_[5, 3] = 1
y_true_ont_hot_[6, 0:2] = 1
y_true_ont_hot_[7, 2] = 1
y_true_ont_hot_[8, 1] = 1
y_true_ont_hot_[9, 0:3] = 1
y_true_ont_hot_

tensor([[0, 1, 1, 0, 0],
        [0, 1, 1, 1, 0],
        [0, 0, 1, 0, 1],
        [1, 0, 0, 1, 0],
        [0, 0, 0, 1, 1],
        [0, 0, 1, 1, 0],
        [1, 1, 0, 0, 1],
        [1, 0, 1, 0, 0],
        [1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0]])

## BECLoss

In [80]:
nn.BCELoss()(torch.sigmoid(y_pred), y_true_ont_hot_.type(torch.float32))

tensor(0.7147)

## Log(Sigmoid(x)) + NLLLoss

In [81]:
nn.NLLLoss()(torch.sigmoid(y_pred).flatten(), y_true_ont_hot_.flatten())

tensor(-0.5106)

In [82]:
nn.NLLLoss()(torch.log(torch.sigmoid(y_pred)).flatten(), y_true_ont_hot_.flatten())

tensor(0.6721)