# 一、什么是Loss Function？   
**损失函数** 用于衡量模型预测与真实值之间的差距，是训练过程中的目标函数。  
> 损失越小，代表模型的预测越接近真实标签。

# 二、常见的损失函数   
![常见损失函数](images/Loss_Functions.png)

In [None]:
# torch.nn.L1Loss
#     size_average (bool, optional) 
#     reduce (bool, optional):
#     # 上面两者已经过时，推荐使用 reduction 即可
#     reduction (str, optional):
#         'none': no reduction will be applied, 
#         'mean': the sum of the output will be divided by the number of elements in the output, 
#         'sum': the output will be summed.  
#         Default: 'mean'

In [31]:
import torch
import torchvision
from torch import nn
from collections import OrderedDict

In [23]:
# Putting one float literal in the data is enough for torch to infer a
# floating-point dtype for the whole tensor.
inputs = torch.tensor([1.0, 2, 3])
# Alternatively, request the dtype explicitly.
targets = torch.tensor([1, 2, 5], dtype=torch.float32)
print(inputs.shape)
# Add a leading batch dimension to both tensors: (3,) -> (1, 3).
inputs = inputs.reshape(1, 3)
targets = targets.reshape(1, 3)
print(inputs.shape)

torch.Size([3])
torch.Size([1, 3])


In [24]:
# L1 loss with the element-wise absolute errors summed up.
loss = nn.L1Loss(reduction="sum")
result = loss(inputs, targets)
print(result)

# Same criterion, but averaged over the number of elements.
loss = nn.L1Loss(reduction="mean")
result = loss(inputs, targets)
print(result)

tensor(2.)
tensor(0.6667)


In [25]:
# torch.nn.MSELoss
#     size_average (bool, optional)
#     reduce (bool, optional)
#     # 上面两者已弃用
#     reduction (str, optional):
#         'none': no reduction will be applied, 
#         'mean': the sum of the output will be divided by the number of elements in the output, 
#         'sum': the output will be summed. 
#         Default: 'mean'

In [26]:
# Squared-error loss with the element-wise squared errors summed up.
loss_MSE = nn.MSELoss(reduction="sum")
result_MSE = loss_MSE(inputs, targets)
print(result_MSE)

# Same criterion, but averaged over the number of elements.
loss_MSE = nn.MSELoss(reduction="mean")
result_MSE = loss_MSE(inputs, targets)
print(result_MSE)

tensor(4.)
tensor(1.3333)


In [29]:
# nn.CrossEntropyLoss 结合了 nn.LogSoftmax 和 nn.NLLLoss, 所以:
	# •输入是 原始 logits（未经过 softmax）；
	# •内部会自动做 log_softmax 处理；
	# •对数似然越大，损失越小。
# torch.nn.CrossEntropyLoss(
#     weight (Tensor, optional)              类别权重（shape=[C]），用于类别不平衡
#     size_average (bool, optional)
#     ignore_index (int, optional)           忽略某个类别的标签索引（如在语义分割中忽略边界)
#     reduce (bool, optional)
#     reduction (str, optional)              同 L1Loss和 MSELoss
#     label_smoothing (float, optional)      标签平滑，防止过拟合
#         将原本 “硬标签” 的 one-hot 向量稍微“软化”，变成更平滑的概率分布，使模型训练更加“宽容”，避免过拟合。
#         例如：真实类别是 → [0, 0, 1, 0]，模型预测 [0.01, 0.02, 0.96, 0.01] 已经是一个很好的预测（loss ≈ 0.04），
#              但硬标签仍会驱使模型把正确类别的概率继续推向 1；设 label_smoothing=0.1 后，
#              目标分布变为 [0.025, 0.025, 0.925, 0.025]，不再鼓励这种过度自信的输出，从而减少过拟合。

In [28]:
# One sample with three raw class scores, reshaped to (batch=1, classes=3).
x = torch.tensor([0.1, 0.2, 0.3]).reshape(1, 3)
# Class-index target for that single sample.
y = torch.tensor([1])
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)

tensor(1.1019)


In [30]:
# 如何在神经网络中写入 LossFunction

In [35]:
class Tudui(nn.Module):
    """Small CNN: three conv + max-pool stages followed by two linear layers.

    The first linear layer takes 1024 flattened features, i.e.
    64 channels x 4 x 4, which is what a 3x32x32 input yields after the
    three 2x poolings. The last layer emits 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Listed in execution order; each name becomes the submodule key
        # inside ``model1``.
        layers = [
            ('conv1', nn.Conv2d(3, 32, kernel_size=5, padding=2)),
            ('pool1', nn.MaxPool2d(2)),
            ('conv2', nn.Conv2d(32, 32, kernel_size=5, padding=2)),
            ('pool2', nn.MaxPool2d(2)),
            ('conv3', nn.Conv2d(32, 64, kernel_size=5, padding=2)),
            ('pool3', nn.MaxPool2d(2)),
            ('flatten', nn.Flatten()),
            # 1024 = 64 channels x 4 x 4 (for 32x32 inputs).
            ('fc1', nn.Linear(1024, 64)),
            ('fc2', nn.Linear(64, 10)),
        ]
        self.model1 = nn.Sequential(OrderedDict(layers))

    def forward(self, x):
        """Pass ``x`` through the sequential stack and return its output."""
        scores = self.model1(x)
        return scores

In [47]:
# CIFAR10 with train=False, downloaded if missing; ToTensor() is applied
# to every image.
dataset = torchvision.datasets.CIFAR10(
    "../datasets/CIFAR10/",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Serve the dataset in mini-batches of 64 samples.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=64)

Files already downloaded and verified


In [49]:
# Run every batch through the network, print the outputs, targets and loss,
# then back-propagate that loss.
tudui = Tudui()
loss = nn.CrossEntropyLoss()
for data in dataloader:
    # Each batch unpacks into the images and their class-index labels.
    imgs, targets = data
    outputs = tudui(imgs)
    print("output:")
    print(outputs)
    print("targets:")
    print(targets)
    print("loss:")
    result_loss = loss(outputs, targets)
    print(result_loss)
    # backward() adds into .grad rather than overwriting it, so clear the
    # gradients left by the previous batch; otherwise they pile up across
    # the whole loop.
    tudui.zero_grad()
    result_loss.backward()
    print("ok")

output:
tensor([[-0.0454, -0.1226, -0.1178, -0.0982, -0.0821,  0.0668,  0.0672, -0.0511,
          0.0825,  0.0267],
        [-0.0574, -0.1152, -0.1160, -0.1401, -0.0926,  0.0437,  0.0666, -0.0763,
          0.0839,  0.0191],
        [-0.0567, -0.1346, -0.1177, -0.1378, -0.0913,  0.0547,  0.0501, -0.0702,
          0.0858,  0.0090],
        [-0.0510, -0.1357, -0.1131, -0.1304, -0.0903,  0.0558,  0.0674, -0.0747,
          0.0776,  0.0073],
        [-0.0637, -0.1253, -0.1040, -0.1110, -0.0803,  0.0662,  0.0642, -0.0534,
          0.0919,  0.0276],
        [-0.0570, -0.1241, -0.1189, -0.0997, -0.0709,  0.0680,  0.0722, -0.0461,
          0.0802,  0.0326],
        [-0.0672, -0.1238, -0.0831, -0.1098, -0.0487,  0.0644,  0.0308, -0.0662,
          0.0883,  0.0206],
        [-0.0582, -0.1206, -0.1050, -0.1154, -0.0782,  0.0640,  0.0854, -0.0528,
          0.0870,  0.0332],
        [-0.0409, -0.1295, -0.1156, -0.1147, -0.0882,  0.0623,  0.0764, -0.0702,
          0.0753,  0.0246],
        [-0

In [None]:
# backward() 是 PyTorch 中自动求导（autograd）机制的核心方法，用于执行反向传播（backpropagation），计算损失对所有 requires_grad=True 的叶子张量的梯度，并累加到它们的 .grad 上（不会自动清零）。
# 一般配合优化器来用，详见下一节