In [5]:
import torchvision

In [6]:
# Show which torchvision build is installed in this kernel.
print(torchvision.__version__)

0.20.1+cpu


In [3]:
class A:
    """Common base class of the diamond hierarchy used in this demo.

    Its initializer only prints a marker so the order in which the
    initializers run can be read from the cell output.
    """

    def __init__(self) -> None:
        # No super() call here: A is the last user-defined class in the chain.
        print("A initialized")

class B(A):
    """Left branch of the diamond; subclass of A.

    Delegates to the next initializer in the MRO first, then prints its
    own marker.
    """

    def __init__(self) -> None:
        # super() follows the MRO of the instance's class, so for a D instance
        # the next initializer is C's, not A's.
        super().__init__()
        print("B initialized")

class C(A):
    """Right branch of the diamond; subclass of A.

    Delegates to the next initializer in the MRO first, then prints its
    own marker.
    """

    def __init__(self) -> None:
        # Hand over to the next class in the MRO before announcing ourselves.
        super().__init__()
        print("C initialized")

class D(B, C):
    """Bottom of the diamond; inherits from B and C, in that order."""

    def __init__(self) -> None:
        # Parent initializers are not invoked automatically once __init__ is
        # overridden, so the call has to be made explicitly.
        super().__init__()
        print("D initialized")

# Create an instance of D to trigger the chain of super().__init__() calls.
# The markers are printed in the order A, C, B, D, i.e. the reverse of the
# lookup order D -> B -> C -> A, because each class prints after delegating.
d = D()

A initialized
C initialized
B initialized
D initialized


# 小实验

对比 `train_loss += loss` 和 `train_loss += loss.item()` 这两种写法：累加结束后 `train_loss` 的类型有何不同，以及前者因保留计算图而可能带来的额外内存/显存占用。（注意：下面的代码只直接检查了类型和 `requires_grad`，并没有实际测量显存。）

In [1]:
import torch
import gc

# Experiment: accumulate a running loss in two different ways and compare the
# type of the accumulator afterwards.
# A small linear model with random inputs/targets is enough for the comparison.
model = torch.nn.Linear(10, 1)
x = torch.randn(32, 10)
y = torch.randn(32, 1)
loss_fn = torch.nn.MSELoss()

# Case 1: add the loss tensor itself to the accumulator.
train_loss_tensor = 0  # starts out as a Python int
for _ in range(5):
    out = model(x)
    loss = loss_fn(out, y)
    train_loss_tensor += loss  # int + Tensor -> the accumulator becomes a Tensor
type_tensor_loss = type(train_loss_tensor)
# getattr with a default keeps this line valid even for a non-Tensor accumulator.
is_tensor_requires_grad = getattr(train_loss_tensor, "requires_grad", None)

# Drop the references created by case 1 before running case 2.
del train_loss_tensor, out, loss
gc.collect()
# Only call into the CUDA allocator when a GPU is actually available.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Case 2: add loss.item() to the accumulator.
train_loss_float = 0.0  # Python float
for _ in range(5):
    out = model(x)
    loss = loss_fn(out, y)
    train_loss_float += loss.item()  # .item() yields a plain Python number
type_float_loss = type(train_loss_float)

# Last expression of the cell, shown as the cell output.
(type_tensor_loss, is_tensor_requires_grad, type_float_loss)


(torch.Tensor, True, float)

📌 **总结**：

* `loss`：Tensor，`requires_grad=True`；直接累加时，累加结果会一直引用每一步的计算图，这些计算图无法被释放，内存/显存占用会随迭代次数不断增长（即“拖着计算图跑”）。
* `loss.item()`：普通数值，累加安全、轻量，训练日志和统计指标都应该用这个。