In [1]:
import torchvision

In [4]:
# Show which torchvision build is installed (the recorded run reports 0.20.1+cpu).
print(torchvision.__version__)

0.20.1+cpu


In [3]:
class A:
    """Common base class of B and C; its initializer only announces itself."""

    def __init__(self) -> None:
        print("A initialized")

class B(A):
    """Subclass of A; the first base listed by D."""

    def __init__(self) -> None:
        # super() follows the MRO of the instance's class, so for a D instance
        # this call reaches C.__init__ rather than A.__init__.
        super().__init__()
        print("B initialized")

class C(A):
    """Subclass of A; the second base listed by D."""

    def __init__(self) -> None:
        # Delegate to the next initializer in the MRO before announcing C.
        super().__init__()
        print("C initialized")

class D(B, C):
    """Joins B and C; constructing it prints A, C, B, D in that order."""

    def __init__(self) -> None:
        super().__init__() # The parent initializer must be called explicitly; it is not run automatically
        print("D initialized")

# Create an instance of class D (this triggers the chain of initializer prints).
d = D()

A initialized
C initialized
B initialized
D initialized


# 小实验

对比 `train_loss += loss` 和 `train_loss += loss.item()` 这两种写法：最终 `train_loss` 的类型不同，是否仍然挂在计算图上（`requires_grad`）也不同，而后者正是两种写法显存使用出现区别的原因。

In [1]:
import torch
import gc

# Toy setup: one linear layer, one random batch, and an MSE criterion.
model = torch.nn.Linear(10, 1)
x = torch.randn(32, 10)
y = torch.randn(32, 1)
loss_fn = torch.nn.MSELoss()

# Case 1: accumulate the loss tensor itself.
train_loss_tensor = 0  # starts out as a Python int
for _ in range(5):
    out = model(x)
    loss = loss_fn(out, y)
    # int + Tensor -> Tensor, so the running sum becomes a tensor that is
    # still attached to autograd (requires_grad is True in the recorded run).
    train_loss_tensor += loss
type_tensor_loss = type(train_loss_tensor)
is_tensor_requires_grad = getattr(train_loss_tensor, "requires_grad", None)

# Drop the references from case 1 before running case 2.
del train_loss_tensor, out, loss
gc.collect()
if torch.cuda.is_available():
    # Only meaningful when CUDA is present; a plain `if` replaces the original
    # conditional expression that was evaluated purely for its side effect.
    torch.cuda.empty_cache()

# Case 2: accumulate loss.item() instead.
train_loss_float = 0.0  # Python float
for _ in range(5):
    out = model(x)
    loss = loss_fn(out, y)
    train_loss_float += loss.item()  # .item() yields a plain Python number
type_float_loss = type(train_loss_float)

# Displayed as the cell result: (torch.Tensor, True, float) in the recorded run.
(type_tensor_loss, is_tensor_requires_grad, type_float_loss)


(torch.Tensor, True, float)

📌 **总结**：

* `loss`：Tensor，且 `requires_grad=True`；直接累加得到的结果会一直引用每一步的计算图（“拖着计算图跑”），这些中间结果占用的内存/显存就无法及时释放。
* `loss.item()`：普通数值，累加安全、轻量，训练日志和统计指标都应该用这个。

# GPU


In [1]:
import torch

In [2]:
# Report whether PyTorch can see a CUDA device (True in the recorded run).
print(torch.cuda.is_available())

True


In [3]:
# Prefer the first CUDA device when one is visible; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda:0


# random_split

In [None]:
from torch.utils.data import random_split


# max vs. argmax

In [5]:
import torch
outputs = torch.tensor([
    [2.5, 1.0, 0.2],
    [0.1, 3.2, 2.7],
])

# Approach 1: max along dim 1 gives both the row maxima and their positions.
values, indices = torch.max(outputs, dim=1)
print(values)   # largest score in each row
print(indices)  # column of that score, i.e. the predicted class index

# Approach 2: argmax along dim 1 gives only the positions.
predicted = torch.argmax(outputs, dim=1)
print(predicted)  # same indices as the ones returned by max


tensor([2.5000, 3.2000])
tensor([0, 1])
tensor([0, 1])


In [6]:
import torch

# A toy batch: model predictions next to the ground-truth labels.
predicted = torch.tensor([0, 2, 1, 3])
labels = torch.tensor([0, 1, 1, 3])

# Variant 1: build the boolean match mask with the == operator.
matches_eq = predicted == labels
correct_eq = matches_eq.sum().item()

# Variant 2: build the same mask with the .eq() method.
matches_method = predicted.eq(labels)
correct_method = matches_method.sum().item()

# Show both counts together with their Python types.
(correct_eq, type(correct_eq), correct_method, type(correct_method))


(3, int, 3, int)

In [7]:
# Element-wise comparison with the == operator; the result is a boolean tensor.
(predicted == labels)

tensor([ True, False,  True,  True])

In [8]:
# Element-wise comparison with the .eq() method; the result is a boolean tensor.
predicted.eq(labels)

tensor([ True, False,  True,  True])

# tqdm

# 多个字符串字面量直接相邻放在一起

Python 允许把多个字符串字面量（包括 f-string）直接相邻书写，解析代码时会自动把它们拼接成一个字符串，不需要使用 `+`。这个特性叫做 implicit string concatenation（隐式字符串拼接）。

In [None]:
# --- Log ---
# Adjacent f-string literals are joined into a single string, so the long
# message can be laid out one segment per line without any "+" operators.
print(
    f"Epoch [{epoch+1}/{num_epochs}] "
    f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f} | "
    f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}"
)