In [1]:
import torch
from torch import nn
import d2l.torch as d2l  # 如果你用的是《动手学》工具包

# 定义 dropout 函数（教材代码）
def dropout_layer(X, dropout):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``dropout``; the
    surviving elements are divided by ``1 - dropout`` so that the expected
    value of the output equals ``X``.

    Args:
        X: Input tensor.
        dropout: Drop probability in the closed interval [0, 1].

    Returns:
        A tensor with the same shape as ``X``. ``dropout == 0`` returns ``X``
        itself (no copy); ``dropout == 1`` returns an all-zero tensor.

    Raises:
        AssertionError: If ``dropout`` lies outside [0, 1].
    """
    assert 0 <= dropout <= 1
    # Everything is dropped: avoid the division by (1 - dropout) == 0 below.
    if dropout == 1:
        return torch.zeros_like(X)
    # Nothing is dropped: hand the input back unchanged.
    if dropout == 0:
        return X
    # Sample the mask on X's own device; a CPU-only mask cannot be multiplied
    # with a tensor that lives on a GPU/accelerator.
    mask = (torch.rand(X.shape, device=X.device) > dropout).float()
    return mask * X / (1.0 - dropout)

In [2]:
# Small 2x3 example input used to exercise dropout_layer.
X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

# (label, drop probability) pairs: keep everything, drop about half, drop everything.
demo_cases = (
    ("Dropout 0.0（不丢）:\n", 0.0),
    ("Dropout 0.5（丢一半）:\n", 0.5),
    ("Dropout 1.0（全丢）:\n", 1.0),
)
for label, rate in demo_cases:
    print(label, dropout_layer(X, rate))

Dropout 0.0（不丢）:
 tensor([[1., 2., 3.],
        [4., 5., 6.]])
Dropout 0.5（丢一半）:
 tensor([[ 2.,  0.,  6.],
        [ 8., 10.,  0.]])
Dropout 1.0（全丢）:
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


注意：Dropout ≠ “精确丢掉一半元素”。
当 dropout = 0.5 时，每个元素各自独立地以 50% 的概率被置零：期望上会丢掉一半，但每次运行的结果都不一样（例如上面这次输出中，6 个元素只丢掉了 2 个）。保留下来的元素会被除以 1 − dropout（这里即放大为原来的 2 倍），从而保持输出的期望值不变。

In [3]:
# Parameters of the three affine layers: 784 -> 256 -> 256 -> 10.
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256


def _init_affine(fan_in, fan_out):
    """Return (weight, bias): weight is 0.01 * standard normal, bias is zero."""
    weight = nn.Parameter(torch.randn(fan_in, fan_out) * 0.01)
    bias = nn.Parameter(torch.zeros(fan_out))
    return weight, bias


# Created in layer order so random numbers are drawn for W1, then W2, then W3.
W1, b1 = _init_affine(num_inputs, num_hiddens1)
W2, b2 = _init_affine(num_hiddens1, num_hiddens2)
W3, b3 = _init_affine(num_hiddens2, num_outputs)
params = [W1, b1, W2, b2, W3, b3]

In [4]:
# Drop probabilities passed to dropout_layer after the first and second hidden layers.
dropout1 = 0.2
dropout2 = 0.5
# Global switch read by net(): dropout is applied only while this is truthy.
train = True

In [5]:
def net(X):
    """Forward pass of the MLP with two hidden layers and dropout.

    Reads the module-level parameters ``W1, b1, W2, b2, W3, b3``, the drop
    probabilities ``dropout1`` / ``dropout2`` and the ``train`` flag at call
    time.

    Args:
        X: Input tensor; it is reshaped to ``(-1, num_inputs)``.

    Returns:
        The output of the last affine layer, ``H2 @ W3 + b3`` (no activation
        or softmax is applied here).
    """
    X = X.reshape((-1, num_inputs))
    # torch.relu is used because no standalone `relu` is defined or imported
    # in the notebook cells above; calling a bare `relu` would raise NameError.
    H1 = torch.relu(X @ W1 + b1)
    # Dropout is applied only when the global `train` flag is truthy.
    if train:
        H1 = dropout_layer(H1, dropout1)
    H2 = torch.relu(H1 @ W2 + b2)
    if train:
        H2 = dropout_layer(H2, dropout2)
    return H2 @ W3 + b3