In [8]:
import torch
from torch import nn
from d2l import torch as d2l
from torch import optim
from tqdm import tqdm

In [2]:
LeNet = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16*5*5, 120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    nn.Linear(84, 10)
)
LeNet

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): Sigmoid()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): Sigmoid()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): Sigmoid()
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [3]:
X = torch.randn((1, 1, 28, 28), dtype=torch.float32)
for layer in LeNet:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape: \t', X.shape)

Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])
Linear output shape: 	 torch.Size([1, 120])
Sigmoid output shape: 	 torch.Size([1, 120])
Linear output shape: 	 torch.Size([1, 84])
Sigmoid output shape: 	 torch.Size([1, 84])
Linear output shape: 	 torch.Size([1, 10])


In [4]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

In [9]:
def evaluate_accuracy_gpu(net, data_iter, device='cpu'):
    metric = d2l.Accumulator(2)
    with torch.no_grad():
        for X, y in tqdm(data_iter):
            X = X.to(device)
            y = y.to(device)
            metric.add(d2l.accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

In [10]:
#@save
def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
    """用GPU训练模型(在第六章定义)"""
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # 训练损失之和，训练准确率之和，范例数
        train_loss, train_sum, total_sum = 0.,0.,0.
        net.train()
        for X, y in tqdm(train_iter):
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                train_loss += l
                train_sum += d2l.accuracy(y_hat, y)
                total_sum += len(X)
        train_acc = train_sum / total_sum
        train_loss = train_loss / total_sum
        print(f'[epoch {epoch+1}/{epochs}], train loss: {train_loss}, train acc: {train_acc}')
        test_acc = evaluate_accuracy_gpu(net, test_iter, device)
        print(f'[test acc: {test_acc}')
        
    

In [11]:
lr, epochs = 0.3, 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu' )
optimizer = optim.SGD(LeNet.parameters(), lr)
loss = nn.CrossEntropyLoss()
train_ch6(LeNet, train_iter, test_iter, epochs, lr, device)

training on cuda


100%|██████████| 235/235 [00:05<00:00, 39.41it/s]


[epoch 1/20], train loss: 0.009061122313141823, train acc: 0.0997


100%|██████████| 40/40 [00:00<00:00, 51.55it/s]


[epoch 1/20], test acc: 0.1


100%|██████████| 235/235 [00:05<00:00, 39.45it/s]


[epoch 2/20], train loss: 0.009029599837958813, train acc: 0.10798333333333333


100%|██████████| 40/40 [00:00<00:00, 51.55it/s]


[epoch 2/20], test acc: 0.1


100%|██████████| 235/235 [00:05<00:00, 39.26it/s]


[epoch 3/20], train loss: 0.00743579538539052, train acc: 0.32365


100%|██████████| 40/40 [00:00<00:00, 51.80it/s]


[epoch 3/20], test acc: 0.5114


100%|██████████| 235/235 [00:05<00:00, 39.48it/s]


[epoch 4/20], train loss: 0.0043045044876635075, train acc: 0.5807333333333333


100%|██████████| 40/40 [00:00<00:00, 51.98it/s]


[epoch 4/20], test acc: 0.6039


100%|██████████| 235/235 [00:05<00:00, 39.43it/s]


[epoch 5/20], train loss: 0.0035952029284089804, train acc: 0.6482333333333333


100%|██████████| 40/40 [00:00<00:00, 51.22it/s]


[epoch 5/20], test acc: 0.6717


100%|██████████| 235/235 [00:05<00:00, 39.55it/s]


[epoch 6/20], train loss: 0.0032484042458236217, train acc: 0.6877333333333333


100%|██████████| 40/40 [00:00<00:00, 51.80it/s]


[epoch 6/20], test acc: 0.7068


100%|██████████| 235/235 [00:05<00:00, 39.19it/s]


[epoch 7/20], train loss: 0.0029931680765002966, train acc: 0.7111833333333333


100%|██████████| 40/40 [00:00<00:00, 51.55it/s]


[epoch 7/20], test acc: 0.7138


100%|██████████| 235/235 [00:06<00:00, 38.90it/s]


[epoch 8/20], train loss: 0.002759768860414624, train acc: 0.7294166666666667


100%|██████████| 40/40 [00:00<00:00, 51.82it/s]


[epoch 8/20], test acc: 0.7328


100%|██████████| 235/235 [00:05<00:00, 42.61it/s]


[epoch 9/20], train loss: 0.002616737736389041, train acc: 0.7403


100%|██████████| 40/40 [00:00<00:00, 54.88it/s]


[epoch 9/20], test acc: 0.7346


100%|██████████| 235/235 [00:05<00:00, 41.40it/s]


[epoch 10/20], train loss: 0.0025268015451729298, train acc: 0.7476833333333334


100%|██████████| 40/40 [00:00<00:00, 54.06it/s]


[epoch 10/20], test acc: 0.7415


100%|██████████| 235/235 [00:05<00:00, 41.25it/s]


[epoch 11/20], train loss: 0.0024164391215890646, train acc: 0.7583833333333333


100%|██████████| 40/40 [00:00<00:00, 54.45it/s]


[epoch 11/20], test acc: 0.7612


100%|██████████| 235/235 [00:05<00:00, 43.92it/s]


[epoch 12/20], train loss: 0.0023769796825945377, train acc: 0.76465


100%|██████████| 40/40 [00:00<00:00, 55.51it/s]


[epoch 12/20], test acc: 0.7565


100%|██████████| 235/235 [00:05<00:00, 45.42it/s]


[epoch 13/20], train loss: 0.0022918405011296272, train acc: 0.7735166666666666


100%|██████████| 40/40 [00:00<00:00, 56.65it/s]


[epoch 13/20], test acc: 0.7664


100%|██████████| 235/235 [00:05<00:00, 45.70it/s]


[epoch 14/20], train loss: 0.0022305522579699755, train acc: 0.7795833333333333


100%|██████████| 40/40 [00:00<00:00, 55.72it/s]


[epoch 14/20], test acc: 0.7661


100%|██████████| 235/235 [00:05<00:00, 46.02it/s]


[epoch 15/20], train loss: 0.00218005757778883, train acc: 0.7857


100%|██████████| 40/40 [00:00<00:00, 54.77it/s]


[epoch 15/20], test acc: 0.7805


100%|██████████| 235/235 [00:05<00:00, 44.19it/s]


[epoch 16/20], train loss: 0.0021238988265395164, train acc: 0.7930666666666667


100%|██████████| 40/40 [00:00<00:00, 54.82it/s]


[epoch 16/20], test acc: 0.7715


100%|██████████| 235/235 [00:05<00:00, 44.61it/s]


[epoch 17/20], train loss: 0.0020722660701721907, train acc: 0.7989


100%|██████████| 40/40 [00:00<00:00, 55.28it/s]


[epoch 17/20], test acc: 0.7701


100%|██████████| 235/235 [00:05<00:00, 44.39it/s]


[epoch 18/20], train loss: 0.002012816723436117, train acc: 0.8041


100%|██████████| 40/40 [00:00<00:00, 54.46it/s]


[epoch 18/20], test acc: 0.8036


100%|██████████| 235/235 [00:05<00:00, 44.63it/s]


[epoch 19/20], train loss: 0.0019526125397533178, train acc: 0.81325


100%|██████████| 40/40 [00:00<00:00, 54.51it/s]


[epoch 19/20], test acc: 0.8075


100%|██████████| 235/235 [00:05<00:00, 45.19it/s]


[epoch 20/20], train loss: 0.0019294003723189235, train acc: 0.816


100%|██████████| 40/40 [00:00<00:00, 54.94it/s]

[epoch 20/20], test acc: 0.7774



