# LeNet

In [2]:
import sys
import time
sys.path.append("..")
import library.d2lzh_pytorch as d2l
from torch import nn, optim
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [40]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc = nn.Sequential(
            nn.Linear(in_features=16*4*4, out_features=120),
            nn.Sigmoid(),
            nn.Linear(in_features=120, out_features=84),
            nn.Sigmoid(),
            nn.Linear(in_features=84, out_features=10)
        )
    
    def forward(self, img):
        feature = self.conv(img)
        return self.fc(feature.view(img.shape[0], -1))

In [41]:
net = LeNet()  
print(net)

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [42]:
batch_size = 256  
train_iter, test_iter =  d2l.load_data_fashion_mnist(batch_size=batch_size)

In [43]:
# 本函数已保存在d2lzh_pytorch包中方便以后使用。该函数将被逐步改进。  
def evaluate_accuracy(data_iter, net,  device = torch.device('cuda' if torch.cuda.is_available()  else 'cpu')):  
    acc_sum, n = 0.0, 0
    with torch.no_grad(): # 计算正确率的时候，关闭梯度
        for X, y in iter(data_iter):
            if isinstance(net, torch.nn.Module):
                net.eval() # 评估模式，关闭dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # 改回训练模式
            else:
                if 'is_training' in net.__code__.co_varnames: # 如果有is_training这个参数
                    acc_sum += (net(X,  is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) ==  y).float().sum().item()
            n += y.shape[0]
        return acc_sum / n
                

In [46]:
def train_ch5(net, train_iter, test_iter, batch_size, optimizer,  device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f,  time %.1f sec'  % (epoch + 1, train_l_sum / batch_count,  train_acc_sum / n, test_acc, time.time() - start))

In [47]:
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
print(next(iter(train_iter))[0].shape)
train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

torch.Size([256, 1, 28, 28])
training on  cuda
epoch 1, loss 0.9226, train acc 0.641, test acc 0.691,  time 5.0 sec
epoch 2, loss 0.3795, train acc 0.717, test acc 0.726,  time 6.1 sec
epoch 3, loss 0.2283, train acc 0.740, test acc 0.743,  time 16.3 sec
epoch 4, loss 0.1577, train acc 0.757, test acc 0.760,  time 19.2 sec
epoch 5, loss 0.1179, train acc 0.771, test acc 0.770,  time 17.9 sec
