# 1 Convolutional layer
$[B, C_i,H_i, W_i]\rightarrow[B, C_o,H_o,W_o]$

$$H_o=\lfloor(H_i+2p_H-k_H)/s_H\rfloor+1\\W_o=\lfloor(W_i+2p_W-k_W)/s_W\rfloor+1$$

floor((10+2*1-3)/2)+1=floor(4.5)+1=5

floor((10+2*2-3)/1)+1=11+1=12

In [1]:
import torch
from torch import nn

# kernel_size, stride and padding each accept either a single int or an
# (height, width) pair.
net = nn.Conv2d(
    in_channels=1,
    out_channels=3,
    kernel_size=(3, 3),
    stride=(2, 1),
    padding=(1, 2),
)

# Input layout is (batch_size, channels, height, width) = (8, 1, 10, 10).
x = torch.randn(8, 1, 10, 10)

net(x).shape

torch.Size([8, 3, 5, 12])

# 2 Pooling layer
$[B,C,H_i,W_i]\rightarrow[B,C,H_o,W_o]$
$$H_o=\lfloor(H_i-k_H)/s_H\rfloor+1\\W_o=\lfloor(W_i-k_W)/s_W\rfloor+1$$
floor((5-2)/2)+1=floor(1.5)+1=2

floor((12-3)/2)+1=floor(4.5)+1=5

In [2]:
# kernel_size and stride each accept either a single int or an
# (height, width) pair.
net2 = nn.MaxPool2d((2, 3), stride=2)
net2(net(x)).shape

torch.Size([8, 3, 2, 5])

# 3 LeNet

In [3]:
import time
import torch
from torch import nn, optim

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class LeNet(nn.Module):
    """LeNet-style CNN producing 10 outputs per single-channel 28x28 image.

    ``conv`` holds two (5x5 convolution -> sigmoid -> 2x2 max-pool) stages;
    ``fc`` is a three-layer sigmoid MLP applied to the flattened features.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            # spatial size: 28 - 5 + 1 = 24
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.Sigmoid(),
            # 24 / 2 = 12
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 12 - 5 + 1 = 8
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            # 8 / 2 = 4
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        classifier_layers = [
            # 16 channels of 4x4 features per image
            nn.Linear(in_features=16 * 4 * 4, out_features=120),
            nn.Sigmoid(),
            nn.Linear(in_features=120, out_features=84),
            nn.Sigmoid(),
            nn.Linear(in_features=84, out_features=10),
        ]
        self.conv = nn.Sequential(*feature_layers)
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, img):
        """Run the conv stages, flatten per sample, and apply the MLP head."""
        batch = img.shape[0]
        flat = self.conv(img).view(batch, -1)
        return self.fc(flat)

In [4]:
# Instantiate LeNet (rebinding the name `net`) and print its layer structure.
net = LeNet()
print(net)

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [5]:
def evaluate_accuracy(data_iter, net, device=None):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: iterable of ``(X, y)`` batches, where ``y`` holds class indices.
        net: an ``nn.Module`` or any callable mapping ``X`` to per-class scores
            (the prediction is the ``argmax`` over dim 1).
        device: device to move each batch to. When ``None`` and ``net`` is an
            ``nn.Module`` with parameters, the device of its first parameter
            is used; otherwise batches are left where they are.

    Returns:
        Accuracy as a float.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    if device is None and is_module:
        # Use the device net is on
        first_param = next(net.parameters(), None)
        if first_param is not None:
            device = first_param.device

    # Remember the current mode so evaluation does not silently leave a
    # network that was training stuck in eval mode.
    was_training = is_module and net.training
    if is_module:
        net.eval()  # disable Dropout / use BatchNorm running statistics

    acc_sum, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                if device is not None:
                    X, y = X.to(device), y.to(device)
                y_hat = net(X)
                acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if was_training:
            net.train()
    return acc_sum / n

def train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    """Train ``net`` with cross-entropy loss, printing metrics after every epoch.

    Args:
        net: the ``nn.Module`` to train; it is moved to ``device``.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to ``evaluate_accuracy``.
        batch_size: unused here; kept so existing callers keep working.
        optimizer: optimizer already bound to ``net``'s parameters.
        device: device on which the forward and backward passes run.
        num_epochs: number of passes over ``train_iter``.

    Prints the device once, then per epoch: mean loss per batch, training
    accuracy, test accuracy and elapsed wall-clock seconds.
    """
    net = net.to(device)
    print(" training on ", device)
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # Put Dropout/BatchNorm-style layers in training mode every epoch,
        # whatever mode the caller or an earlier evaluation left the net in.
        net.train()
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # Accumulate plain Python numbers so the running sums are not
            # kept as tensors on the GPU.
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

In [6]:
import torchvision
import torchvision.transforms as transforms
# Hyperparameters
batch_size = 256
num_epochs = 5
lr = 0.001

# Fashion-MNIST train/test splits, downloaded on first use and converted to tensors.
data_root = "./data/FashionMNIST"
to_tensor = transforms.ToTensor()
train_data = torchvision.datasets.FashionMNIST(
    root=data_root, train=True, transform=to_tensor, download=True
)
test_data = torchvision.datasets.FashionMNIST(
    root=data_root, train=False, transform=to_tensor, download=True
)

# Only the training batches are shuffled.
train_iter = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

 training on  cuda
epoch 1, loss 1.9093, train acc 0.307, test acc 0.562, time 13.3 sec
epoch 2, loss 0.9618, train acc 0.620, test acc 0.660, time 12.4 sec
epoch 3, loss 0.7688, train acc 0.716, test acc 0.730, time 11.6 sec
epoch 4, loss 0.6764, train acc 0.742, test acc 0.748, time 12.5 sec
epoch 5, loss 0.6185, train acc 0.761, test acc 0.759, time 12.2 sec
