In [1]:
batch_size = 256
num_workers = 4
lr = 1
num_epochs = 50

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [3]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(nn.Conv2d(1, 6, 5),
                                  nn.Sigmoid(),
                                  nn.MaxPool2d(2, stride=2),
                                  nn.Conv2d(6, 16, 5),
                                  nn.Sigmoid(),
                                  nn.MaxPool2d(2, stride=2)
                                 )
        self.linear = nn.Sequential(nn.Linear(16 * 4 * 4, 120),
                                    nn.Sigmoid(),
                                    nn.Linear(120, 84),
                                    nn.Sigmoid(),
                                    nn.Linear(84, 10))
    
    def forward(self, x):
        b, c, h, w = x.shape #256 1 28 28
        feature = self.conv(x) #256, 16, 4, 4
        return self.linear(feature.view(b, -1))

In [4]:
mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', train=False, download=True, transform=transforms.ToTensor())
print(len(mnist_train), len(mnist_test))
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

60000 10000


In [5]:
net = LeNet()
optimizer = torch.optim.SGD(net.parameters(), lr)
# (params, lr=<required parameter>, momentum=0, dampening=0, weight_decay=0, nesterov=False)
loss = torch.nn.CrossEntropyLoss()

In [6]:
def train_FashionMNIST(net, train_iter, optimizer):
    train_loss = 0.0
    train_acc = 0.0
    train_num = 0
    
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        # net.zero_grad()
        l.backward()
        optimizer.step()
        
        train_loss += l.item()
        train_acc += (y_hat.argmax(dim=1) == y).sum().item()
        train_num += y.shape[0]
    
    train_loss /= train_num
    train_acc /= train_num
    print('train loss: %.4f, train acc: %.3f' % (train_loss, train_acc))

In [7]:
def test_FashionMNIST(net, test_iter):
    test_acc = 0.0
    test_num = 0
    
    for X, y in test_iter:
        y_hat = net(X)
        test_acc += (y_hat.argmax(dim=1) == y).sum().item()
        test_num += y.shape[0]
    
    test_acc /= test_num
    print('test acc: %.3f' % (test_acc))

In [8]:
for i in range(num_epochs):
    print(f'epoch: {i}')
    train_FashionMNIST(net, train_iter, optimizer)
    test_FashionMNIST(net, test_iter)
    print('----------------')

epoch: 0
train loss: 0.0091, train acc: 0.101
test acc: 0.100
----------------
epoch: 1
train loss: 0.0090, train acc: 0.100
test acc: 0.100
----------------
epoch: 2
train loss: 0.0090, train acc: 0.100
test acc: 0.100
----------------
epoch: 3
train loss: 0.0090, train acc: 0.101
test acc: 0.100
----------------
epoch: 4
train loss: 0.0090, train acc: 0.103
test acc: 0.174
----------------
epoch: 5
train loss: 0.0087, train acc: 0.173
test acc: 0.225
----------------
epoch: 6
train loss: 0.0061, train acc: 0.350
test acc: 0.469
----------------
epoch: 7
train loss: 0.0043, train acc: 0.552
test acc: 0.580
----------------
epoch: 8
train loss: 0.0036, train acc: 0.639
test acc: 0.653
----------------
epoch: 9
train loss: 0.0031, train acc: 0.690
test acc: 0.665
----------------
epoch: 10
train loss: 0.0028, train acc: 0.725
test acc: 0.735
----------------
epoch: 11
train loss: 0.0026, train acc: 0.741
test acc: 0.720
----------------
epoch: 12
train loss: 0.0024, train acc: 0.757
tes

In [None]:
# SGD: lr=1, epoch 47, test_acc=0.871