In [2]:
import time
import torch
import torch.nn as nn
import torchvision
import torch.utils.data as Data
import torchvision.transforms as transforms

# Select the compute device once: CUDA when torch reports it as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
class LeNet(nn.Module):
    """LeNet-style convolutional classifier for 28x28 images.

    The network is two (5x5 conv -> sigmoid -> 2x2 max-pool) stages followed by
    three fully connected layers with sigmoid activations in between.

    The first fully connected layer expects 16 * 4 * 4 features, which is what
    the convolutional stack produces for a 28x28 input:
    28 -> 24 (5x5 conv) -> 12 (pool) -> 8 (5x5 conv) -> 4 (pool).

    Args:
        in_channels: number of channels in the input images (default 1).
        num_classes: number of output scores per sample (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 6, 5),  # in_channels, out_channels, kernel_size
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2),  # kernel_size, stride
            nn.Conv2d(6, 16, 5),
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2),
        )

        self.fc = nn.Sequential(
            nn.Linear(16 * 4 * 4, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, num_classes),
        )

    def forward(self, img):
        """Return the raw class scores, shape (batch, num_classes), for `img`."""
        feature = self.conv(img)
        # flatten(start_dim=1) keeps the batch dimension and collapses the rest.
        # Unlike view(batch, -1) it also works for an empty batch (where -1 is
        # ambiguous) and for feature maps that are not contiguous in memory.
        return self.fc(feature.flatten(start_dim=1))

In [4]:
# Build the model with its default constructor arguments and print its module tree.
net = LeNet()
print(net)

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [30]:
# Mini-batch size shared by the training and test loaders.
batch_size = 256
# Transform handed to both dataset splits below.
transform = transforms.ToTensor()
# NOTE(review): machine-specific absolute Windows path; no `download` argument is
# passed to the datasets below, so the files presumably must already exist under
# this directory — confirm, and consider a configurable/relative data directory.
root=r'D:\notebook_canticle\Datasets\fmnist/'
# Fashion-MNIST training split (train=True) and test split (train=False).
fmnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, transform=transform)
fmnist_test  = torchvision.datasets.FashionMNIST(root=root, train=False, transform=transform)
# Training batches are shuffled; test batches are not.
train_iter = Data.DataLoader(fmnist_train, batch_size=batch_size, shuffle=True)
test_iter  = Data.DataLoader(fmnist_test,  batch_size=batch_size, shuffle=False)

In [34]:
def evaluate_accuracy(data_iter, net, device=None):
    """Return the classification accuracy of `net` over `data_iter`.

    Args:
        data_iter: iterable yielding `(X, y)` batches, where `y` holds the
            class index of each sample in the batch.
        net: `nn.Module` whose output has one score per class along dim 1.
        device: device the batches are moved to before the forward pass.
            When omitted, the device of the model's first parameter is used,
            so the data always lands where the model lives. Models without
            parameters fall back to CUDA when available, else the CPU.

    Returns:
        The fraction of correctly classified samples as a float, or NaN when
        `data_iter` yields no samples.
    """
    if device is None:
        first_param = next(net.parameters(), None)
        if first_param is not None:
            device = first_param.device
        else:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Switch to eval mode once for the whole pass and restore the caller's
    # mode afterwards, instead of unconditionally forcing train mode.
    was_training = net.training
    net.eval()
    correct, n = 0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                y = y.to(device)
                correct += (net(X.to(device)).argmax(dim=1) == y).sum().item()
                n += y.shape[0]
    finally:
        net.train(was_training)

    # Accuracy is undefined without samples; avoid a ZeroDivisionError.
    return correct / n if n else float('nan')

In [36]:
# Put the model on the selected device before the optimizer captures its parameters.
net = net.to(device)
print("Training on", device)

# Hyper-parameters, loss and optimizer.
num_epochs = 5
lr = 0.001
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

for epoch in range(num_epochs):
    # Per-epoch running totals: batches seen, summed batch losses,
    # correct predictions, samples seen, and the epoch start time.
    batch_count = 0
    train_l_sum = 0.0
    train_acc_sum = 0.0
    n = 0
    start = time.time()

    for X, y in train_iter:
        X, y = X.to(device), y.to(device)

        # Clear stale gradients, then forward / backward / parameter update.
        optimizer.zero_grad()
        y_hat = net(X)
        loss = loss_func(y_hat, y)
        loss.backward()
        optimizer.step()

        # Accumulate statistics as plain Python numbers.
        train_l_sum += loss.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
        batch_count += 1

    # Report mean batch loss, training accuracy, test accuracy and elapsed time.
    test_acc = evaluate_accuracy(test_iter, net)
    print("epoch %d, loss %.4f, train_acc %.4f, test_acc %.4f, time %.1f secs"
          % (epoch + 1,
             train_l_sum / batch_count,
             train_acc_sum / n,
             test_acc,
             time.time() - start))
    

Training on cuda
epoch 1, loss 0.7640, train_acc 0.7134, test_acc 0.7254, time 6.3 secs
epoch 2, loss 0.6860, train_acc 0.7357, test_acc 0.7394, time 6.2 secs
epoch 3, loss 0.6359, train_acc 0.7514, test_acc 0.7533, time 6.3 secs
epoch 4, loss 0.5997, train_acc 0.7646, test_acc 0.7640, time 6.1 secs
epoch 5, loss 0.5717, train_acc 0.7752, test_acc 0.7770, time 6.1 secs
