In [35]:
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch import nn, optim
import time
import tqdm
import torch.nn.functional as F
import torch.utils.data

%matplotlib inline

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [36]:
class FlattenLayer(torch.nn.Module):
    """Collapse every dimension after the first into a single one.

    An input of shape ``(N, d1, d2, ...)`` becomes ``(N, d1 * d2 * ...)``.
    """

    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        """Return ``x`` reshaped to ``(x.shape[0], -1)``.

        ``reshape`` is used instead of ``view`` so that non-contiguous inputs
        (for example the result of ``permute`` or ``transpose``) are accepted:
        ``view`` raises ``RuntimeError`` on them, while ``reshape`` returns a
        view when the memory layout allows it and copies otherwise. For
        contiguous inputs the result is the same as with ``view``.
        """
        return x.reshape(x.shape[0], -1)

In [37]:
def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    """Build one Network-in-Network block.

    The block is a convolution with the given geometry followed by two
    1x1 convolutions, with a ReLU after each of the three convolutions.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by every convolution in the block.
        kernel_size: Kernel size of the first convolution.
        stride: Stride of the first convolution.
        padding: Padding of the first convolution.

    Returns:
        An ``nn.Sequential`` holding the six layers in order.
    """
    first_conv = nn.Conv2d(in_channels, out_channels,
                           kernel_size=kernel_size, stride=stride, padding=padding)
    layers = [first_conv, nn.ReLU()]
    # The two 1x1 convolutions keep the channel count and spatial size.
    for _ in range(2):
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=1))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)

In [38]:
class GlobalAvgPool2d(nn.Module):
    """Average-pool each channel over its entire spatial extent.

    The pooling window is read from the input's own trailing dimensions, so
    the layer works for any input height and width.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` averaged with a window covering dimensions 2 onward."""
        window = x.shape[2:]
        return F.avg_pool2d(x, kernel_size=window)

In [39]:
# NiN-style network for single-channel images. The shape comments give each
# stage's output (channels x height x width) for a 1 x 224 x 224 input.
net = nn.Sequential(
    nin_block(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=0),     # 96 x 54 x 54
    nn.MaxPool2d(kernel_size=3, stride=2),                                             # 96 x 26 x 26
    nin_block(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),   # 256 x 26 x 26
    nn.MaxPool2d(kernel_size=3, stride=2),                                             # 256 x 12 x 12
    nin_block(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),  # 384 x 12 x 12
    nn.MaxPool2d(kernel_size=3, stride=2),                                             # 384 x 5 x 5
    nn.Dropout(p=0.5),

    # The last block emits 10 channels; global average pooling reduces each
    # channel to one value and the flatten layer yields a (batch, 10) output.
    nin_block(in_channels=384, out_channels=10, kernel_size=3, stride=1, padding=1),   # 10 x 5 x 5
    GlobalAvgPool2d(),                                                                 # 10 x 1 x 1
    FlattenLayer(),                                                                    # 10
)

In [40]:
# Shape sanity check: feed one random 1 x 224 x 224 image through the network
# one child module at a time and report the tensor shape after each of them.
X = torch.rand((1, 1, 224, 224))
for child_name, child in net.named_children():
    X = child(X)
    print(child_name, 'output shape;', X.shape)

0 output shape; torch.Size([1, 96, 54, 54])
1 output shape; torch.Size([1, 96, 26, 26])
2 output shape; torch.Size([1, 256, 26, 26])
3 output shape; torch.Size([1, 256, 12, 12])
4 output shape; torch.Size([1, 384, 12, 12])
5 output shape; torch.Size([1, 384, 5, 5])
6 output shape; torch.Size([1, 384, 5, 5])
7 output shape; torch.Size([1, 10, 5, 5])
8 output shape; torch.Size([1, 10, 1, 1])
9 output shape; torch.Size([1, 10])


In [41]:
# Mini-batch size shared by the data loaders and the training call.
batch_size = 128

# NOTE(review): the default `root` is a machine-specific absolute Windows path;
# pass `root=...` explicitly to run this notebook on another machine.
def load_data_fmnist(batch_size, resize=None, root=r'D:\notebook_canticle\Datasets\fmnist/',
                     download=False):
    """Build train and test ``DataLoader`` objects for Fashion-MNIST.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        resize: Optional target size handed to ``torchvision.transforms.Resize``;
            when falsy the images are not resized.
        root: Directory in which torchvision looks for (or stores) the dataset.
        download: Forwarded to ``torchvision.datasets.FashionMNIST``. The
            default ``False`` keeps the previous behaviour of requiring the
            data to be present under ``root`` already; pass ``True`` to let
            torchvision fetch it when it is missing.

    Returns:
        A ``(train_iter, test_iter)`` tuple. The training loader shuffles its
        samples, the test loader does not.
    """
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    # Use the public `torchvision.transforms.ToTensor` rather than reaching
    # into the internal `transforms.transforms` submodule.
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, download=download, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, download=download, transform=transform)
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)

    return train_iter, test_iter

# Resize the images to 224 x 224 for the network's input.
train_iter, test_iter = load_data_fmnist(batch_size, 224)

In [42]:
def evaluate_accuracy(data_iter, net, device):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    The network is switched to evaluation mode for the whole pass and is put
    back into whatever mode (training or evaluation) it was in on entry, even
    if the forward pass raises. Gradients are not tracked during the pass.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches of inputs and integer labels.
        net: ``torch.nn.Module`` whose output is arg-maxed along dimension 1.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of samples, as a float.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    acc_sm, n = 0.0, 0
    # Remember the caller's mode so that evaluating an already-eval model
    # does not silently flip it into training mode.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for X, y in data_iter:
                X, y = X.to(device), y.to(device)
                acc_sm += (net(X).argmax(dim=1) == y).float().sum().cpu().item()
                n += y.shape[0]
    finally:
        net.train(was_training)
    return acc_sm / n

In [43]:
def train_model(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    """Train ``net`` with cross-entropy loss and print per-epoch statistics.

    After every epoch one line is printed with the mean per-batch training
    loss, the training accuracy, the test accuracy obtained from
    ``evaluate_accuracy`` and the elapsed wall-clock time for the epoch
    (including the test pass).

    Args:
        net: Model to train; it is moved to ``device``.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches used for the test accuracy.
        batch_size: Unused by this function; kept so existing calls keep working.
        optimizer: Optimizer already constructed over ``net``'s parameters.
        device: Device on which the model and the batches are placed.
        num_epochs: Number of passes over ``train_iter``.

    Returns:
        None.

    Raises:
        ZeroDivisionError: If ``train_iter`` yields no batches.
    """
    net = net.to(device)
    print('train on: ', device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # Enter training mode explicitly at the start of every epoch so that
        # dropout is active regardless of the mode the network was left in by
        # the caller or by the previous epoch's evaluation.
        net.train()
        batch_ct, train_l_sm, train_acc_sm, n, start = 0, 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)

            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_l_sm += l.cpu().item()
            train_acc_sm += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_ct += 1
        test_acc = evaluate_accuracy(test_iter, net, device)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              %(epoch+1, train_l_sm/batch_ct, train_acc_sm/n, test_acc, time.time()-start))

In [44]:
# Hyperparameters for this run.
num_epochs = 3
lr = 0.002

# Adam over all of the network's parameters, then train and print the
# per-epoch loss / accuracy lines.
optimizer = optim.Adam(net.parameters(), lr=lr)
train_model(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    batch_size=batch_size,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

train on:  cuda
epoch 1, loss 1.2842, train acc 0.543, test acc 0.751, time 91.9 sec
epoch 2, loss 0.6163, train acc 0.783, test acc 0.812, time 92.6 sec
epoch 3, loss 0.5045, train acc 0.817, test acc 0.821, time 92.2 sec
