In [48]:
import torch
import torchvision
import torch.utils.data as Data
import matplotlib.pyplot as plt
%matplotlib inline
import torchvision.transforms as transforms
import numpy as np
import pandas as pd
from torch import nn, optim
import time
import torch.nn.functional as F
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [49]:
class FlattenLayer(nn.Module):
    """Collapse every dimension after the first into a single one.

    An input of shape ``(N, d1, d2, ...)`` is returned with shape
    ``(N, d1 * d2 * ...)``; the first dimension is kept as-is.
    """
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        # reshape() gives the same result as view() for contiguous tensors but,
        # unlike view(), also accepts non-contiguous input (e.g. the result of
        # transpose()/permute()) instead of raising a RuntimeError.
        return x.reshape(x.shape[0], -1)

In [50]:
class GlobalAvgPool2d(nn.Module):
    """Average pooling whose window spans all of the input's trailing dimensions.

    The kernel size is taken from the input itself (every dimension from
    index 2 onward), so each channel is reduced to a single averaged value.
    """
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Window size == size of the dimensions after (batch, channel).
        window = x.shape[2:]
        return F.avg_pool2d(x, kernel_size=window)

In [51]:
class Residual(nn.Module):
    """Residual block: two 3x3 convolutions with batch norm plus a skip connection.

    The main path is ``conv1 -> bn1 -> relu -> conv2 -> bn2``. Its output is
    added to the (optionally projected) input and passed through a final ReLU.

    Args:
        in_c: number of input channels.
        out_c: number of output channels.
        use_1x1conv: if True, the skip connection goes through a 1x1
            convolution (``conv3``) that maps ``in_c`` to ``out_c`` channels
            with the same stride as ``conv1``.
        stride: stride of ``conv1`` (and of ``conv3`` when present).

    Raises:
        ValueError: if ``use_1x1conv`` is False while ``in_c != out_c`` or
            ``stride`` is not 1. In that configuration the identity skip
            connection does not match the main path's shape, and the addition
            in ``forward`` would either fail or silently broadcast.
    """
    def __init__(self, in_c, out_c, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        # Accept both an int stride and a per-dimension sequence.
        strides = (stride, stride) if isinstance(stride, int) else tuple(stride)
        if not use_1x1conv and (in_c != out_c or any(s != 1 for s in strides)):
            raise ValueError(
                "Residual needs use_1x1conv=True when in_c != out_c or stride != 1 "
                "(got in_c=%r, out_c=%r, stride=%r)" % (in_c, out_c, stride))
        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_c)
        self.bn2 = nn.BatchNorm2d(out_c)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Explicit None check: do not rely on the truthiness of a module object.
        if self.conv3 is not None:
            X = self.conv3(X)
        return F.relu(Y + X)

In [52]:
# Smoke test: a channel-preserving block should return the input's shape.
blk = Residual(in_c=3, out_c=3)
X = torch.rand(4, 3, 6, 6)
blk(X).shape

torch.Size([4, 3, 6, 6])

In [53]:
# Network stem: 7x7 stride-2 convolution on a single-channel input, batch norm,
# ReLU, then a 3x3 stride-2 max pool. Later cells append to this container.
net = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(num_features=64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

In [54]:
def resnet_block(in_c, out_c, num_residuals, first_block=False):
    """Stack ``num_residuals`` Residual units into one ``nn.Sequential`` stage.

    Unless ``first_block`` is True, the first unit maps ``in_c`` to ``out_c``
    channels using a 1x1-conv shortcut and stride 2. Every other unit is a
    plain ``Residual(out_c, out_c)``. With ``first_block=True`` the function
    asserts that ``in_c == out_c``.
    """
    if first_block:
        assert in_c == out_c

    def build_unit(position):
        # Only the leading unit of a non-first stage changes the shape.
        changes_shape = position == 0 and not first_block
        if changes_shape:
            return Residual(in_c, out_c, use_1x1conv=True, stride=2)
        return Residual(out_c, out_c)

    return nn.Sequential(*[build_unit(pos) for pos in range(num_residuals)])

In [55]:
# Append the four residual stages; each row is (name, in_c, out_c, first_block).
# Only the first stage keeps the channel count and skips the stride-2 unit.
for stage_name, stage_in, stage_out, stage_is_first in (
    ("resnet_block1", 64, 64, True),
    ("resnet_block2", 64, 128, False),
    ("resnet_block3", 128, 256, False),
    ("resnet_block4", 256, 512, False),
):
    net.add_module(stage_name,
                   resnet_block(stage_in, stage_out, 2, first_block=stage_is_first))

In [56]:
# Head: global average pooling, then flatten and a 512 -> 10 linear layer.
net.add_module("global_avg_pool", GlobalAvgPool2d())
net.add_module(
    "fc",
    nn.Sequential(
        FlattenLayer(),
        nn.Linear(in_features=512, out_features=10),
    ),
)

In [57]:
# Push one random 1x1x224x224 input through each top-level child and print the
# shape after every step.
X = torch.rand((1,1,224,224))
# Probe in eval mode and without autograd: a training-mode forward pass would
# fold this random input into the BatchNorm running statistics and build an
# unused autograd graph. The previous mode is restored afterwards.
was_training = net.training
net.eval()
with torch.no_grad():
    for name, layer in net.named_children():
        X = layer(X)
        print(name, ' output shape:\t', X.shape)
net.train(was_training)

0  output shape:	 torch.Size([1, 64, 112, 112])
1  output shape:	 torch.Size([1, 64, 112, 112])
2  output shape:	 torch.Size([1, 64, 112, 112])
3  output shape:	 torch.Size([1, 64, 56, 56])
resnet_block1  output shape:	 torch.Size([1, 64, 56, 56])
resnet_block2  output shape:	 torch.Size([1, 128, 28, 28])
resnet_block3  output shape:	 torch.Size([1, 256, 14, 14])
resnet_block4  output shape:	 torch.Size([1, 512, 7, 7])
global_avg_pool  output shape:	 torch.Size([1, 512, 1, 1])
fc  output shape:	 torch.Size([1, 10])


In [58]:
# Mini-batch size passed to load_fm for both the train and test loaders.
batch_size = 256
# Root directory handed to torchvision's FashionMNIST dataset.
# NOTE(review): absolute, machine-specific Windows path — adjust before running elsewhere.
rt = r'D:\notebook_canticle\Datasets\fmnist/'
def load_fm(rt, batch_size, resize=None, download=False):
    """Build train/test DataLoaders for torchvision's FashionMNIST dataset.

    Args:
        rt: dataset root directory passed to ``FashionMNIST(root=...)``.
        batch_size: batch size used by both loaders.
        resize: if truthy, a ``transforms.Resize(size=resize)`` step is applied
            before ``transforms.ToTensor()``.
        download: forwarded to ``FashionMNIST(download=...)``. The default
            (False) keeps the previous behaviour of requiring the files to be
            present under ``rt`` already; pass True to fetch them when missing.

    Returns:
        ``(train_iter, test_iter)``: the training loader shuffles its data,
        the test loader does not.
    """
    steps = []
    if resize:
        steps.append(transforms.Resize(size=resize))
    steps.append(transforms.ToTensor())
    transform = transforms.Compose(steps)
    fm_train = torchvision.datasets.FashionMNIST(
        root=rt, train=True, transform=transform, download=download)
    fm_test = torchvision.datasets.FashionMNIST(
        root=rt, train=False, transform=transform, download=download)
    train_iter = Data.DataLoader(fm_train, batch_size=batch_size, shuffle=True)
    test_iter = Data.DataLoader(fm_test, batch_size=batch_size, shuffle=False)
    return train_iter, test_iter
# Build the loaders with the images resized to 96.
train_iter, test_iter = load_fm(rt, batch_size, resize=96)

In [59]:
def evaluate_acc(net, data_iter, device):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Each batch ``(X, y)`` is moved to ``device``; a prediction is the argmax
    over dimension 1 of ``net(X)`` and is counted correct when it equals ``y``.
    The model is evaluated in eval mode with gradients disabled, and the mode
    it had on entry (train or eval) is restored before returning.

    Args:
        net: the model to evaluate (not moved to ``device`` by this function).
        data_iter: iterable yielding ``(X, y)`` batches.
        device: device the batches are moved to.

    Returns:
        Number of correct predictions divided by the number of samples seen.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    was_training = net.training
    # Switch mode once for the whole pass instead of toggling it per batch.
    net.eval()
    correct, n = 0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                X, y = X.to(device), y.to(device)
                correct += (net(X).argmax(dim=1) == y).sum().item()
                n += y.shape[0]
    finally:
        # Restore the caller's mode rather than forcing training mode.
        net.train(was_training)
    return correct / n

In [60]:
def train_model(net, optimizer, train_iter, test_iter, device, num_epochs):
    """Train ``net`` with cross-entropy loss and print metrics after each epoch.

    For every epoch the model is put in training mode, each ``(X, y)`` batch of
    ``train_iter`` is moved to ``device`` and used for one optimizer step, and
    then ``evaluate_acc`` (defined elsewhere in this notebook) is called on
    ``test_iter``. The printed line shows the mean of the per-batch losses,
    the training accuracy, the test accuracy and the epoch's wall-clock time.

    Args:
        net: model to train; moved to ``device`` first.
        optimizer: optimizer stepping ``net``'s parameters.
        train_iter: iterable of training batches ``(X, y)``.
        test_iter: iterable of evaluation batches, passed to ``evaluate_acc``.
        device: device used for the model and the batches.
        num_epochs: number of passes over ``train_iter``.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    net = net.to(device)
    print('train on: ', device)
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # Enter training mode explicitly so the epoch does not depend on the
        # mode the caller, or a previous evaluation pass, left the model in.
        net.train()
        loss_sum, correct, n, batch_ct = 0.0, 0, 0, 0
        start = time.time()
        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            batch_loss = loss(y_hat, y)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            loss_sum += batch_loss.item()
            batch_ct += 1
            n += y.shape[0]
            correct += (y_hat.argmax(dim=1) == y).sum().item()
        test_acc = evaluate_acc(net, test_iter, device)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.3f sec'
              %(epoch+1, loss_sum/batch_ct, correct/n, test_acc, time.time()-start))

In [61]:
# Hyper-parameters for this run: epoch count and Adam learning rate.
num_epochs = 5
lr = 0.001
optimizer = optim.Adam(net.parameters(), lr=lr)
train_model(net, optimizer, train_iter, test_iter, device, num_epochs)

train on:  cuda
epoch 1, loss 0.4086, train acc 0.851, test acc 0.850, time 59.447 sec
epoch 2, loss 0.2498, train acc 0.907, test acc 0.901, time 59.352 sec
epoch 3, loss 0.2097, train acc 0.923, test acc 0.885, time 59.292 sec
epoch 4, loss 0.1813, train acc 0.933, test acc 0.900, time 59.470 sec
epoch 5, loss 0.1542, train acc 0.943, test acc 0.903, time 59.264 sec
