In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
batch_size = 256
num_workers = 4
lr = 0.01
num_epochs = 50

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

In [4]:
def conv_block(in_channels, out_channels):
    return nn.Sequential(
        nn.BatchNorm2d(in_channels),
        nn.ReLU(),
        nn.Conv2d(in_channels, out_channels, 3, 1, 1))

In [5]:
class DenseBlock(nn.Module):
    def __init__(self, num_convs, in_channels, out_channels):
        super(DenseBlock, self).__init__()
        blk = []
        for i in range(num_convs):
            blk.append(conv_block(in_channels, out_channels))
            in_channels = in_channels + out_channels
        self.net = nn.ModuleList(blk)
        self.out_channels = in_channels
        
    def forward(self, x):
        for blk in self.net:
            Y = blk(x)
            x = torch.cat((x, Y), dim=1)
        return x

In [6]:
def transition_block(in_channels, out_channels):
    return nn.Sequential(
        nn.BatchNorm2d(in_channels),
        nn.ReLU(),
        nn.Conv2d(in_channels, out_channels, 3, 1, 1),
        nn.MaxPool2d(2, 2))

In [7]:
class DenseNet(nn.Module):
    def __init__(self, num_channels, growth_rate, num_convs_in_dense_block):
        super(DenseNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, num_channels, 7, 2, 3),
            nn.BatchNorm2d(num_channels),
            nn.ReLU(),
            nn.MaxPool2d(3, 2))
        blk = []
        for i, num_convs in enumerate(num_convs_in_dense_block):
            DB = DenseBlock(num_convs, num_channels, growth_rate)
            blk.append(DB)
            num_channels = DB.out_channels
            if i != len(num_convs_in_dense_block) - 1:
                blk.append(transition_block(num_channels, num_channels // 2))
                num_channels = num_channels // 2
        blk.append(nn.BatchNorm2d(num_channels))
        blk.append(nn.ReLU())
        blk.append(nn.AdaptiveAvgPool2d(1))
        self.dense = nn.Sequential(*blk)
        self.fc = nn.Linear(248, 10)
    
    def forward(self, x):
        b, c, h, w = x.shape
        x = self.conv1(x)
        x = self.dense(x)
        return self.fc(x.view(b, -1))

In [8]:
net = DenseNet(64, 32, [4, 4, 4, 4]).cuda()
print(net)
optimizer = torch.optim.Adam(net.parameters(), lr)
loss = torch.nn.CrossEntropyLoss()

DenseNet(
  (conv1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense): Sequential(
    (0): DenseBlock(
      (net): ModuleList(
        (0): Sequential(
          (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (1): ReLU()
          (2): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        )
        (1): Sequential(
          (0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (1): ReLU()
          (2): Conv2d(96, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        )
        (2): Sequential(
          (0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (1): ReLU()
          (2): Conv2d(

In [9]:
trans = []
trans.append(transforms.Resize((96, 96)))
trans.append(transforms.ToTensor())
transform = transforms.Compose(trans)
mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', train=True, download=True, transform=transform)
mnist_test = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', train=False, download=True, transform=transform)
print(len(mnist_train), len(mnist_test))
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

60000 10000


In [10]:
def train_FashionMNIST(net, train_iter, optimizer):
    train_loss = 0.0
    train_acc = 0.0
    train_num = 0
    
    for X, y in train_iter:
        X = X.cuda()
        y = y.cuda()
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        # net.zero_grad()
        l.backward()
        optimizer.step()
        
        train_loss += l.item()
        train_acc += (y_hat.argmax(dim=1) == y).sum().item()
        train_num += y.shape[0]
    
    train_loss /= train_num
    train_acc /= train_num
    print('train loss: %.4f, train acc: %.3f' % (train_loss, train_acc))

In [11]:
def test_FashionMNIST(net, test_iter):
    test_acc = 0.0
    test_num = 0
    
    for X, y in test_iter:
        X = X.cuda()
        y = y.cuda()
        y_hat = net(X)
        test_acc += (y_hat.argmax(dim=1) == y).sum().item()
        test_num += y.shape[0]
    
    test_acc /= test_num
    print('test acc: %.3f' % (test_acc))

In [12]:
for i in range(num_epochs):
    print(f'epoch: {i}')
    train_FashionMNIST(net, train_iter, optimizer)
    test_FashionMNIST(net, test_iter)
    print('----------------')

epoch: 0
train loss: 0.0019, train acc: 0.818
test acc: 0.887
----------------
epoch: 1
train loss: 0.0010, train acc: 0.903
test acc: 0.901
----------------
epoch: 2
train loss: 0.0008, train acc: 0.920
test acc: 0.911
----------------
epoch: 3
train loss: 0.0007, train acc: 0.931
test acc: 0.921
----------------
epoch: 4
train loss: 0.0007, train acc: 0.938
test acc: 0.914
----------------
epoch: 5
train loss: 0.0006, train acc: 0.945
test acc: 0.917
----------------
epoch: 6
train loss: 0.0005, train acc: 0.951
test acc: 0.927
----------------
epoch: 7
train loss: 0.0005, train acc: 0.956
test acc: 0.926
----------------
epoch: 8
train loss: 0.0004, train acc: 0.963
test acc: 0.925
----------------
epoch: 9
train loss: 0.0003, train acc: 0.967
test acc: 0.924
----------------
epoch: 10
train loss: 0.0003, train acc: 0.973
test acc: 0.925
----------------
epoch: 11
train loss: 0.0002, train acc: 0.976
test acc: 0.925
----------------
epoch: 12
train loss: 0.0002, train acc: 0.981
tes

In [13]:
# Adam: lr=0.001, epoch 181, test_acc=0.938, batch_size=256
# Adam: lr=0.01, epoch 0, test_acc=0.100, batch_size=256
# Adam: lr=0.0005, epoch 25, test_acc=0.918, batch_size=256