# 0 DataLoader for FashionMNIST

In [1]:
import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data import DataLoader 

def load_data_fashion_mnist(batch_size, resize=None):
    # compose transforms.Resize() and transforms.ToTensor() together
    trans = []
    if resize:
        trans.append(transforms.Resize(size=resize))
    trans.append(transforms.ToTensor())
    # compose transforms.Resize() and transforms.ToTensor() together
    transform = transforms.Compose(trans)
    train_data = torchvision.datasets.FashionMNIST(root = "./data/FashionMNIST", train=True, transform=transform, download=True)
    test_data = torchvision.datasets.FashionMNIST(root = "./data/FashionMNIST", train=False, transform=transform, download=True)

    train_iter = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
    test_iter = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=4)

    return train_iter, test_iter

# 1 AlexNet(simplified)
1. AlexNet uses ReLU insted of sigmoid
2. AlexNet uses Dropout

In [2]:
import time
import torch
from torch import nn, optim
import torchvision

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            #1*227*227->96*55*55
            nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(),
            #96*55*55->96*27*27
            nn.MaxPool2d(kernel_size=3, stride=2),
            #96*27*27->256*27*27
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            #256*27*27->256*13*13
            nn.MaxPool2d(kernel_size=3, stride=2),
            #256*13*13->384*13*13
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(),
            #384*13*13->384*13*13
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            #384*13*13->256*13*13
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            #256*13*13->256*6*6
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.fc = nn.Sequential(
            nn.Linear(256*6*6, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10)
        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output

In [3]:
net = AlexNet()
print(net)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): LeakyReLU(negative_slope=0.01)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0

In [4]:
batch_size = 128
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size, resize=227)

In [5]:
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # Use the device net is on
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            net.eval() # close Dropout
            y_hat = net(X.to(device))
            acc_sum += (y_hat.argmax(dim=1) == y.to(device)).float().sum().cpu().item()
            n += y.shape[0]
            net.train() # reuse Dropout
    return acc_sum / n

def train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print(" training on ", device)
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # We do not use to store them on GPU
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))


In [6]:
lr, num_epochs = 0.001, 2
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

 training on  cuda
epoch 1, loss 0.6048, train acc 0.771, test acc 0.874, time 1016.0 sec


KeyboardInterrupt: 