In [1]:
from torch import nn, optim
import torch
import torchvision
import numpy as np
import time

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 128

In [2]:
def load_data_fashion_mnist(batch_size=batch_size, resize=None, root='D:/Datasets'):
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)
    data_train = torchvision.datasets.FashionMNIST(train=True,
                                                root='D:/Datasets',
                                                download=True, 
                                                transform=transform)
    data_test = torchvision.datasets.FashionMNIST(train=False,
                                                root='D:/Datasets',
                                                download=True, 
                                                transform=transform)
    train_iter = torch.utils.data.DataLoader(data_train, 
                                            shuffle=True, 
                                            batch_size=batch_size)
    test_iter = torch.utils.data.DataLoader(data_test, 
                                            shuffle=True, 
                                            batch_size=batch_size)
    feature, label = data_train[0]
    print(feature.shape, label) # Channel x Height X Width
    return train_iter, test_iter

In [3]:
batch_size = 128
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size,resize=224)

torch.Size([1, 224, 224]) 9


In [5]:
train_iter.dataset

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: D:/Datasets
    Split: Train
    StandardTransform
Transform: Compose(
               Resize(size=224, interpolation=PIL.Image.BILINEAR)
               ToTensor()
           )

In [7]:
torch.cuda.is_available()

True

In [9]:
torch.cuda.device_count()
torch.cuda.get_device_name(0)

'GeForce GTX 1660 Ti'

In [10]:
def evaluate_accuracy(data_iter, net, device = torch.device('cuda' 
                                        if torch.cuda.is_available()
                                                           else 'cpu')):
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()
            else:
                if('is_training' in net.__code__.co_varnames):
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum/n


In [31]:
def train_model(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ..", device)
    loss = torch.nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start= 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(1) == y).float().sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f,time %.1f sec' 
              % (epoch + 1, train_l_sum / batch_count,
train_acc_sum / n, test_acc, time.time() - start))   
    
    

In [18]:
for X, y in train_iter:
    print("X:",X.shape,"y:",y.shape)
    break

X: torch.Size([128, 1, 224, 224]) y: torch.Size([128])


In [26]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            # in_channels, out_channels,kernel_size, stride, padding
            nn.Conv2d(1, 96, 11, 4),
            nn.ReLU(),
            nn.MaxPool2d(3, 2), # kernel_size, stride
            # 减⼩卷积窗⼝，使⽤填充为2来使得输⼊与输出的⾼和宽⼀致，且增⼤输出通道数
            nn.Conv2d(96, 256, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            # 连续3个卷积层，且使⽤更⼩的卷积窗⼝。除了最后的卷积层外，进⼀步增⼤了输出通道数。
            # 前两个卷积层后不使⽤池化层来减⼩输⼊的⾼和宽
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(256*5*5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            # 输出层。由于这⾥使⽤Fashion-MNIST，所以⽤类别数为10，⽽⾮论⽂中的1000
            nn.Linear(4096, 10),
        )
    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output

In [27]:
net = AlexNet()
print(net)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (

In [32]:
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train_model(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on .. cuda
epoch 1, loss 0.3501, train acc 0.870, test acc 0.881,time 83.7 sec
epoch 2, loss 0.1476, train acc 0.891, test acc 0.893,time 83.6 sec
epoch 3, loss 0.0879, train acc 0.903, test acc 0.903,time 82.7 sec
epoch 4, loss 0.0609, train acc 0.910, test acc 0.902,time 81.4 sec
epoch 5, loss 0.0450, train acc 0.916, test acc 0.909,time 81.3 sec
