In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
# official code (reshape to 28x28)
def load_mnist(path, kind='train'):
    """Load MNIST-format images and labels from gzip-compressed IDX files.

    Args:
        path: Directory containing `<kind>-labels-idx1-ubyte.gz` and
            `<kind>-images-idx3-ubyte.gz`.
        kind: File-name prefix selecting the split, e.g. 'train' or 't10k'.

    Returns:
        A tuple `(images, labels)`: `images` is a uint8 array of shape
        (num_labels, 28, 28) and `labels` is a uint8 array of shape
        (num_labels,). Both are created with `np.frombuffer` over immutable
        bytes, so they are read-only; copy them before modifying in place.
    """
    # os/gzip are not imported at the top of the notebook, so keep them local.
    import os
    import gzip

    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        # Skip the 8-byte header that precedes the label bytes.
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        # Skip the 16-byte header; the rest is one byte per pixel.
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 28, 28)

    return images, labels

In [3]:
def load_data(data_dir='./data/Fashion'):
    """Load the train and test splits and prepare the images for a CNN.

    Args:
        data_dir: Directory holding the gzip-compressed IDX files read by
            `load_mnist`. Defaults to the notebook's original location.

    Returns:
        `(X_train, y_train, X_test, y_test)`. The image arrays are float64,
        scaled from 0..255 to 0..1, with a channel axis inserted so their
        shape is (N, 1, 28, 28). The label arrays are writable uint8 copies.
    """
    X_train, y_train = load_mnist(data_dir, kind='train')
    X_test, y_test = load_mnist(data_dir, kind='t10k')

    def expand_x_dims(x):
        """Scale pixels to [0, 1] and add a channel axis: (N, H, W) -> (N, 1, H, W)."""
        # One vectorised division replaces a per-image Python loop; the
        # division also allocates a fresh, writable float64 array.
        return np.expand_dims(x / 255, axis=1)

    # load_mnist returns read-only buffers; copy the labels so callers get
    # independent, writable arrays.
    return expand_x_dims(X_train), y_train.copy(), expand_x_dims(X_test), y_test.copy()

In [4]:
# Read both dataset splits from disk; images come back scaled with a channel axis.
X_train, y_train, X_test, y_test = load_data()

In [5]:
def make_loader(X_train, y_train, X_test, y_test, batch_size=64):
    """Wrap the train/test arrays in PyTorch DataLoaders.

    Args:
        X_train, X_test: Array-likes of input samples; converted to float32 tensors.
        y_train, y_test: Array-likes of integer class labels; converted to
            int64 tensors (the dtype `F.cross_entropy` expects for targets).
        batch_size: Number of samples per mini-batch for both loaders.

    Returns:
        `(train_loader, test_loader)`. The training loader reshuffles every
        epoch. The test loader keeps the dataset order so evaluation is
        reproducible.
    """
    from torch.utils.data import TensorDataset, DataLoader

    def to_dataset(features, targets):
        # Pair each sample with its label as (float32, int64) tensors.
        return TensorDataset(torch.tensor(features, dtype=torch.float32),
                             torch.tensor(targets, dtype=torch.int64))

    train_loader = DataLoader(to_dataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    # Shuffling does not change evaluation metrics, so keep a stable order.
    test_loader = DataLoader(to_dataset(X_test, y_test), batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

In [6]:
# Wrap the arrays in PyTorch DataLoaders for mini-batch iteration.
train_loader, test_loader = make_loader(X_train, y_train, X_test, y_test)

In [7]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv/pool stages, then three linear layers.

    For a (N, 1, 28, 28) input the spatial size goes 28 -> 26 -> 13 after the
    first stage and 13 -> 11 -> 5 after the second, so the flattened feature
    vector has 12 * 5 * 5 entries. The network returns raw logits of shape
    (N, 10); no softmax is applied.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: 3x3 convolutions without padding.
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 12, 3)

        # Classifier head.
        self.fc1 = nn.Linear(12 * 5 * 5, 100)
        self.fc2 = nn.Linear(100, 50)
        self.out = nn.Linear(50, 10)

    def forward(self, t):
        """Map a batch of single-channel images to class logits."""
        # Two conv -> ReLU -> 2x2 max-pool stages.
        stage1 = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), kernel_size=2, stride=2)

        # Flatten each sample to the width the first linear layer expects.
        flat = stage2.reshape(-1, self.fc1.in_features)

        # Two hidden layers with ReLU, then the linear output layer.
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        return self.out(hidden)

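```python
# Load a previously saved model, method 1: rebuild the network and restore its weights from a state_dict
network1 = CNN()
network1.load_state_dict(torch.load('./data/CNN-statedict.pth'))
network1.eval()

# Load a previously saved model, method 2: unpickle the entire saved model object (the CNN class must already be defined)
network2 = torch.load('./data/CNN-model.pth')
network2.eval()
```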

In [8]:
def get_num_correct(preds, labels):
    """Count how many rows of `preds` have their largest score at the labelled class.

    Args:
        preds: Tensor of per-class scores; the predicted class is the argmax along dim 1.
        labels: Tensor of class indices compared element-wise against the predictions.

    Returns:
        The number of matching predictions as a Python int.
    """
    predicted_classes = torch.argmax(preds, dim=1)
    matches = predicted_classes == labels
    return matches.sum().item()

In [None]:
# Train the model from scratch
network = CNN()
optimizer = optim.Adam(network.parameters(), lr=0.01)
epoch_num = 50

# The TensorDataset is local to make_loader, so take the sample count from
# the loader's own dataset when computing epoch accuracy.
num_train_samples = len(train_loader.dataset)

network.train()  # ensure training mode in case the model was switched to eval()
for epoch in range(epoch_num):

    total_loss = 0      # sum of the per-batch mean losses over the epoch
    total_correct = 0   # number of correctly classified training samples

    for images, labels in train_loader:    # Get the batch
        preds = network(images)   # Pass the batch
        loss = F.cross_entropy(preds, labels)    # Calculate the loss

        optimizer.zero_grad()   # Clear gradients left over from the previous step
        loss.backward()    # Calculate the gradients
        optimizer.step()   # Update the weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print("Epoch:{} , accuracy:{:.2f}% , loss:{}".format(epoch, total_correct/num_train_samples*100, total_loss))

| Label |      0      |    1    |    2     |   3   |  4   |   5    |   6   |    7    |  8  |     9      |
| :---: | :---------: | :-----: | :------: | :---: | :--: | :----: | :---: | :-----: | :-: | :--------: |
| Class | T-shirt/top | Trouser | Pullover | Dress | Coat | Sandal | Shirt | Sneaker | Bag | Ankle boot |

In [None]:
# Visualize convolutional-layer features
# Take one mini-batch from the training loader to use as visualization input.
sample_batch = next(iter(train_loader))
images, labels = sample_batch

In [None]:
def feature_visualize(net, layer_num, net_input, max_maps=6):
    """Plot the feature maps `net` produces at one stage for a single input image.

    Args:
        net: Module exposing `conv1`, `conv2`, `fc1`, `fc2` and `out` layers.
        layer_num: Stage whose activations are taken. 1 and 2 select the
            output of the 1st / 2nd conv + ReLU + max-pool stage. 3, 4 and
            anything else select the fully-connected / output activations,
            which are vectors and cannot be drawn as images.
        net_input: Input tensor holding exactly one sample with a leading
            batch axis, e.g. `images[0].unsqueeze(0)`.
        max_maps: Maximum number of channels to draw, starting from channel 0.
            Pass None to draw every channel.

    Raises:
        ValueError: If `max_maps` is smaller than 1, or if the selected
            activations are not a (channels, height, width) stack for a single
            sample (fully-connected stage or a batch larger than one).
    """
    def get_one_layer(net, layer_num, net_input):
        """Run the forward pass and return the activations after stage `layer_num`."""
        # 1st convolutional layer
        t = F.relu(net.conv1(net_input))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        if layer_num == 1:
            return t

        # 2nd convolutional layer
        t = F.relu(net.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        if layer_num == 2:
            return t

        # reshaping
        t = t.reshape(-1, 12*5*5)

        # 1st fully-connected layer
        t = F.relu(net.fc1(t))
        if layer_num == 3:
            return t

        # 2nd fully-connected layer
        t = F.relu(net.fc2(t))
        if layer_num == 4:
            return t

        # output layer
        t = net.out(t)
        return t

    if max_maps is not None and max_maps < 1:
        raise ValueError("max_maps must be at least 1 or None, got {}".format(max_maps))

    # Inference only: no autograd graph is needed for visualization.
    with torch.no_grad():
        x = get_one_layer(net, layer_num, net_input)

    # Drop only the batch axis and move to host memory before converting.
    x = x.squeeze(0).cpu().numpy()
    if x.ndim != 3:
        raise ValueError(
            "expected activations of shape (channels, height, width) for a single "
            "sample, got shape {}; use layer_num 1 or 2 with a batch of one"
            .format(x.shape)
        )

    n_maps = x.shape[0] if max_maps is None else min(max_maps, x.shape[0])

    # visualization using matplotlib
    # squeeze=False keeps `ax` two-dimensional even when only one map is drawn.
    fig, ax = plt.subplots(nrows=1, ncols=n_maps, figsize=(2 * n_maps, 2),
                           sharex='col', sharey='row', squeeze=False)
    for i in range(n_maps):
        ax[0, i].imshow(x[i])
    plt.show()

In [None]:
# Feature maps after the 1st conv + pooling stage for the first image of the
# sample batch; unsqueeze(0) restores the batch axis the network expects.
feature_visualize(network, layer_num=1, net_input=images[0].unsqueeze(0))

In [None]:
# Same image after the 2nd conv + pooling stage. That stage has 12 channels,
# of which this call draws the first 6.
feature_visualize(network, layer_num=2, net_input=images[0].unsqueeze(0))

In [None]:
# List every learnable parameter of the trained network with its tensor shape.
for name, param in network.named_parameters():
    print(name, '->', param.size())