## First Neural Network: Image Classification 

Objectives:
- Train a minimal image classifier on [MNIST](https://paperswithcode.com/dataset/mnist) using PyTorch
- Uses PyTorch and torchvision

In [None]:
# The usual imports
import torch # 导包
import torch.nn as nn # 导包
import torchvision # 导包
import torchvision.transforms as transforms # 导包

In [None]:
# load the data
class ReshapeTransform:
    """Callable transform that reshapes a tensor to a fixed target shape."""

    def __init__(self, new_size):
        # Target shape passed to torch.reshape on every call.
        self.new_size = new_size

    def __call__(self, img):
        """Return ``img`` reshaped to ``self.new_size``."""
        target_shape = self.new_size
        reshaped = torch.reshape(img, target_shape)
        return reshaped

# Preprocessing pipeline applied to every sample: convert the image to a
# tensor, make sure it is float32, then flatten it so a 28x28 image becomes
# a vector of 784 values.
transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.ConvertImageDtype(torch.float32),
    ReshapeTransform((-1,)),
])

# Training split of MNIST (downloaded into ./data when not already present).
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transformations,
)

# Test split of MNIST, using the same preprocessing as the training split.
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transformations,
)

In [None]:
# check shape of data
trainset.data.shape, testset.data.shape # shapes of the raw training and test data

In [None]:
# data loader
BATCH_SIZE = 128  # number of samples per mini-batch

# Training loader: reshuffles the training set on every pass.
train_dataloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)

# Test loader: keeps the dataset order (no shuffling).
test_dataloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

In [None]:
# model
# ReLU is the activation function f(x) = max(0, x): its derivative is 0 for
# negative inputs and 1 for positive inputs.
# See https://zhuanlan.zhihu.com/p/428448728
model = nn.Sequential(
    nn.Linear(784, 512),  # flattened 28*28 input -> 512 hidden features
    nn.ReLU(),            # non-linearity between the two linear layers
    nn.Linear(512, 10),   # 512 hidden features -> 10 outputs
)

In [None]:
# training preparation
trainer = torch.optim.RMSprop(model.parameters()) # optimizer over the model's parameters; RMS stands for root mean square
loss = nn.CrossEntropyLoss() # cross-entropy loss

In [None]:
def get_accuracy(output, target, batch_size):
    """Return the percentage of correct predictions for one batch.

    The predicted class of each row of ``output`` is the index of its
    largest value along dimension 1. The number of predictions equal to
    ``target`` is scaled by 100 and divided by ``batch_size``.

    Returns:
        The accuracy as a Python float.
    """
    # torch.max along dim 1 yields (values, indices); the indices are the
    # predicted classes.
    _, predicted = torch.max(output, 1)
    predicted = predicted.view(target.size())

    num_correct = (predicted == target).sum()
    percent_correct = 100.0 * num_correct / batch_size
    return percent_correct.item()

In [None]:
# train
for ITER in range(5):  # train for 5 epochs
    # Per-epoch accumulators. Both metrics are weighted by the actual number
    # of samples in each batch, because the last batch produced by the loader
    # can be smaller than BATCH_SIZE; dividing by the constant BATCH_SIZE
    # would under-report the accuracy.
    train_acc = 0.0           # sum over batches of (batch accuracy % * batch size)
    train_running_loss = 0.0  # sum over batches of (mean batch loss * batch size)
    num_samples = 0           # samples seen so far in this epoch

    model.train()  # put the model in training mode
    for X, y in train_dataloader:
        output = model(X)    # forward pass
        l = loss(output, y)  # loss for this batch

        # update the parameters
        l.backward()         # back-propagate to compute the gradients
        trainer.step()       # apply the optimizer update
        trainer.zero_grad()  # clear gradients before the next batch

        # gather metrics
        n = y.size(0)  # actual number of samples in this batch
        train_acc += get_accuracy(output, y, n) * n
        train_running_loss += l.item() * n
        num_samples += n

    print('Epoch: %d | Train loss: %.4f | Train Accuracy: %.4f'
          % (ITER+1, train_running_loss / num_samples, train_acc / num_samples))

In [None]:
# test
# Evaluate on the test set. Accuracy is weighted by the actual number of
# samples in each batch: the last batch can be smaller than BATCH_SIZE, so
# dividing every batch by the constant BATCH_SIZE under-reports the result.
test_acc = 0.          # sum over batches of (batch accuracy % * batch size)
num_test_samples = 0   # total number of evaluated samples
model.eval()  # put the model in evaluation mode
with torch.no_grad():  # no gradients are needed for evaluation
    for X, y in test_dataloader:
        output = model(X)
        n = y.size(0)  # actual number of samples in this batch
        test_acc += get_accuracy(output, y, n) * n
        num_test_samples += n

print('Test Accuracy: %.4f'
      % (test_acc / num_test_samples))

### Other things to try

- Evaluate on test set
- Plot loss curve
- Add more layers to the model