In [42]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
%matplotlib inline

In [43]:
# device config
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reproducibility: seed torch's global RNG before any model is built or any
# shuffling DataLoader is iterated, so a Restart & Run All gives the same
# weight initialisation and the same mini-batch order.
seed = 42
torch.manual_seed(seed)

# Hyper parameters
num_epochs = 5          # full passes over the training set
num_classes = 10        # number of output logits of the classifier
batch_size = 100        # samples per mini-batch for both data loaders
learning_rate = 0.001   # step size handed to the optimizer

In [44]:
# MNIST dataset: images are converted to float tensors by ToTensor().
# Only the training split asks for a download; the test split is read from
# the same root directory.
train_dataset = torchvision.datasets.MNIST(
    root='./data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

test_dataset = torchvision.datasets.MNIST(
    root='./data/',
    train=False,
    transform=transforms.ToTensor(),
)

# Data loaders: the training batches are reshuffled every epoch, the test
# batches keep the dataset order. Both use `batch_size` samples per batch.
# (num_workers is left at its default; num_workers=2 was tried and disabled.)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [45]:
# convolutional NN (2 layers)
class ConvNN(nn.Module):
    """Two convolutional stages followed by one fully connected layer.

    Every stage is Conv2d(5x5, stride 1, padding 2) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2x2, stride 2). The convolution preserves the spatial size
    ((H - 5 + 2*2) / 1 + 1 = H) and the pooling halves it, so a 28x28 input
    is 14x14 after ``layer1`` and 7x7 after ``layer2``. The linear layer is
    sized for exactly that: 32 channels * 7 * 7 features.

    Args:
        num_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.layer1 = self._conv_stage(1, 16)
        self.layer2 = self._conv_stage(16, 32)
        self.fc = nn.Linear(32 * 7 * 7, num_classes)   # Linear(in, out)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one conv -> batch-norm -> ReLU -> max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Return the class logits for a batch of single-channel images."""
        features = self.layer2(self.layer1(x))
        # Flatten everything except the batch dimension before the linear layer.
        flat = features.reshape(features.shape[0], -1)
        return self.fc(flat)

# Build the network on the CPU.
# GPU variant: model = ConvNN(num_classes).to(device)
model = ConvNN(num_classes)

# Put the network in training mode before optimising; this matters because
# the model contains BatchNorm layers, which behave differently in eval mode.
model.train()

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one optimizer update per mini-batch, progress line every
# 100 batches.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        # The image batch goes into the network as-is; flattening for the
        # fully connected layer happens inside ConvNN.forward.
        # GPU variant: images = images.to(device); labels = labels.to(device)

        # Forward pass
        logits = model(images)
        loss = criterion(logits, labels)

        # Backward pass: clear old gradients, back-propagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, step, total_step, loss.item()))

Epoch [1/5], Step [100/600], Loss: 0.1875
Epoch [1/5], Step [200/600], Loss: 0.0777
Epoch [1/5], Step [300/600], Loss: 0.1983
Epoch [1/5], Step [400/600], Loss: 0.1088
Epoch [1/5], Step [500/600], Loss: 0.0877
Epoch [1/5], Step [600/600], Loss: 0.0621
Epoch [2/5], Step [100/600], Loss: 0.0509
Epoch [2/5], Step [200/600], Loss: 0.0090
Epoch [2/5], Step [300/600], Loss: 0.0413
Epoch [2/5], Step [400/600], Loss: 0.0361
Epoch [2/5], Step [500/600], Loss: 0.1294
Epoch [2/5], Step [600/600], Loss: 0.0178
Epoch [3/5], Step [100/600], Loss: 0.0168
Epoch [3/5], Step [200/600], Loss: 0.0273
Epoch [3/5], Step [300/600], Loss: 0.0072
Epoch [3/5], Step [400/600], Loss: 0.0048
Epoch [3/5], Step [500/600], Loss: 0.0041
Epoch [3/5], Step [600/600], Loss: 0.0142
Epoch [4/5], Step [100/600], Loss: 0.0397
Epoch [4/5], Step [200/600], Loss: 0.0056
Epoch [4/5], Step [300/600], Loss: 0.0443
Epoch [4/5], Step [400/600], Loss: 0.0290
Epoch [4/5], Step [500/600], Loss: 0.0113
Epoch [4/5], Step [600/600], Loss:

BN和Dropout有两种工作模式：训练模式（训练前调用model.train()）和评估模式。
**测试前要调用model.eval()**，这样BatchNorm会使用训练阶段累积的滑动均值和方差（running statistics），而不是当前batch的统计量；Dropout在评估模式下也不再随机丢弃神经元。


*   ***model.eval()*** will notify all your layers that you are in eval mode, that way, batchnorm or dropout layers will work in eval mode instead of training mode.

为什么测试不使用梯度
*   ***torch.no_grad()*** affects the autograd engine by deactivating it. This reduces memory usage and speeds up computation, but you won’t be able to backpropagate (which you don’t need in an evaluation script anyway).

In [41]:
# Evaluate classification accuracy on the test set.
model.eval()   # evaluation mode for the BatchNorm layers

correct = 0    # number of correctly classified test images
total = 0      # number of test images seen
with torch.no_grad():   # no gradients are needed for evaluation
    for images, labels in test_loader:
        # GPU variant: images = images.to(device); labels = labels.to(device)
        output = model(images)
        # torch.max over dim 1 returns (max values, argmax indices); only the
        # indices, i.e. the predicted class per sample, are needed here.
        predict = torch.max(output, 1)[1]
        correct += (predict == labels).sum().item()
        total += labels.size(0)

print('Accuracy of the model is: {}'.format(100 * correct / total))

# save model checkpoint
# torch.save(model.state_dict(), 'model.pkl')

Accuracy of the model is: 98.98
