In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [None]:
# Reproducibility: seed PyTorch's global RNG before any model or data loader is
# created, so weight initialisation and shuffle order are repeatable when the
# notebook is re-run from a fresh kernel on the same setup.
seed = 42
torch.manual_seed(seed)

# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
input_size = 784        # length of one flattened 28x28 MNIST image (28 * 28)
hidden_size = 500       # width of the hidden layer
num_classes = 10        # number of output classes
num_epochs = 5          # full passes over the training set
batch_size = 100        # samples per mini-batch
learning_rate = 0.001   # step size handed to the optimizer

In [None]:
# Datasets: MNIST train and test splits stored under ./data/, with every image
# converted to a tensor. Only the training split requests a download.
train_dataset = torchvision.datasets.MNIST(
    root='./data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data/',
    train=False,
    transform=transforms.ToTensor(),
)

# Loaders: both yield mini-batches of `batch_size` samples. Training batches are
# shuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

class NNet(nn.Module):
    """Fully connected neural network with one hidden layer.

    Data flow: ``fc1`` (linear) -> ``relu`` -> ``fc2`` (linear).

    Args:
        input_size: number of features in each input row.
        hidden_size: number of units in the hidden layer.
        num_classes: number of values produced for each input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(NNet, self).__init__()
        # The attribute names below become the prefixes of the state_dict keys.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of the last linear layer for ``x``; no softmax is applied."""
        hidden = self.relu(self.fc1(x))   # hidden-layer activations
        return self.fc2(hidden)

Epoch [1/5], Step [100/600], Loss: 0.2212
Epoch [1/5], Step [200/600], Loss: 0.1798
Epoch [1/5], Step [300/600], Loss: 0.2316
Epoch [1/5], Step [400/600], Loss: 0.1647
Epoch [1/5], Step [500/600], Loss: 0.1233
Epoch [1/5], Step [600/600], Loss: 0.1508
Epoch [2/5], Step [100/600], Loss: 0.1535
Epoch [2/5], Step [200/600], Loss: 0.1716
Epoch [2/5], Step [300/600], Loss: 0.1624
Epoch [2/5], Step [400/600], Loss: 0.0817
Epoch [2/5], Step [500/600], Loss: 0.0748
Epoch [2/5], Step [600/600], Loss: 0.0781
Epoch [3/5], Step [100/600], Loss: 0.0625
Epoch [3/5], Step [200/600], Loss: 0.0828
Epoch [3/5], Step [300/600], Loss: 0.0395
Epoch [3/5], Step [400/600], Loss: 0.0776
Epoch [3/5], Step [500/600], Loss: 0.0737
Epoch [3/5], Step [600/600], Loss: 0.0642
Epoch [4/5], Step [100/600], Loss: 0.0424
Epoch [4/5], Step [200/600], Loss: 0.0393
Epoch [4/5], Step [300/600], Loss: 0.0383
Epoch [4/5], Step [400/600], Loss: 0.0427
Epoch [4/5], Step [500/600], Loss: 0.0947
Epoch [4/5], Step [600/600], Loss:

详细说一下pytorch NN训练过程：
外层是多个epoch循环，一个epoch代表整个训练集都正向反向传播计算一遍。内层循环是指，每个epoch中，将训练集分成具有batch_size的几个batch（比如batch_size=100，则20000数据的训练集将被分成200份batch），每一个batch（train_loader里的每份batch：i, (images, labels)），正向反向传播一遍，所有batch计算完一遍后一个epoch结束。

所以每一份batch计算梯度前一定要zero_grad()，不然会累加上一个batch的梯度。

一个epoch中的循环次数也就是数据集分成的batch数。每份batch中的batch_size个样本都会参与正、反向传播求梯度；实现上整个batch作为一个张量一次性完成正向和反向计算，nn.CrossEntropyLoss默认对batch内各样本的损失取平均，所以每次参数更新用的是batch内的平均梯度。

In [None]:
# Build the network and move its parameters to the configured device.
model = NNet(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)   # iterations per epoch = number of batches
log_every = 100                  # print the current loss every `log_every` steps

# Explicitly select training mode so the cell does not depend on whichever mode
# an earlier cell (e.g. an evaluation cell) left the module in.
model.train()
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        # Flatten each image to a vector of length `input_size` and move the
        # tensors to the configured device.
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass: clear the gradients left from the previous batch first,
        # otherwise backward() would add the new gradients on top of them.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % log_every == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, step, total_step, loss.item()))

In [None]:
# Evaluate the model on the test set
# Select inference mode explicitly. NNet only contains Linear and ReLU layers,
# so this does not change its outputs, but it keeps the cell correct if layers
# that behave differently during training are added later.
model.eval()

correct = 0
total = 0
with torch.no_grad():   # no gradients are needed for evaluation
    for images, labels in test_loader:
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # torch.max along dim 1 returns (largest value, its index) for each row.
        # The values are the raw outputs of the last linear layer, not
        # probabilities; the index is the predicted class.
        max_score, predict = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predict == labels).sum().item()

print('Accuracy of model is {}'.format(correct/total*100))

Accuracy of model is 97.88


In [None]:
# save model checkpoints
# torch.save(model.state_dict(), 'model.pkl')