In [1]:
# 导入必要的包
import torch
import torch.nn as nn
import torch.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
#import visdom

In [2]:
# Hyperparameters shared by the cells below
BATCHSIZE = 128  # mini-batch size
LR = 0.02        # learning rate
EPOCH = 10       # number of passes over the full training set
INTERVAL = 100   # iterations between training-loss printouts

# Use the first CUDA device when one is available, otherwise fall back to CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(device)

cpu


In [3]:
# LeNet5 network definition
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier for 1-channel 32x32 images.

    Two conv/ReLU/max-pool stages (``features``) reduce a 32x32x1 input to a
    5x5x16 feature map, which three fully connected layers (``classifier``)
    turn into ``num_classes`` logits. The first fully connected layer takes
    the place of the 5x5 convolution (16 -> 120 channels) that would map the
    5x5x16 map to 1x1x120.

    Args:
        num_classes: number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 6, 5),    # 32x32x1  -> 28x28x6
            nn.ReLU(True),         # in-place ReLU
            nn.MaxPool2d(2, 2),    # 28x28x6  -> 14x14x6
            nn.Conv2d(6, 16, 5),   # 14x14x6  -> 10x10x16
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),    # 10x10x16 -> 5x5x16
        )
        self.classifier = nn.Sequential(
            nn.Linear(5 * 5 * 16, 120),
            nn.ReLU(True),
            nn.Linear(120, 84),
            nn.ReLU(True),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for input ``x``.

        ``x`` is expected to be (batch, 1, 32, 32). Any other spatial size
        makes the first linear layer raise a shape error.
        """
        x = self.features(x)
        # Flatten per sample (keep dim 0). Reshaping with view(-1, 400) would
        # silently merge/split samples across the batch when the feature map
        # is not 5x5x16 instead of failing.
        x = x.flatten(1)
        x = self.classifier(x)
        return x

# Model to be trained, placed on the selected device (CPU or GPU)
net = LeNet5().to(device)
net

LeNet5(
  (features): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)

In [4]:
# Load and normalize the MNIST dataset.
# If MNIST is not present locally it is downloaded automatically.

# Preprocessing: resize to 32x32, convert to a tensor, then normalize the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Datasets (train / test splits), stored under ./data
trainset = torchvision.datasets.MNIST(
    './data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(
    './data', train=False, download=True, transform=transform)

# Data loaders: only the training loader shuffles
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCHSIZE, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCHSIZE, shuffle=False, num_workers=2)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [5]:
# Loss function and optimizer. The loss takes an (output, target) pair and
# returns a value measuring how far the network output is from the target.
# Cross-entropy is used as the loss and plain SGD as the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=LR)

In [6]:
def train(epoch):
    """Run one training epoch, i.e. one full pass over ``trainloader``.

    Uses the module-level ``net``, ``criterion``, ``optimizer`` and
    ``device``. Every ``INTERVAL`` iterations the mean loss of that window
    is printed and the window is reset; a partial window left at the end of
    the epoch is not reported.

    Args:
        epoch: epoch number, used only in the printed message.
    """
    net.train()  # switch the model to training mode
    window_loss = 0.0
    for step, (images, labels) in enumerate(trainloader, start=1):
        images = images.to(device)
        labels = labels.to(device)

        # clear gradients left over from the previous iteration
        optimizer.zero_grad()

        # forward pass and loss
        output = net(images)
        loss = criterion(output, labels)

        # accumulate and periodically report the training loss
        window_loss += loss.item()
        if step % INTERVAL == 0:
            print('Train - [Epoch %d /Iteration %d] Loss: %f' % (epoch, step, window_loss/INTERVAL))
            window_loss = 0.0

        # backward pass, then update the model parameters
        loss.backward()
        optimizer.step()

"""
在测试集上对模型进行测试，输出 test loss 和准确率
"""
def test(epoch):
    # 将模型设置为测试模式
    net.eval()
    total_correct = 0
    avg_loss = 0.0
    val_iteration = 0
    for i, (images, labels) in enumerate(testloader):
        images, labels = images.to(device), labels.to(device)
        output = net(images)
        avg_loss += criterion(output, labels).sum()
        # 统计预测正确的样本个数
        pred = output.detach().max(1)[1]
        total_correct += pred.eq(labels.view_as(pred)).sum()
        val_iteration += 1

    avg_loss /= val_iteration
    print('Test - [Epoch %d] Avg Loss: %f, Accuracy: %f %%' % (epoch, avg_loss.detach().cpu().item(), 100.0*float(total_correct) / len(testset)))

In [7]:
# Sweep over the dataset EPOCH times: one training pass followed by one
# evaluation on the test set per epoch (epochs are reported 1-based).
for epoch in range(EPOCH):
    train(epoch + 1)
    test(epoch + 1)

Train - [Epoch 1 /Iteration 100] Loss: 2.295071
Train - [Epoch 1 /Iteration 200] Loss: 2.261444
Train - [Epoch 1 /Iteration 300] Loss: 2.027999
Train - [Epoch 1 /Iteration 400] Loss: 1.155399
Test - [Epoch 1] Avg Loss: 0.520879, Accuracy: 84.280000 %
Train - [Epoch 2 /Iteration 100] Loss: 0.452537
Train - [Epoch 2 /Iteration 200] Loss: 0.304626
Train - [Epoch 2 /Iteration 300] Loss: 0.241123
Train - [Epoch 2 /Iteration 400] Loss: 0.213320
Test - [Epoch 2] Avg Loss: 0.156939, Accuracy: 95.120000 %
Train - [Epoch 3 /Iteration 100] Loss: 0.170911
Train - [Epoch 3 /Iteration 200] Loss: 0.152231
Train - [Epoch 3 /Iteration 300] Loss: 0.149747
Train - [Epoch 3 /Iteration 400] Loss: 0.128738
Test - [Epoch 3] Avg Loss: 0.105213, Accuracy: 96.690000 %
Train - [Epoch 4 /Iteration 100] Loss: 0.118659
Train - [Epoch 4 /Iteration 200] Loss: 0.116046
Train - [Epoch 4 /Iteration 300] Loss: 0.108657
Train - [Epoch 4 /Iteration 400] Loss: 0.104477
Test - [Epoch 4] Avg Loss: 0.087813, Accuracy: 97.19000

In [8]:
# Train a fresh LeNet5 model with a new set of hyperparameters
BATCHSIZE = 64   # mini-batch size
LR = 0.05        # learning rate
EPOCH = 5        # number of passes over the full training set
INTERVAL = 200   # iterations between training-loss printouts

# Rebuild the data loaders so they use the new batch size
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCHSIZE, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCHSIZE, shuffle=False, num_workers=2)

# New model instance, placed on the selected device (CPU or GPU)
net = LeNet5().to(device)

# New loss function and an optimizer bound to the new model's parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=LR)

# Train and evaluate: one training pass plus one test pass per epoch
for epoch in range(EPOCH):
    train(epoch + 1)
    test(epoch + 1)

Train - [Epoch 1 /Iteration 200] Loss: 1.847450
Train - [Epoch 1 /Iteration 400] Loss: 0.400124
Train - [Epoch 1 /Iteration 600] Loss: 0.198089
Train - [Epoch 1 /Iteration 800] Loss: 0.140987
Test - [Epoch 1] Avg Loss: 0.105098, Accuracy: 96.620000 %
Train - [Epoch 2 /Iteration 200] Loss: 0.104183
Train - [Epoch 2 /Iteration 400] Loss: 0.085888
Train - [Epoch 2 /Iteration 600] Loss: 0.082229
Train - [Epoch 2 /Iteration 800] Loss: 0.079476
Test - [Epoch 2] Avg Loss: 0.059780, Accuracy: 97.900000 %
Train - [Epoch 3 /Iteration 200] Loss: 0.066094
Train - [Epoch 3 /Iteration 400] Loss: 0.062402
Train - [Epoch 3 /Iteration 600] Loss: 0.059802
Train - [Epoch 3 /Iteration 800] Loss: 0.053924
Test - [Epoch 3] Avg Loss: 0.048369, Accuracy: 98.420000 %
Train - [Epoch 4 /Iteration 200] Loss: 0.052968
Train - [Epoch 4 /Iteration 400] Loss: 0.045956
Train - [Epoch 4 /Iteration 600] Loss: 0.048635
Train - [Epoch 4 /Iteration 800] Loss: 0.046136
Test - [Epoch 4] Avg Loss: 0.040025, Accuracy: 98.70000