In [1]:
%matplotlib inline
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# 定义超参数
image_size = 28
num_classes = 10
num_epochs = 1
batch_size = 64
learning_rate = 0.001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# 构建pipeline，对图像处理
pipeline = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,))
])

In [4]:
# 下载、加载数据集
train_dataset = dsets.MNIST(root='./data',
                        train=True,
                        download=True,
                        transform=pipeline) 
test_dataset = dsets.MNIST(root='./data',
                       train=False,
                       download=True,
                       transform=pipeline) 

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)

indices = range(len(test_dataset))
indices_val = indices[:5000]
indices_test = indices[5000:]

sampler_val = torch.utils.data.sampler.SubsetRandomSampler(indices_val)
sampler_test = torch.utils.data.sampler.SubsetRandomSampler(indices_test)

validation_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               sampler=sampler_val)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          sampler=sampler_test)

In [5]:
# # 构建网络
# neure = list(range(10, 200, 20))
# class MLP(nn.Module):
#     def __init__(self):
#         super(MLP, self).__init__()
#         self.mlp = nn.Sequential(
#             nn.Linear(image_size * image_size, ),
#             nn.Linear(512, num_classes))
#     def forward(self, x):
#         output = self.mlp(x)
#         return output, x

In [6]:
# model = MLP().to(device)
# optimizer = torch.optim.Adam(mlp.parameters(), lr=learning_rate)
# loss = nn.CrossEntropyLoss()

In [7]:
def rightness(predictions, labels):
    pred = torch.max(predictions.data, 1)[1]
    pred = pred.to(device)
    rights = pred.eq(labels.data.view_as(pred).to(device)).sum()
    rights = rights.to(device)
    return rights, len(labels)

In [8]:
num_neures = list(range(10, 200, 20))
loss_list = []
count = 0

record = []
weights = []

for neure in num_neures:
    class MLP(nn.Module):
        def __init__(self):
            super(MLP, self).__init__()
            self.mlp = nn.Sequential(
                nn.Linear(image_size * image_size, neure),
                nn.Linear(neure, num_classes))
        def forward(self, x):
            x = x.view(-1, image_size * image_size)
            output = self.mlp(x)
            return output   
    
    model = MLP().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    

    for batch_idx, (data, target) in enumerate(train_loader):
        train_rights = []
        
        model.train()
        data, target = Variable(data).to(device), Variable(target).to(device)
        output = model(data)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if batch_idx % 30 == 0:
        model.eval()
        val_rights = []

        for (data, target) in validation_loader:
            data, traget = Variable(data).to(device), Variable(target).to(device)
            # data, traget = Variable(data), Variable(target)
            output = net(data)
            right = rightness(output, target)
            val_rights.append(right)

        train_r = (sum([tup[0] for tup in train_rights]), sum([tup[1] for tup in train_rights]))
        val_r = (sum([tup[0] for tup in val_rights]), sum([tup[1] for tup in val_rights]))
        print('训练周期:{}[{}/{}({:.0f}%)]\t, Loss:{:.6f}\t, 训练正确率:{:.2f}%\t, 校检正确率:{:.2f}%'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.item(),
            100. * train_r[0] / train_r[1],
            100. * val_r[0] / val_r[1]))

        record.append(((100 - 100. * train_r[0] / train_r[1]).cpu(),
                       (100 - 100. * val_r[0] / val_r[1]).cpu()))
        model.eval()
    
    loss_list.append(loss.item())
    count += 1.
    print('\r训练进度:{:.2f}%'.format(count / len(num_neures) * 100), end='')

训练进度:100.00%

In [9]:
print(model)

MLP(
  (mlp): Sequential(
    (0): Linear(in_features=784, out_features=190, bias=True)
    (1): Linear(in_features=190, out_features=10, bias=True)
  )
)


In [10]:
loss_list

[0.5781953930854797,
 0.4566301703453064,
 0.9274514317512512,
 0.16988861560821533,
 0.17005279660224915,
 0.4711524248123169,
 0.34571120142936707,
 0.282652348279953,
 0.2480037808418274,
 0.42062607407569885]

In [11]:
num_neures

[10, 30, 50, 70, 90, 110, 130, 150, 170, 190]