In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import enum

# <font size=7>神经网络简单实现</font>

`torch.nn` 仅支持输入为 mini-batch，即输入变量应比神经网络输入端所要求的维度高一维

In [3]:
# Random demo data; both tensors carry a leading batch axis of size 1.
inputs = torch.randn(1, 1, 32, 32)
targets = torch.randn(10).view(1, -1)  # 10 random values reshaped to (1, 10)

`nn.Module` 神经网络模块，可包含各种 layer；它以一种便捷的方式封装参数，并提供将参数移至 GPU、导出、加载等辅助功能；

`nn.Parameter` 一种张量，当它被指定为 `Module` 的属性时，会被自动注册为参数；

In [4]:
class Network(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    ``fc1`` takes 16 * 6 * 6 input features, so the flattened output of the
    second pooling stage must provide that many features per sample (which is
    the case for a 1 x 32 x 32 input).
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages, both with 3x3 kernels.
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # Fully connected head ending in 10 outputs.
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run ``x`` through the network and return the output of ``fc3``."""
        # A single int and an (h, w) tuple are equivalent for a square window.
        pooled = F.max_pool2d(F.relu(self.conv1(x)), 2)
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), (2, 2))
        # Flatten everything except the leading axis before the linear layers.
        flat = pooled.view(-1, self.num_flat_features(pooled))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first."""
        return x.size()[1:].numel()


network = Network()

声明 loss function、优化器，并训练

In [5]:
# Mean-squared-error loss and plain SGD over every parameter of the network.
loss_func = nn.MSELoss()
optimizer = optim.SGD(network.parameters(), lr=0.01)

for t in range(3):
    optimizer.zero_grad()  # clear the gradients kept from the previous backward pass
    # Call the module itself instead of .forward() so that nn.Module.__call__
    # runs (it dispatches any registered hooks before/after forward).
    outputs = network(inputs)
    loss = loss_func(outputs, targets)
    loss.backward()
    optimizer.step()  # apply the parameter update

In [None]:
# load_state_dict() requires the state dict to load as its argument; round-trip
# the network's own state dict so that the cell runs on a fresh kernel.
network.load_state_dict(network.state_dict())

#   

#   

# <font size=7>图像分类的简单实现</font>

通常情况下 Python 的数据包有：
- 对于图像，扩展包有Pillow, OpenCV

- 对于音频，扩展包有scipy, librosa

- 对于文本，扩展包有NLTK, SpaCy

此外，对于视觉图片，pytorch有一套专门的扩展包 torchvision，其能够加载 ImageNet, CIFAR10, MNIST 等数据集，同时能够处理图像数据转换

这里我们使用 CIFAR10 的数据集，其包括 10 个类别，每张图片均为 $3\times32\times32$ 大小的 RGB 图片

加载数据及预处理

In [4]:
import torchvision
import torchvision.transforms as transforms


# Preprocessing pipeline: convert each image to a tensor, then normalize the
# three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR10 training and test splits, downloaded into ./data when missing.
trainset = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform
)
testset = torchvision.datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform
)

# Mini-batches of 4 images; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=0
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=0
)

# Class names, indexed by the integer label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


可视化

In [None]:
import numpy as np
import matplotlib.pyplot as plt



def imshow(img):
    """Display an image tensor with matplotlib.

    The values are mapped through ``img / 2 + 0.5`` (undoing a mean-0.5 /
    std-0.5 normalization), and axis 0 is moved to the last position before
    the array is handed to ``plt.imshow``.
    """
    restored = img / 2 + 0.5
    pixels = restored.numpy().transpose(1, 2, 0)  # axes (0, 1, 2) -> (1, 2, 0)
    plt.imshow(pixels)
    plt.show()



# Fetch one batch of training images and their labels.
data_iter = iter(trainloader)
# Use the built-in next(): DataLoader iterators in current PyTorch releases do
# not provide a .next() method.
images, labels = next(data_iter)

# Show the whole batch as a single image grid.
imshow(torchvision.utils.make_grid(images))

# Print the class name of every image in the batch (not a hard-coded count, so
# this keeps working if the loader's batch size changes).
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))


搭建神经网络

In [None]:
class Network(nn.Module):
    """CNN for 3-channel inputs: two conv + pool stages and three linear layers.

    ``forward`` flattens to 16 * 5 * 5 features per sample, which is what the
    second pooling stage produces for 3 x 32 x 32 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # One pooling module shared by both convolutional stages.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run ``x`` through the network and return the output of ``fc3``."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(F.relu(conv(features)))
        # Flatten to (N, 400) before the fully connected layers.
        flat = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
    
    
# Fresh model, cross-entropy loss for the 10-way classification, and SGD with
# momentum over all of the model's parameters.
network = Network()
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(network.parameters(), lr=0.001, momentum=0.9)

for epoch in range(1):
    running_loss = 0.
    for i, data in enumerate(trainloader):
        inputs, labels = data
        optimizer.zero_grad()  # drop the gradients of the previous mini-batch
        outputs = network(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        # Apply the update; without this call the gradients are computed but
        # the weights never change, so the model does not learn.
        optimizer.step()
        running_loss += loss.item()
        # Report the mean loss over every 2000 mini-batches.
        if i % 2000 == 1999:
            print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss/2000))
            running_loss = 0.

print("Finish")

<generator object Module.parameters at 0x000001F6A2ED1CC8>


Parameter containing:
tensor([[[[ 0.0715, -0.1070,  0.0901, -0.0183, -0.0486],
          [-0.0773, -0.0878,  0.0074, -0.0526, -0.0007],
          [-0.0693,  0.0135,  0.1086,  0.1140, -0.0793],
          [-0.0913,  0.1110,  0.0938, -0.1065, -0.0803],
          [-0.0327, -0.0799, -0.0620,  0.0910, -0.0642]],

         [[ 0.0032, -0.0902,  0.0187,  0.0273,  0.0013],
          [ 0.0168, -0.0550, -0.0617, -0.0363, -0.0891],
          [-0.0027,  0.0826, -0.0840,  0.0934, -0.0397],
          [-0.0467,  0.0909,  0.0567, -0.0134,  0.0685],
          [-0.1014, -0.0683, -0.0512, -0.0349, -0.0873]],

         [[-0.1077,  0.0988,  0.0007,  0.0599,  0.0799],
          [ 0.0165, -0.0046, -0.0878, -0.0085,  0.0597],
          [ 0.0804,  0.0837,  0.1123,  0.0726, -0.0480],
          [-0.0641, -0.0087, -0.1038,  0.0646,  0.0476],
          [ 0.0406,  0.0855, -0.0562,  0.1107,  0.1144]]],


        [[[-0.1002, -0.0992, -0.0734, -0.0603, -0.0066],
          [ 0.1109, -0.0654,  0.0482,  0.0818, -0.0292],
 

`optimizer.step(closure=None)`

执行一个优化步骤

- closure (callable): 一个重新评估模型并返回loss值的 closure

### `<model name>.train()`

`<model name>.train(mode=True)`

将模块设置为训练模式。该语句仅对某些特定模块起作用；对于受其影响的模块（如 `Dropout` 类、`BatchNorm` 类等），它们在训练/测试模式下的行为详见相关文档

**Args**

- mode: ``True``指代训练模式，``False``指代测试模式