# 03 Build The Neural Network

在PyTorch中，`torch.nn`集成了大部分搭建神经网络所需要的工具，且所有的模块都继承自`nn.Module`。神经网络就是一个大的模块，里面集成了许多小的模块（layer），这样的嵌套结构能够轻松实现复杂的模型结构。

以FashionMNIST为例，搭建神经网络用于分类：

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f'Using {device} device!')

Using cuda device!


In [2]:
# Define a fully connected (MLP) classifier
class NeuralNetwork(nn.Module):
    """Three-layer fully connected classifier with ReLU activations.

    The input is flattened from dim 1 onward, so any input whose non-batch
    dimensions multiply to ``in_features`` is accepted. The output holds raw
    per-class scores; no softmax is applied inside the model.

    Args:
        in_features: Number of values per sample after flattening.
            Defaults to 28 * 28 (one 28x28 single-channel image).
        hidden_features: Width of both hidden layers. Defaults to 512.
        num_classes: Number of scores produced per sample. Defaults to 10.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ) -> None:
        super().__init__()
        self.flatten = nn.Flatten()
        # The attribute name and the Sequential positions (0, 2, 4) are kept
        # unchanged so parameter names remain `linear.0.weight`, etc.
        self.linear = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten ``x`` per sample and return scores of shape (batch, num_classes)."""
        return self.linear(self.flatten(x))

# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


## `nn.Flatten()`

`nn.Flatten()`默认将张量从第1维开始的所有维度展平成一维（即`start_dim=1, end_dim=-1`），**同时保留第0维，也就是minibatch的维度**。

In [12]:
# Example: a batch of 20 RGB images, each of shape 3 x 28 x 28
data = torch.rand(20, 3, 28, 28)
# Create the layer in this cell so it runs when the file is executed top to
# bottom; previously `flatten_layer` was used here before any definition.
flatten_layer = nn.Flatten()
flatten_data = flatten_layer(data)
print(flatten_data.shape)

torch.Size([20, 2352])


In [13]:
# Example: a batch of 3 grayscale images, each 28 x 28 (no channel dimension)
flatten_layer = nn.Flatten()
data = torch.rand(3, 28, 28)
flatten_data = flatten_layer(data)
print(flatten_data.shape)

torch.Size([3, 784])


## `nn.Linear`

`nn.Linear`用于构建全连接层，定义时只需要给定输入和输出的维度，以batch形式进行数据输入输出。

In [14]:
# Fully connected layer mapping 28*28 input features to 20 output features,
# applied to the whole flattened batch at once.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flatten_data)
print(hidden1.shape)

torch.Size([3, 20])


## `nn.Softmax`

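`nn.Softmax`沿指定维度`dim`对张量做归一化：每个元素被映射到[0,1]之间，且该维度上所有元素之和为1。例如对形状为`[3, 20]`的`hidden1`，`dim=1`表示每一行（每个样本）的20个值之和为1，`dim=0`表示每一列的3个值之和为1。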

In [18]:
# Softmax over dim=1: normalizes across the features of each sample (row).
print(nn.Softmax(dim=1)(hidden1))
# Softmax over dim=0: normalizes across the batch for each feature (column).
softmax = nn.Softmax(dim=0)
print(softmax(hidden1))

tensor([[0.0297, 0.0954, 0.0510, 0.0297, 0.0580, 0.0469, 0.0744, 0.0280, 0.0613,
         0.0518, 0.0554, 0.0346, 0.0903, 0.0569, 0.0339, 0.0322, 0.0577, 0.0490,
         0.0360, 0.0276],
        [0.0358, 0.1267, 0.0421, 0.0293, 0.0605, 0.0307, 0.0621, 0.0307, 0.0671,
         0.0394, 0.0665, 0.0426, 0.0669, 0.0549, 0.0470, 0.0336, 0.0461, 0.0540,
         0.0348, 0.0293],
        [0.0284, 0.0761, 0.0520, 0.0294, 0.0548, 0.0494, 0.0603, 0.0357, 0.0579,
         0.0376, 0.0635, 0.0464, 0.1164, 0.0616, 0.0301, 0.0349, 0.0393, 0.0520,
         0.0328, 0.0412]], grad_fn=<SoftmaxBackward>)
tensor([[0.3173, 0.3205, 0.3535, 0.3374, 0.3360, 0.3716, 0.3795, 0.2982, 0.3302,
         0.4038, 0.3001, 0.2815, 0.3323, 0.3296, 0.3062, 0.3215, 0.4047, 0.3174,
         0.3487, 0.2829],
        [0.3866, 0.4296, 0.2944, 0.3358, 0.3536, 0.2458, 0.3198, 0.3300, 0.3650,
         0.3099, 0.3637, 0.3496, 0.2487, 0.3212, 0.4280, 0.3386, 0.3263, 0.3535,
         0.3405, 0.3041],
        [0.2961, 0.2498, 0.3521,

In [20]:
# Walk over every parameter registered on the model and print its qualified
# name, its shape, and a preview of its first two entries along dim 0
# (two rows for a weight matrix, two scalars for a bias vector).
for name, param in model.named_parameters():
    print(f'{name} | Size: {param.shape} | Values: {param[:2]}' )

linear.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[ 0.0226,  0.0243,  0.0088,  ..., -0.0278,  0.0331,  0.0266],
        [-0.0170, -0.0243,  0.0055,  ..., -0.0043, -0.0339, -0.0236]],
       device='cuda:0', grad_fn=<SliceBackward>)
linear.0.bias | Size: torch.Size([512]) | Values: tensor([-0.0219,  0.0115], device='cuda:0', grad_fn=<SliceBackward>)
linear.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[-0.0207,  0.0115,  0.0362,  ...,  0.0167, -0.0041, -0.0337],
        [-0.0173, -0.0167, -0.0144,  ..., -0.0093,  0.0300, -0.0358]],
       device='cuda:0', grad_fn=<SliceBackward>)
linear.2.bias | Size: torch.Size([512]) | Values: tensor([-0.0012,  0.0398], device='cuda:0', grad_fn=<SliceBackward>)
linear.4.weight | Size: torch.Size([10, 512]) | Values: tensor([[-0.0104,  0.0245,  0.0105,  ...,  0.0360, -0.0108, -0.0270],
        [-0.0333,  0.0087, -0.0329,  ..., -0.0082, -0.0086, -0.0211]],
       device='cuda:0', grad_fn=<SliceBackward>)
linear.4.bias | Size: 