<a href="https://colab.research.google.com/github/Hinna0818/DL-self-practice/blob/main/pytorch/4network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
## 有gpu就用，没有就用cpu
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
## Define the network class
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        in_features: Number of values per sample after flattening
            (default ``28*28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of output logits per sample (default ``10``).

    Calling ``NeuralNetwork()`` with no arguments builds exactly the
    784 -> 512 -> 512 -> 10 network.
    """

    def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
        super().__init__()  ## nn.Module is the parent class; run its initialiser first
        self.flatten = nn.Flatten()  ## reshape [batch, c, w, h] into [batch, c*w*h]
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` per sample and return the raw (un-normalised) logits."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
## Build the network, then move it onto the selected device
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
## Push one random 28x28 input through the model and pick the most likely class
X = torch.rand(1, 28, 28, device=device)
logits = model(X)  ## call the module itself rather than model.forward(X)
pred_probab = torch.softmax(logits, dim=1)
y_pred = pred_probab.argmax(dim=1)  ## the index of the largest probability is the prediction
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6], device='cuda:0')


In [6]:
## Model layers: start from a mini-batch of 3 random 28x28 images
input_image = torch.rand((3, 28, 28))
print(f"input_image size: {input_image.shape}")  # the first dimension is the batch

input_image size: torch.Size([3, 28, 28])


In [7]:
## nn.Flatten(): collapse each sample into a single vector
flatten = nn.Flatten(start_dim=1, end_dim=-1)
image = flatten(input_image)
## dim 0 is kept as the first dimension; the product of all later dimensions becomes the second
print(image.shape)

torch.Size([3, 784])


In [10]:
## nn.Linear(): a fully connected layer used here as the hidden layer
## (input dimension 28*28, output of 20 features)
linear = nn.Linear(28 * 28, 20)
hidden1 = linear(image)
print(f"hidden1 size: {hidden1.shape}")

hidden1 size: torch.Size([3, 20])


In [11]:
## nn.ReLU(): activation function that introduces non-linearity
relu = nn.ReLU()
print(f"before relu: {hidden1}\n\n")
hidden1 = relu(hidden1)  ## negative entries are clamped to 0, the rest pass through
print(f"after relu: {hidden1}\n\n")

before relu: tensor([[-0.4138, -0.5380,  1.1898,  0.1957,  0.1699, -0.2012,  0.3226,  0.4311,
          0.0631,  0.1130,  0.3585,  0.3221, -0.0993,  0.2195,  0.1471,  0.5285,
          0.0434, -0.0572, -0.3245, -0.0780],
        [-0.1979, -0.1831,  1.0424,  0.1839,  0.3930, -0.4737,  0.1668,  0.2714,
         -0.0869,  0.3771,  0.1842,  0.3056, -0.1572,  0.4169,  0.1235,  0.3015,
         -0.1508, -0.1648, -0.0668,  0.2566],
        [-0.4163, -0.5757,  0.8150,  0.1344,  0.0659, -0.1451, -0.0881,  0.2826,
         -0.0071,  0.4506,  0.2347,  0.4475, -0.3098,  0.2240, -0.2321,  0.6294,
         -0.1053,  0.2307, -0.2095,  0.1370]], grad_fn=<AddmmBackward0>)


after relu: tensor([[0.0000, 0.0000, 1.1898, 0.1957, 0.1699, 0.0000, 0.3226, 0.4311, 0.0631,
         0.1130, 0.3585, 0.3221, 0.0000, 0.2195, 0.1471, 0.5285, 0.0434, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 1.0424, 0.1839, 0.3930, 0.0000, 0.1668, 0.2714, 0.0000,
         0.3771, 0.1842, 0.3056, 0.0000, 0.4169, 0.12

In [12]:
## nn.Sequential(): chain modules into one ordered model that runs like a pipeline
seq_modules = nn.Sequential(flatten, linear, relu, nn.Linear(20, 10))

input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
print(logits)

tensor([[ 0.1329,  0.1441, -0.3184, -0.1736, -0.2150,  0.3128,  0.2132,  0.1192,
         -0.1181, -0.2449],
        [-0.0264,  0.2032, -0.4234, -0.2051, -0.1237,  0.3393,  0.1939,  0.1003,
         -0.2335, -0.1114],
        [ 0.0825,  0.0765, -0.3862, -0.1108, -0.3165,  0.1846,  0.1305,  0.1374,
         -0.1476, -0.2311]], grad_fn=<AddmmBackward0>)


In [13]:
## nn.Softmax(): convert the logits returned by the last linear layer into probabilities in [0, 1]
softmax = nn.Softmax(dim=1)  ## normalise along each row (one row per sample)
pred_prob = softmax(logits)
print(pred_prob)  ## every row sums to 1

tensor([[0.1134, 0.1146, 0.0722, 0.0834, 0.0801, 0.1357, 0.1229, 0.1118, 0.0882,
         0.0777],
        [0.0978, 0.1230, 0.0657, 0.0818, 0.0887, 0.1409, 0.1219, 0.1110, 0.0795,
         0.0898],
        [0.1129, 0.1123, 0.0707, 0.0931, 0.0758, 0.1251, 0.1185, 0.1193, 0.0897,
         0.0825]], grad_fn=<SoftmaxBackward0>)


In [14]:
## Model parameters
print(f"Model structure: {model}\n\n")

## model.named_parameters() yields (name, parameter) tuples,
## which shows which layer each parameter belongs to
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.shape} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0023, -0.0231,  0.0221,  ...,  0.0132, -0.0187,  0.0063],
        [ 0.0221,  0.0102,  0.0106,  ...,  0.0179,  0.0190, -0.0306]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0102,  0.0296], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0413,  0.0143,  0.0046,  ...,  0.0031, -0.0321,  0.0231],
        [ 0.0312, -0.0384, -0.0069,  ...,  0.0325,  0.0379, -0.0319]],
       device='cuda:0', grad_fn=<Sl