In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# accelerator 加速装置
# available 可利用的
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"

print(f"Using {device} device")
print(torch.accelerator.is_available())


Using cpu device
False


检查当前设备是否有可用的加速器（例如 GPU）：如果有就使用它，否则使用 CPU。

In [2]:
# neural 神经的
# network 网络

class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    The defaults reproduce the original fixed architecture (28 * 28 inputs,
    two hidden layers of 512 units, 10 outputs), so ``NeuralNetwork()`` builds
    the same module tree and parameter names as before.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_features: Width of both hidden layers.
        num_classes: Number of output logits per sample.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512, num_classes: int = 10):
        super().__init__()
        # Flatten keeps dim 0 and merges all remaining dims into one.
        self.flatten = nn.Flatten()
        # Sequential applies the layers in the order they are listed.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` and return the raw, un-normalised scores (logits), one row per sample."""
        return self.linear_relu_stack(self.flatten(x))


# Instantiate the network and move it to `device`; printing an nn.Module shows its layer tree.
model = NeuralNetwork().to(device)
print(model)


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [3]:
# Push one random 28x28 input (a batch of 1) through the model and read off the predicted class.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
# Softmax over dim=1 turns the logits into probabilities; argmax picks the largest one per row.
pred_probab = nn.functional.softmax(logits, dim=1)
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")


Predicted class: tensor([4])


将图片张量输入模型，模型输出一个二维张量（logits）：每一行对应批次内的一个样本，每一列对应一个类别的原始得分。先用 Softmax 把得分转换成概率，再从每一行中选出概率最大的那个类别，也就是模型的预测结果。

In [5]:
# Model layers

# A mini-batch of 3 random 28x28 inputs, used to walk through the layers one at a time.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)


torch.Size([3, 28, 28])


In [6]:
# nn.Flatten keeps dim 0 (one entry per sample) and merges all remaining dims into one.

flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)  # flattened shape: [3, 784]  (28 * 28 = 784)

print("-" * 70)

# A small hand-written 3 x 3 x 3 x 3 example so the flattening order can be checked by eye.
tensor = torch.tensor([
    [
        [[0, 0, 255], [0, 255, 255], [255, 0, 255]],
        [[0, 255, 0], [255, 0, 255], [255, 255, 255]],
        [[1, 5, 5], [3, 4, 7], [7, 8, 10]],
    ],
    [
        [[255, 0, 100], [150, 255, 190], [100, 30, 255]],
        [[2, 4, 5], [255, 150, 155], [190, 155, 255]],
        [[2, 4, 7], [4, 7, 8], [255, 250, 200]],
    ],
    [
        [[12, 155, 255], [255, 255, 255], [150, 150, 70]],
        [[240, 255, 100], [10, 20, 30], [255, 255, 255]],
        [[255, 255, 255], [250, 250, 250], [150, 250, 245]],
    ],
])

print(tensor.shape)
flat_tensor = flatten(tensor)  # each sample's 3 x 3 x 3 values become one row of 27
print(flat_tensor.shape)
print()
print(flat_tensor)



torch.Size([3, 784])
----------------------------------------------------------------------
torch.Size([3, 3, 3, 3])
torch.Size([3, 27])

tensor([[  0,   0, 255,   0, 255, 255, 255,   0, 255,   0, 255,   0, 255,   0,
         255, 255, 255, 255,   1,   5,   5,   3,   4,   7,   7,   8,  10],
        [255,   0, 100, 150, 255, 190, 100,  30, 255,   2,   4,   5, 255, 150,
         155, 190, 155, 255,   2,   4,   7,   4,   7,   8, 255, 250, 200],
        [ 12, 155, 255, 255, 255, 255, 150, 150,  70, 240, 255, 100,  10,  20,
          30, 255, 255, 255, 255, 255, 255, 250, 250, 250, 150, 250, 245]])


通过 Flatten 将高维张量展平：第 0 维（批次维度）保持不变，其余维度被合并成一维。
方法：先 from torch import nn，创建 flatten = nn.Flatten()，然后输入图片张量进行展开：flat_image = flatten(input_image)。

In [7]:
# nn.Linear: fully connected layer; maps each flattened 28 * 28 sample to 20 output features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)


torch.Size([3, 20])


对展开的张量进行线性变换。in_features 是输入全连接层的特征数量，也就是展开之后每个样本的一维张量里的元素数量（这里是 28*28 = 784）；out_features 是该层输出的特征数量（这里是 20）。

In [8]:
# nn.ReLU: activation function

# Print the tensor before and after so the effect is visible: negative entries become 0,
# positive entries are left unchanged.
print(f"Before ReLU: {hidden1}\n\n")
# nn.ReLU() builds the module; the second pair of parentheses applies it to the tensor.
# NOTE(review): hidden1 is overwritten here, so re-running only this cell prints
# already-activated values under "Before ReLU".
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")



Before ReLU: tensor([[-0.5035, -0.2911, -0.0655,  0.1264,  0.1276,  0.2771,  0.4132,  0.1046,
          0.1311, -0.2914, -0.3006, -0.4146,  0.0043, -0.0407,  1.1606,  0.6067,
          0.3848, -0.1609, -0.0526, -0.1813],
        [-0.3996, -0.4265, -0.1083, -0.0391,  0.1665,  0.4641,  0.4647,  0.1719,
          0.4001, -0.6122, -0.2807, -0.2160, -0.2635,  0.0955,  0.8089,  0.6653,
          0.5401, -0.0525, -0.1033,  0.0031],
        [-0.2210, -0.4284, -0.1172,  0.5052, -0.2690,  0.4579,  0.4986, -0.0300,
          0.1495, -0.5997, -0.4747, -0.2359, -0.1417, -0.0040,  0.6733,  0.5509,
          0.5146, -0.2834,  0.1559,  0.1633]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.1264, 0.1276, 0.2771, 0.4132, 0.1046, 0.1311,
         0.0000, 0.0000, 0.0000, 0.0043, 0.0000, 1.1606, 0.6067, 0.3848, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.1665, 0.4641, 0.4647, 0.1719, 0.4001,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0955, 0.80

通过激活函数引入非线性映射，让神经网络能够学习到各种各样的复杂特征；如果没有激活函数，多个线性层叠加之后仍然只等价于一个线性变换。ReLU 会把负数置为 0，正数保持不变。
方法： out = nn.ReLU()(linear_out)，即先实例化 nn.ReLU 模块，再把线性层的输出传给它。

In [12]:
# nn.Sequential: an ordered container; the input passes through the modules in the order listed.
# NOTE(review): the last module is nn.Linear, so the logits may be negative; a saved output
# showing grad_fn=<ReluBackward0> comes from an earlier version of this cell. Re-run to refresh.

seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10),
)

input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

print(logits)



tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0092, 0.0000, 0.0000, 0.0000, 0.1112,
         0.0000],
        [0.0000, 0.0274, 0.0000, 0.0344, 0.0000, 0.0000, 0.1071, 0.0679, 0.0921,
         0.0000],
        [0.0000, 0.0000, 0.0308, 0.0402, 0.0000, 0.0000, 0.1293, 0.0195, 0.0195,
         0.0000]], grad_fn=<ReluBackward0>)


创建一个容器 Sequential 来容纳神经网络的各层，数据会按照定义的顺序依次通过每一层。
方法： seq_modules = nn.Sequential(flatten, nn.Linear(in_features, out_features), nn.ReLU(), ...)。注意传入的是层的实例（例如 nn.ReLU()），而不是类本身。

In [14]:
# Softmax over dim=1 turns each row of logits into a probability distribution over the classes.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

print(pred_probab)




tensor([[0.0987, 0.0987, 0.0987, 0.0987, 0.0997, 0.0987, 0.0987, 0.0987, 0.1104,
         0.0987],
        [0.0967, 0.0994, 0.0967, 0.1001, 0.0967, 0.0967, 0.1076, 0.1035, 0.1060,
         0.0967],
        [0.0976, 0.0976, 0.1006, 0.1016, 0.0976, 0.0976, 0.1110, 0.0995, 0.0995,
         0.0976]], grad_fn=<SoftmaxBackward0>)


In [17]:
# Model parameters

print(f"Model: {model}")

# named_parameters() yields (name, tensor) pairs; param[:2] keeps only the first two
# entries along dim 0 so large weight matrices do not flood the output.
for name, param in model.named_parameters():
    print(f"Layer Name: {name} | Layer Size: {param.size()} | Layer Value: {param[:2]}")




Model: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
Layer Name: linear_relu_stack.0.weight | Layer Size: torch.Size([512, 784]) | Layer Value: tensor([[ 0.0258, -0.0059, -0.0350,  ...,  0.0020, -0.0166, -0.0305],
        [-0.0160, -0.0071, -0.0236,  ...,  0.0352, -0.0013,  0.0078]],
       grad_fn=<SliceBackward0>)
Layer Name: linear_relu_stack.0.bias | Layer Size: torch.Size([512]) | Layer Value: tensor([-0.0351,  0.0240], grad_fn=<SliceBackward0>)
Layer Name: linear_relu_stack.2.weight | Layer Size: torch.Size([512, 512]) | Layer Value: tensor([[ 0.0166,  0.0267, -0.0161,  ...,  0.0299,  0.0331, -0.0242],
        [ 0.0429, -0.0036, -0.0140,  ...,  0.0223, -0.0234,  0.0043]],
       grad_fn=<SliceBackward0>)
Layer N

通过 model.named_parameters() 方法来打印需要优化的层的参数（权重和偏置）。ReLU 层和 Flatten 层不会出现在结果中，因为它们没有可学习的参数：映射关系是固定的，不需要优化。
打印参数时注意切片，不然大矩阵会让打印信息很难观察。