# Build The Network

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets,transforms

## Get Device for Training

In [2]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Define the Class

In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then Linear-ReLU-Linear-ReLU-Linear.

    Args:
        in_features: Number of values per sample once every dimension after
            the first has been flattened (default ``28*28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of output scores per sample (default ``10``).
    """

    def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapses every dimension after the first into one.
        self.flatten = nn.Flatten()
        # Layers are created in the same order as before so that parameter
        # names (linear_relu_stack.0, .2, .4) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw (un-normalized) class scores for input ``x``.

        ``x`` is flattened first, so each sample must contain exactly
        ``in_features`` values in total.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# Push one random 28x28 input (a batch of one) through the model.
X = torch.rand(1,28,28,device=device)
print(X.shape)

# Calling the module runs its forward pass; the scores are copied to the CPU.
logits = model(X).to("cpu")
print(logits)

# Softmax over dim=1 rescales the scores of each sample so they sum to 1.
pred_probab = nn.Softmax(dim=1)(logits)
print(pred_probab)

# The predicted class is the index of the largest probability.
y_pred = pred_probab.argmax(1)
print(f"Predict class:{y_pred}")

torch.Size([1, 28, 28])
tensor([[ 0.0487, -0.1042, -0.0931,  0.0443, -0.0891, -0.0218,  0.0757,  0.0102,
         -0.1230,  0.0472]], grad_fn=<ToCopyBackward0>)
tensor([[0.1069, 0.0917, 0.0928, 0.1064, 0.0931, 0.0996, 0.1098, 0.1029, 0.0900,
         0.1067]], grad_fn=<SoftmaxBackward0>)
Predict class:tensor([6])


In [6]:
# A mini-batch of three random 28x28 inputs used by the layer demos below.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


## nn.Flatten

In [None]:
# With its default arguments spelled out: dim 0 is kept as-is and every
# later dimension is merged into a single one.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


## nn.Linear

In [8]:
# Linear layer taking 28*28 input features and producing 20 output features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


## nn.ReLU()

In [None]:
print(f"Before ReLU: {hidden1}\n\n")
# ReLU replaces every negative entry with zero and leaves the rest unchanged.
activation = nn.ReLU()
hidden1 = activation(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 4.1915e-02,  3.5098e-01, -4.0096e-01, -5.7743e-01, -4.6115e-01,
          5.8596e-01, -4.3271e-01,  3.6938e-02,  1.8667e-01, -6.7483e-01,
         -5.5206e-02,  4.2921e-02, -1.8322e-01, -4.2530e-01, -7.1983e-02,
          1.5866e-01, -1.5080e-01, -1.8553e-01,  1.0469e-01, -4.2776e-01],
        [-2.5267e-01,  5.2599e-01, -1.0112e-01, -4.4960e-01, -5.8161e-01,
          5.4259e-01, -2.7093e-01,  2.8718e-02, -8.8020e-03, -8.0929e-01,
          9.2627e-02,  1.6276e-01, -5.7747e-01, -4.9831e-01, -2.3607e-01,
         -1.4806e-02,  1.4360e-01, -6.8105e-02,  8.2711e-02, -5.0145e-01],
        [-1.7730e-01,  4.5793e-01, -9.8486e-02, -7.9398e-01, -8.5264e-01,
          3.4841e-01, -2.6714e-01,  2.7903e-02,  1.6600e-02, -6.7068e-01,
         -1.3097e-01,  2.0937e-01, -2.9284e-01, -2.4954e-01, -3.6133e-01,
         -6.8760e-04,  3.3765e-01, -2.6114e-01,  5.0923e-02, -4.3754e-01]],
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0419, 0.3510, 0.0000, 0.0000, 0.0000,

## nn.Sequential

In [11]:
# Chain the modules into one container; input passes through them in the
# order listed here.
seq_modules = nn.Sequential(
    flatten,                                    # reuse the Flatten module
    layer1,                                     # reuse the first Linear layer
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)

## nn.Softmax

In [12]:
# Softmax along dim=1 rescales each row of `logits` into values that sum to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(input=logits)

In [10]:
# Show the module tree first.
print(f"Model structure:{model}\n\n")

# Then list every registered parameter: its dotted name, its full size, and
# only its first two entries along dim 0 to keep the output short.
for name, param in model.named_parameters():
    print(
        f"Layer:{name} | size:{param.size()} | Values : {param[:2]} \n"
    )

Model structure:NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer:linear_relu_stack.0.weight | size:torch.Size([512, 784]) | Values : tensor([[ 0.0226,  0.0176,  0.0299,  ..., -0.0016, -0.0060, -0.0340],
        [-0.0237, -0.0071,  0.0156,  ...,  0.0323, -0.0034,  0.0171]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer:linear_relu_stack.0.bias | size:torch.Size([512]) | Values : tensor([ 0.0260, -0.0275], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer:linear_relu_stack.2.weight | size:torch.Size([512, 512]) | Values : tensor([[ 0.0306, -0.0242,  0.0179,  ...,  0.0341, -0.0392, -0.0403],
        [-0.0333, -0.0309, -0.0026,  ..., -0.0067, -0.0140,  0.0296]],
       device='cuda:0', grad_fn=<SliceBack