In [7]:
import os
import torch
import torch.backends.mps
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [8]:
# Choose the compute backend in priority order: CUDA GPU, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [9]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        in_features: Number of values per sample once every dimension after
            the first has been flattened. Defaults to 28*28.
        hidden_features: Width of both hidden layers. Defaults to 512.
        num_classes: Number of logits produced per sample. Defaults to 10.

    With the default arguments the layers, submodule names and parameter
    names are the same as when the sizes were hard-coded, so
    ``NeuralNetwork()`` keeps working unchanged.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ):
        super().__init__()
        # Collapses every dimension except the first into a single one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # No activation after the last layer: forward() returns raw logits.
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` per sample and return the raw logits.

        Args:
            x: Tensor whose first dimension indexes samples and whose
                remaining dimensions multiply out to ``in_features``.

        Returns:
            Tensor of shape ``(x.shape[0], num_classes)`` with unnormalized
            scores; no softmax is applied here.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [10]:
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [11]:
# Run one random 1x28x28 tensor through the model and report the arg-max class.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
# Softmax over dim=1 turns each row of logits into probabilities.
pred_probab = torch.softmax(logits, dim=1)
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([1], device='cuda:0')


In [12]:
# A batch of three random 28x28 tensors, used to walk through the layers one by one.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [13]:
# Flatten keeps dim 0 and merges the rest (start_dim=1, end_dim=-1 are the defaults),
# so the (3, 28, 28) input becomes (3, 784).
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [14]:
# Linear layer mapping the 784 flattened values of each sample to 20 features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [15]:
# Print the activations, apply ReLU (negatives become 0, the rest is unchanged), print again.
# Note: `hidden1` is overwritten with the post-ReLU values.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.1865,  0.3252,  0.5540,  0.3375,  0.0511,  0.0715, -0.5719,  0.0238,
         -0.2622,  0.7355, -0.2974,  0.2401,  0.4922, -0.1385,  0.2543, -0.3054,
         -0.4283,  0.1079, -0.0576, -0.1219],
        [ 0.7335,  0.3209,  0.4772,  0.5215,  0.0738,  0.0206, -0.1496, -0.3679,
         -0.1392,  0.4444,  0.0507,  0.4881,  0.5084,  0.2497, -0.0305, -0.5596,
         -0.4330,  0.1285, -0.1195, -0.5454],
        [ 0.6495,  0.4413,  0.6179,  0.3510,  0.0021, -0.0300, -0.6370, -0.3748,
         -0.3117,  0.4760, -0.4230,  0.1260,  0.5685,  0.0358,  0.0863, -0.6011,
         -0.7971,  0.0570, -0.1153, -0.4232]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.3252, 0.5540, 0.3375, 0.0511, 0.0715, 0.0000, 0.0238, 0.0000,
         0.7355, 0.0000, 0.2401, 0.4922, 0.0000, 0.2543, 0.0000, 0.0000, 0.1079,
         0.0000, 0.0000],
        [0.7335, 0.3209, 0.4772, 0.5215, 0.0738, 0.0206, 0.0000, 0.0000, 0.0000,
         0.4444, 0.0507, 0.4881, 0.5084, 0.2497, 0.00

In [16]:
# Chain the pieces built above into one ordered container; data flows through
# the stages in the order listed. The final Linear maps 20 features to 10 outputs.
stages = [flatten, layer1, nn.ReLU(), nn.Linear(20, 10)]
seq_modules = nn.Sequential(*stages)

# Fresh random batch of three 28x28 tensors, pushed through the whole stack.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [17]:
# Softmax over dim=1 rescales each row of `logits` so its entries lie in [0, 1] and sum to 1.
softmax = nn.Softmax(dim=1)
# Rebinds `pred_probab`, which an earlier cell also assigned.
pred_probab = softmax(logits)

In [19]:
# Print the module tree, then list every parameter the model holds.
print(f"Model structure: {model}\n\n")

# named_parameters() yields (qualified name, tensor) pairs; only the first two
# entries along dim 0 of each tensor are printed to keep the output short.
named_params = list(model.named_parameters())
for name, param in named_params:
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0096, -0.0086, -0.0057,  ...,  0.0228,  0.0162, -0.0159],
        [-0.0144, -0.0201,  0.0015,  ...,  0.0273,  0.0021, -0.0294]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([ 0.0301, -0.0103], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0049, -0.0272,  0.0408,  ..., -0.0056, -0.0344,  0.0118],
        [-0.0290, -0.0163, -0.0430,  ..., -0.0146,  0.0156,  0.0015]],
       device='cuda:0', grad_fn=<Sl

# ReLU 和 Linear 函数的使用

In [26]:
# ReLU sets negative entries to zero and leaves non-negative entries unchanged.
# NOTE(review): `input` shadows the Python builtin of the same name; the name is
# kept so the variables this cell defines stay the same.
m = nn.ReLU()
input = torch.randn(9)
output = m(input)
print(input, output, sep="\n")

tensor([-0.3419,  1.3507,  0.9073,  1.8979, -0.3773,  0.6790, -0.4330,  1.2628,
         0.3184])
tensor([0.0000, 1.3507, 0.9073, 1.8979, 0.0000, 0.6790, 0.0000, 1.2628, 0.3184])


In [29]:
# Linear(10, 30) maps 10 input features to 30 output features; each output
# is a weighted sum of the 10 inputs plus a bias term.
# NOTE(review): `input` shadows the Python builtin of the same name; the name is
# kept so the variables this cell defines stay the same.
m = nn.Linear(in_features=10, out_features=30)
input = torch.randn(3, 10)
print(input)
output = m(input)
print(output.shape, output, sep="\n")

tensor([[-0.7144, -0.5757, -2.0427,  0.2295, -0.7991,  0.3977,  0.1210,  0.1939,
          0.9462,  3.1425],
        [-0.9258, -1.2403,  0.9384, -1.8167, -0.6556, -1.5948,  1.2101,  0.2141,
         -1.8911,  0.2776],
        [-0.9880, -0.3446,  0.0548,  1.2187,  0.3613, -1.1489,  1.5772,  0.1145,
          0.2816, -0.2695]])
torch.Size([3, 30])
tensor([[ 0.0658, -1.9455,  0.5040, -0.5729, -0.2390,  0.5443, -0.4221, -0.9007,
          1.2941, -0.0184, -0.5119, -0.4126,  0.9303, -1.1310, -0.0843, -0.0430,
         -0.4137, -0.3369, -0.2792, -0.0210,  0.7487, -1.4029,  0.3435,  0.4354,
          0.3207,  1.5218,  0.6007,  0.1934,  0.3122,  0.5932],
        [ 0.8752,  0.9472,  0.9090,  0.2985,  1.0532, -0.9713,  0.1832,  0.6314,
         -0.0498, -0.4633,  0.8191,  0.8974,  0.9088,  0.9203,  0.9869,  0.6784,
         -0.7564,  1.3187, -0.6264,  0.5415, -1.1837,  0.4725, -0.2243,  1.0876,
          0.1194,  0.1556,  0.6739,  0.0814, -0.5278,  1.0811],
        [-0.0551, -0.3533, -0.1994,  0