# 神經網路

神經網路由一堆神經元所組成，當有一排神經元時可稱作一個layer
而layer又可以細分三個層，分別為input layer、hidden layer跟output layer

## 神經網路元件

* Activation function 中文稱作激勵函數或激活函數，用來表示該神經元是否被啟動如果該神經元被激活則代表其接受到的某種特徵數據有一定的重要性
* Weights 又稱為權重
* Bias 偏移量


In [1]:
# basic import
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


# 定義一個模型

In [3]:
class NeuralNetwork(nn.Module):

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
model = NeuralNetwork().to(device=device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [14]:
# rand feature

X = torch.rand(size=(1, 28, 28), device=device)
logits = model(X)
print(f'logits is {logits}')
pred_probab = nn.Softmax(dim=1)(logits)
print(f'pred_probab is {pred_probab}')
y_pred = pred_probab.argmax(1)
print(f'Predicted Class : {y_pred}')

logits is tensor([[0.0808, 0.0000, 0.0079, 0.0000, 0.0000, 0.0770, 0.0273, 0.0000, 0.0000,
         0.0329]], device='cuda:0', grad_fn=<ReluBackward0>)
pred_probab is tensor([[0.1059, 0.0977, 0.0985, 0.0977, 0.0977, 0.1055, 0.1004, 0.0977, 0.0977,
         0.1010]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
Predicted Class : tensor([0], device='cuda:0')


In [25]:
# minibatch of calculation

input_images = torch.rand(size = (3, 28, 28), device=device) # 模擬同時丟三張圖片進去model中
print(f'The shape(batch, W, H) = {input_images.shape}')
logits = model(input_images)
print(f'Ouput logits shape is {logits.shape}') # 輸出同時也會有三個結果
pred_probab = nn.Softmax(dim=1)(logits) # 因為輸出shape 為(3, 10) dim為1代表在10個元素中計算softmax機率分布(注意共計算三個)
print(f'pred_probab is {pred_probab}')
y_pred = pred_probab.argmax(1)
print(f'Predicted Class : {y_pred}')

The shape(batch, W, H) = torch.Size([3, 28, 28])
Ouput logits shape is torch.Size([3, 10])
pred_probab is tensor([[0.1050, 0.0984, 0.1022, 0.0984, 0.0984, 0.1019, 0.1008, 0.0984, 0.0984,
         0.0984],
        [0.1113, 0.0975, 0.1003, 0.0975, 0.0975, 0.1056, 0.0978, 0.0975, 0.0975,
         0.0975],
        [0.1070, 0.0975, 0.1032, 0.0975, 0.0975, 0.1026, 0.0983, 0.0975, 0.0975,
         0.1014]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
Predicted Class : tensor([0, 0, 0], device='cuda:0')


## Model Layer

### nn.Flatten

In [30]:
flatten = nn.Flatten().to(device) 
flat_image = flatten(input_images) # 代表蔣每個影像攤平處理(不包含第一軸(batch))
print(f'Origin shape : {input_images.size()}')
print(f'Output shape : {flat_image.size()}')

Origin shape : torch.Size([3, 28, 28])
Output shape : torch.Size([3, 784])


### nn.Linear

In [31]:
layer1 = nn.Linear(in_features= 28 * 28, out_features= 20).to(device)
hidden1 = layer1(flat_image)
print(f'Output shape : {hidden1.size()}')

Output shape : torch.Size([3, 20])


### nn.ReLU

In [32]:
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.1669, -0.1449, -0.1265, -0.1412,  0.3756, -0.2061, -0.2088,  0.0654,
         -0.0725, -0.0130, -0.1446, -0.4317, -0.1830,  0.2018, -0.1954,  0.4542,
         -0.0687, -0.4447, -0.1619,  0.3076],
        [ 0.1202, -0.2142, -0.4134, -0.0531,  0.6478,  0.2075, -0.3113,  0.3561,
         -0.1047,  0.0778, -0.3275, -0.2238, -0.0159,  0.4323, -0.1919,  0.4680,
         -0.3092, -0.1982,  0.0143,  0.2569],
        [ 0.0678, -0.2512, -0.3570, -0.0596,  0.5324,  0.1420,  0.1087,  0.2630,
          0.2454,  0.3016, -0.1586, -0.1986,  0.1387,  0.4295, -0.4479,  0.0813,
         -0.3629, -0.1127, -0.2666,  0.5528]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.3756, 0.0000, 0.0000, 0.0654, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.2018, 0.0000, 0.4542, 0.0000, 0.0000,
         0.0000, 0.3076],
        [0.1202, 0.0000, 0.0000, 0.0000, 0.6478, 0.2075, 0.0000, 0.3561, 0.0000,
         0.0778, 0.0000, 0.00

### nn.Sequential

`nn.Sequential` is an ordered 
container of modules. The data is passed through all the modules in the same order as defined. You can use
sequential containers to put together a quick network like `seq_modules`.

In [35]:
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
).to(device=device)
input_image = torch.rand(3,28,28, device=device)
logits = seq_modules(input_image)

### nn.Softmax

## Model parameters

Many layers inside a neural network are *parameterized*, i.e. have associated weights 
and biases that are optimized during training. Subclassing `nn.Module` automatically 
tracks all fields defined inside your model object, and makes all parameters 
accessible using your model's `parameters()` or `named_parameters()` methods.

In this example, we iterate over each parameter, and print its size and a preview of its values.

In [36]:
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0190,  0.0097,  0.0304,  ...,  0.0001,  0.0330, -0.0076],
        [ 0.0133, -0.0276, -0.0211,  ...,  0.0144, -0.0053, -0.0144]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0343, -0.0098], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0363, -0.0277, -0.0276,  ..., -0.0005,  0.0401, -0.0434],
        [-0.0441,  0.0440, -0.0240,  ..., -0.0374,  0.0011, -0.0248]],
       device='cu