In [6]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim



### 搭建模型

In [7]:
# A single fully connected building block.

class LinearBNAC(nn.Module):
    """Linear layer followed by an output activation or a hidden-layer stack.

    Args:
        in_channels: Number of input features of the ``nn.Linear`` layer.
        out_channels: Number of output features of the ``nn.Linear`` layer.
        bias: Whether the ``nn.Linear`` layer carries a bias term.
        dropout: Drop probability of the hidden-layer ``nn.Dropout``; unused
            when ``is_output`` is true.
        is_output: When true, build an output head: ``nn.Sigmoid`` if
            ``out_channels == 1``, otherwise ``nn.Softmax(dim=1)``. When
            false, the linear layer is followed by Dropout, BatchNorm1d and
            an in-place LeakyReLU.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super().__init__()
        # The Linear layer always sits at index 0 of ``self.linear``.
        stages = [nn.Linear(in_channels, out_channels, bias=bias)]
        if not is_output:
            stages += [
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True),
            ]
        elif out_channels == 1:
            stages.append(nn.Sigmoid())
        else:
            stages.append(nn.Softmax(dim=1))
        self.linear = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.linear(x)

In [13]:
class Model(nn.Module):
    """Fully connected network assembled from four ``LinearBNAC`` blocks.

    Hidden widths are 128 -> 64 -> 32, followed by an output block with
    ``output_classes`` units.

    Args:
        input_dimention: Number of input features per sample.
        output_classes: Number of units of the output block.
    """

    def __init__(self, input_dimention, output_classes=1):
        super().__init__()
        # Hidden widths are a free choice as long as consecutive layers match.
        self.layer1 = LinearBNAC(in_channels=input_dimention, out_channels=128)
        self.layer2 = LinearBNAC(in_channels=128, out_channels=64)
        self.layer3 = LinearBNAC(in_channels=64, out_channels=32)
        self.output = LinearBNAC(in_channels=32, out_channels=output_classes, is_output=True)

    def forward(self, x):
        """Apply the three hidden blocks and then the output block, in order."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output):
            x = stage(x)
        return x
        

### 準備輸入資料、優化器、標籤資料、模型輸出

In [14]:
# Ten-class classifier over 256-dimensional inputs, trained with Adam
# using the optimizer's default hyper-parameters.
model = Model(input_dimention=256, output_classes=10)
optimizer = optim.Adam(model.parameters())

In [15]:
# Dimensions of the random mini-batch fed to the network.
batch_size = 4
input_features = 256
dummy_input = torch.randn((batch_size, input_features))
print(dummy_input)      # shape: (4, 256)

# One class index per sample, stored as int64.
target = torch.tensor([9, 5, 4, 4], dtype=torch.long)

tensor([[-0.0804,  0.4898, -0.2529,  ..., -0.5672, -0.0741,  2.1256],
        [ 0.1338, -0.4173, -0.9102,  ...,  0.1489, -0.0902,  0.8220],
        [ 0.6174,  0.2801,  2.0885,  ...,  0.4467,  0.3066,  2.0640],
        [-0.6622, -0.2231,  0.2120,  ...,  0.3078, -1.2583, -1.2880]])


In [16]:
# Forward pass: feed the dummy batch through the model and show the result.
output = model(dummy_input)     # forward!
print(output)

tensor([[0.0704, 0.2024, 0.1299, 0.1326, 0.0721, 0.0475, 0.0783, 0.0761, 0.0721,
         0.1185],
        [0.0597, 0.1553, 0.0513, 0.1006, 0.0588, 0.0895, 0.1494, 0.1273, 0.1169,
         0.0912],
        [0.0675, 0.2880, 0.0646, 0.0381, 0.0436, 0.0396, 0.1631, 0.0784, 0.1417,
         0.0754],
        [0.1058, 0.0874, 0.1252, 0.1524, 0.0726, 0.0680, 0.1749, 0.0656, 0.0908,
         0.0573]], grad_fn=<SoftmaxBackward>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用 dropout，並隨機產生 dummy_input，所以各位學員得到的值會與解答不同，然而步驟與原理需要相同

In [17]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [22]:
# criterion = "自行輸入"
criterion = CrossEntropyLoss()

In [23]:
# loss = criterion(torch.log("自行輸入"), "自行輸入")
loss = criterion(output, target)

In [24]:
# Display the loss tensor computed in the previous cell.
loss

tensor(2.3228, grad_fn=<NllLossBackward>)

In [31]:
# Softmax probabilities -> log-probabilities -> negative log-likelihood.
criterion = NLLLoss()

loss = criterion(output.log(), target)
loss

tensor(2.5752, grad_fn=<NllLossBackward>)

### 完成 back propagation 並更新參數

In [25]:
# Back-propagate the loss to populate the .grad of the model parameters.
loss.backward()

In [26]:
# Inspect the first Linear layer after backward(): its weights and their gradient.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[ 0.0595, -0.0022, -0.0386,  ..., -0.0165,  0.0070,  0.0437],
        [-0.0115, -0.0399,  0.0484,  ..., -0.0270,  0.0357, -0.0584],
        [ 0.0227,  0.0532, -0.0449,  ..., -0.0619, -0.0274,  0.0181],
        ...,
        [-0.0475, -0.0574,  0.0475,  ..., -0.0298, -0.0497,  0.0583],
        [ 0.0478, -0.0578, -0.0417,  ..., -0.0505, -0.0310, -0.0534],
        [ 0.0423,  0.0048, -0.0247,  ..., -0.0152, -0.0225,  0.0141]],
       requires_grad=True)


grad : tensor([[ 6.1626e-04, -2.5974e-03, -1.5029e-03,  ...,  2.1663e-03,
          7.1110e-06, -4.4279e-03],
        [-7.5792e-07,  2.4917e-05, -1.6483e-06,  ..., -2.4978e-05,
         -2.0186e-06,  1.1238e-04],
        [ 3.6274e-03,  1.4770e-03,  8.3650e-03,  ...,  1.4158e-03,
          3.2841e-03,  1.0445e-02],
        ...,
        [-4.0965e-04,  8.3024e-04,  4.2847e-03,  ...,  4.3836e-04,
          5.1644e-04, -7.4215e-03],
        [ 2.0411e-03,  3.8222e-04,  2.0484e-03,  ...,  3.5787e-04,
       

In [27]:
# Apply one optimizer update to the model parameters using the stored gradients.
optimizer.step()

In [28]:
# Inspect the same layer after optimizer.step(): the weights have moved,
# while the stored gradient is left as it was.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[ 0.0585, -0.0012, -0.0376,  ..., -0.0175,  0.0060,  0.0447],
        [-0.0105, -0.0409,  0.0494,  ..., -0.0260,  0.0366, -0.0594],
        [ 0.0217,  0.0522, -0.0459,  ..., -0.0629, -0.0284,  0.0171],
        ...,
        [-0.0465, -0.0584,  0.0465,  ..., -0.0308, -0.0507,  0.0593],
        [ 0.0468, -0.0588, -0.0427,  ..., -0.0515, -0.0320, -0.0544],
        [ 0.0413,  0.0038, -0.0257,  ..., -0.0142, -0.0235,  0.0131]],
       requires_grad=True)


grad : tensor([[ 6.1626e-04, -2.5974e-03, -1.5029e-03,  ...,  2.1663e-03,
          7.1110e-06, -4.4279e-03],
        [-7.5792e-07,  2.4917e-05, -1.6483e-06,  ..., -2.4978e-05,
         -2.0186e-06,  1.1238e-04],
        [ 3.6274e-03,  1.4770e-03,  8.3650e-03,  ...,  1.4158e-03,
          3.2841e-03,  1.0445e-02],
        ...,
        [-4.0965e-04,  8.3024e-04,  4.2847e-03,  ...,  4.3836e-04,
          5.1644e-04, -7.4215e-03],
        [ 2.0411e-03,  3.8222e-04,  2.0484e-03,  ...,  3.5787e-04,
       

### 清空 gradient

In [29]:
# Reset the gradients of every parameter managed by the optimizer.
optimizer.zero_grad()

In [30]:
# Inspect the same layer after zero_grad(): the weights are unchanged and the
# gradient has been cleared.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[ 0.0585, -0.0012, -0.0376,  ..., -0.0175,  0.0060,  0.0447],
        [-0.0105, -0.0409,  0.0494,  ..., -0.0260,  0.0366, -0.0594],
        [ 0.0217,  0.0522, -0.0459,  ..., -0.0629, -0.0284,  0.0171],
        ...,
        [-0.0465, -0.0584,  0.0465,  ..., -0.0308, -0.0507,  0.0593],
        [ 0.0468, -0.0588, -0.0427,  ..., -0.0515, -0.0320, -0.0544],
        [ 0.0413,  0.0038, -0.0257,  ..., -0.0142, -0.0235,  0.0131]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [32]:
# CrossEntropyLoss takes raw scores directly -- no softmax beforehand.
# NOTE(review): `input` shadows the Python builtin, and `loss` / `target`
# rebind names used by earlier cells; the following cells read these names.

loss = nn.CrossEntropyLoss()

# Three samples with five raw scores each, and one class index in [0, 5) per sample.
input = torch.randn((3, 5), requires_grad=True)
target = torch.empty((3,), dtype=torch.long).random_(5)

In [33]:
# Display the random raw scores.
input

tensor([[ 0.4533, -0.9323, -1.2650, -0.0865,  1.4955],
        [-0.4729,  1.2305,  0.0577,  1.6709, -0.5790],
        [-1.1820, -0.8383, -0.2064,  0.4616, -1.3749]], requires_grad=True)

In [34]:
# Display the random class indices.
target

tensor([2, 0, 0])

In [35]:
# Apply CrossEntropyLoss to the raw scores and class indices, then display the result.
output = loss(input, target)
output

tensor(2.8565, grad_fn=<NllLossBackward>)