In [18]:
import torch
import numpy as np
import torch.nn as nn
from torch.nn import functional as F

# Autograd

In [3]:
# Two independent leaf tensors, each holding 1.0, that autograd will track.
a, b = (torch.tensor([1.], requires_grad=True) for _ in range(2))

In [4]:
# Sum of two squared residuals; at a = 1, b = 1 both residuals are zero.
sse = (2 - a - b).pow(2) + (4 - 3 * a - b).pow(2)

In [5]:
# Partial derivatives of sse with respect to a and b, evaluated at a = 1, b = 1.
torch.autograd.grad(outputs=sse, inputs=[a, b])

(tensor([-0.]), tensor([-0.]))

In [6]:
# Build y = x^2 and z = y + 1 at x = 1, then ask autograd for dy/dx directly.
x = torch.tensor(1., requires_grad=True)
y = x.pow(2)
z = torch.add(y, 1)
torch.autograd.grad(outputs=y, inputs=x)  # value of the derivative

(tensor(2.),)

# 微分計算圖

In [7]:
# Rebuild the small graph x -> y = x^2 -> z = y + 1.
x = torch.tensor(1., requires_grad=True)
y = torch.pow(x, 2)
z = torch.add(y, 1)
x.grad  # empty (None) at first: it only gets a value after backpropagation

In [8]:
z  # display z; its repr includes the grad_fn of the operation that produced it

tensor(2., grad_fn=<AddBackward0>)

In [73]:
z.grad_fn  # the backward node autograd recorded for the addition that produced z

<AddBackward0 at 0x172588e0220>

In [9]:
z.backward()  # backtrack from the output node to compute the derivative at the initial (leaf) node

In [11]:
print('x.grad: ', x.grad)  # derivative of z = x**2 + 1 with respect to x at x = 1 ---> 2x ---> 2
print('y.grad: ', y.grad)  # None: only the original leaf nodes have their derivative stored

x.grad:  tensor(2.)
y.grad:  None


In [12]:
# y is an intermediate (non-leaf) tensor, so its gradient is dropped unless
# retain_grad() is requested before the backward pass.
x = torch.tensor(1., requires_grad=True)
y = x.pow(2)
y.retain_grad()  # keep the derivative computed for y during backpropagation
z = y.pow(2)
z.backward()

In [13]:
y.grad  # derivative of z = y ** 2 with respect to y

tensor(2.)

# 阻止圖計算

## with torch.no_grad():

In [14]:
x = torch.tensor(1., requires_grad=True)
y = x.pow(2)
with torch.no_grad():
    # Computed inside no_grad, so z is not attached to the autograd graph.
    z = y.pow(2)
# y still requires grad; z holds the value but does not require grad.
print(y.requires_grad, z, z.requires_grad, sep='\n')

True
tensor(1.)
False


## .detach()

In [15]:
x = torch.tensor(1., requires_grad=True)
y = x.pow(2)
y1 = y.detach()  # a new tensor with the same value that does not require grad
z = y1.pow(2)  # built from the detached tensor, so it carries no grad_fn
print(y, y1, z, sep='\n')

tensor(1., grad_fn=<PowBackward0>)
tensor(1.)
tensor(1.)


# 識別葉節點

In [16]:
x = torch.tensor(1., requires_grad=True)
y = x.pow(2)
z = y.pow(2)
# x was created directly (leaf); y is the result of an operation (not a leaf).
print(x.is_leaf, y.is_leaf, sep='\n')

True
False


In [None]:
# 3分類 500個樣本 20個特徵 共3層 第一層13個神經元 第二層8個神經元

In [None]:
# 確定數據

In [21]:
# Synthetic data: 500 samples with 20 features scaled into [0, 100), and one
# label per sample drawn from {0, 1, 2} (stored as float32).
torch.manual_seed(0)
x = 100 * torch.rand(500, 20, dtype=torch.float32)
y = torch.randint(0, 3, (500,), dtype=torch.float32)

In [23]:
input_ = x.size(1)  # number of features (columns of x)
output_ = torch.unique(y).numel()  # number of classes (distinct label values)

In [None]:
# 定義神經網路架構

In [31]:
class Model(nn.Module):
    """Fully connected network with two hidden layers and a linear output layer.

    The forward pass applies ReLU after the first layer and sigmoid after the
    second; the output layer returns raw scores with no activation applied.
    The two hidden layers have no bias term; the output layer has one.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Number of values produced per sample.
        hidden1: Width of the first hidden layer (default 13).
        hidden2: Width of the second hidden layer (default 8).
    """

    def __init__(self, in_features=40, out_features=2, hidden1=13, hidden2=8):
        super().__init__()
        # Layers are created in the same order as before, so for a given seed
        # the default configuration initialises identically.
        self.linear1 = nn.Linear(in_features=in_features, out_features=hidden1, bias=False)
        self.linear2 = nn.Linear(in_features=hidden1, out_features=hidden2, bias=False)
        self.output = nn.Linear(in_features=hidden2, out_features=out_features, bias=True)

    def forward(self, x):
        """Return the output-layer scores for ``x`` (no softmax is applied here)."""
        sigma1 = torch.relu(self.linear1(x))
        sigma2 = torch.sigmoid(self.linear2(sigma1))
        zhat = self.output(sigma2)
        return zhat

In [25]:
# Seed first so the randomly initialised layer weights are reproducible.
torch.manual_seed(420)
net = Model(in_features=input_, out_features=output_)

In [26]:
# Call the module itself rather than net.forward(x): nn.Module.__call__ runs
# any registered hooks around forward, which a direct forward() call skips.
zhat = net(x)

In [27]:
# Loss for multi-class classification, applied to the model's raw output scores.
criterion = nn.CrossEntropyLoss()

In [28]:
# y was created as float32; it is cast to long so it can be used as class-index targets.
loss = criterion(zhat, y.long())

In [29]:
loss  # scalar loss tensor; still attached to the graph (it has a grad_fn)

tensor(1.1992, grad_fn=<NllLossBackward0>)

In [32]:
net.linear1.weight.grad  # no gradient yet because backpropagation has not run

In [33]:
loss.backward()  # backpropagation

In [36]:
# The gradient has the same shape as the weight matrix it belongs to.
print(net.linear1.weight.grad.shape, net.linear1.weight.shape)
net.linear1.weight.grad  # after backpropagation the gradient is populated

torch.Size([13, 20]) torch.Size([13, 20])


tensor([[-2.2252e-05, -1.8684e-05, -2.2276e-05, -1.0601e-05, -1.1619e-06,
         -6.0099e-06, -1.9208e-05, -1.1353e-05, -5.7474e-06, -2.6704e-06,
         -7.3304e-07, -1.7826e-06, -9.1094e-06, -1.7694e-05, -1.7605e-05,
         -4.0645e-07, -1.8555e-05, -2.4853e-06, -9.0114e-06, -6.7938e-06],
        [ 6.9786e-03, -8.8909e-03, -4.3138e-03, -1.8461e-03, -8.4016e-03,
         -9.5685e-04, -9.8347e-03, -6.9438e-03,  9.9324e-04, -1.3882e-02,
         -9.7108e-03,  2.7085e-04,  3.0346e-03, -1.1586e-02, -7.6946e-03,
         -1.5050e-02, -5.4895e-03, -8.0938e-03, -1.7266e-03, -2.0699e-02],
        [-3.1017e-02, -1.0266e-02, -1.9335e-02, -6.4472e-03, -1.7734e-03,
         -2.2988e-02, -5.6614e-03, -1.6383e-02, -3.3910e-02, -6.9265e-03,
         -7.9444e-03, -1.8821e-02, -1.2418e-02, -1.2930e-02, -1.1997e-02,
         -2.5591e-02, -2.0578e-02, -1.6955e-02, -2.3379e-02, -6.6936e-03],
        [ 1.3744e-02,  1.1576e-02,  4.0255e-03,  1.0368e-02,  5.7043e-03,
          8.3568e-03,  6.2944e-03, 