In [1]:
import torch
import numpy as np
print(torch.cuda.is_available())

True


## Pytorch 基礎操作

#### pytorch 基本資料型態

In [11]:
tensor_0d = torch.tensor(1)
print("0 維張量: ")
print(f"{tensor_0d}\nsize: {tensor_0d.size()}\ndim: {tensor_0d.ndim}\n---------------------------")
tensor_1d = torch.tensor([1,2])
print("1 維張量: ")
print(f"{tensor_1d}\nsize: {tensor_1d.size()}\ndim: {tensor_1d.ndim}\n---------------------------")
tensor_2d = torch.tensor([[1,2],[3,4]])
print("2 維張量: ")
print(f"{tensor_2d}\nsize: {tensor_2d.size()}\ndim: {tensor_2d.ndim}\n---------------------------")
tensor_3d = torch.tensor([[[1, 2], [3, 2]], [[1, 7], [5, 4]]])
print("3 維張量: ")
print(f"{tensor_3d}\nsize: {tensor_3d.size()}\ndim: {tensor_3d.ndim}\n---------------------------")

0 維張量: 
1
size: torch.Size([])
dim: 0
---------------------------
1 維張量: 
tensor([1, 2])
size: torch.Size([2])
dim: 1
---------------------------
2 維張量: 
tensor([[1, 2],
        [3, 4]])
size: torch.Size([2, 2])
dim: 2
---------------------------
3 維張量: 
tensor([[[1, 2],
         [3, 2]],

        [[1, 7],
         [5, 4]]])
size: torch.Size([2, 2, 2])
dim: 3
---------------------------


In [5]:
# tensor 也可由 numpy 產生
vector = [1,2]
np_array = np.array(vector)
x_np = torch.from_numpy(np_array)
print(x_np)

tensor([1, 2])


In [12]:
# tensor 的基本屬性
tensor = torch.tensor([[1,2],[3,4]])
print(f'Shape of tensor: {tensor.shape}')
print(f'Datatype of tensor: {tensor.dtype}')
print(f'Device tensor is sotered on: {tensor.device}')

Shape of tensor: torch.Size([2, 2])
Datatype of tensor: torch.int64
Device tensor is sotered on: cpu


#### tensor.torch()

In [15]:
a = torch.tensor(5)
print(a)
anp = np.asarray([4])  # 定義一個 Numpy 陣列
a = torch.tensor(anp)  # 將 Numpy 陣列轉為張量
print(a)

tensor(5)
tensor([4])


#### torch.Tensor()

In [20]:
a = torch.Tensor(2)
print(a)
b = torch.Tensor(2,2)
print(b)
c = torch.Tensor([2])
print(c)
d = torch.Tensor([1,2])
print(d)

tensor([4.4449e-16, 1.1477e-42])
tensor([[0.0000e+00, 2.3453e-02],
        [0.0000e+00, 1.1062e-05]])
tensor([2.])
tensor([1., 2.])


In [22]:
# 判斷一個物件是否為張量
torch.is_tensor(d)
# 獲取張量中元素個數
torch.numel(b)

4

#### 如何修改張量函數的預設類型

In [27]:
print(torch.get_default_dtype())   # 輸出預設類型
print(torch.Tensor([[1,3]]).dtype) # 輸出使用張量函數定義的張量類型

torch.set_default_dtype(torch.float64)  # 修改預設張量類型
print(torch.get_default_dtype())   
print(torch.Tensor([[1,3]]).dtype) 

torch.float32
torch.float32
torch.float64
torch.float64


#### type()

In [34]:
a = torch.FloatTensor([4,2])
print(a.type(torch.IntTensor))
print(a.double())
print(a.int())

print(a.mean())
print(a.sqrt())

tensor([4, 2], dtype=torch.int32)
tensor([4., 2.], dtype=torch.float64)
tensor([4, 2], dtype=torch.int32)
tensor(3.)
tensor([2.0000, 1.4142])


## Pytorch 與 Numpy

#### 張量與 Numpy 資料的互相轉換

In [3]:
a = torch.FloatTensor([1,2])
# 將張量轉成 Numpy 的物件
print(a.numpy())

anp = np.array([1,2])
# 將 numpy array 轉換為 tensor
print(torch.from_numpy(anp))
print(torch.tensor(anp))

[1. 2.]
tensor([1, 2])
tensor([1, 2])


#### 張量與 Numpy 的各自形狀獲取

In [20]:
x = torch.randn(2,1)
print(x.shape)    # 列印張量形狀，輸出 torch.Size([2, 1])
print(x.size())   # 列印張量大小，輸出 torch.Size([2, 1])

anp = np.array([[1],[3]])
print(anp.size,anp.shape)  # 2 (2, 1)

# 張量與 numpy 都可以使$ reshape 對陣列進行形狀改變
print(x.reshape(1,2).shape)    # torch.Size([1, 2])
print(anp.reshape(1,2).shape)  # (1, 2)

torch.Size([2, 1])
torch.Size([2, 1])
2 (2, 1)
torch.Size([1, 2])
(1, 2)


#### 張量與 Numpy 的切片

In [33]:
x = torch.randn(2,4)
print(x[0,2:4])   # 取第一列，第 2、3 個元素
anp = np.random.rand(2,4)  # 產生 2 x 4 的隨機 numpy array
print(anp[0,2:4])

tensor([0.7733, 0.1023])
[0.68533986 0.83971181]


#### 張量與 Numpy 轉換的陷阱 

In [35]:
nparray = np.array([1,1])
x = torch.from_numpy(nparray)
print(x)      # tensor([1, 1])
nparray += 1  # 直接更對 nparray 變數，造成參考此陣列的張量也受影響
print(x)      # tensor([2, 2])
y = torch.from_numpy(nparray)
print(y)      # tensor([2, 2])
nparray = nparray + 1  # 新建一個變數儲存更動後的 nparray，張量不會受影響
print(y)      # tensor([2, 2])

tensor([1, 1])
tensor([2, 2])
tensor([2, 2])
tensor([2, 2])


## 張量在 CPU 與 GPU 的操作

In [42]:
# 將 CPU 記憶體的張量轉化到 GPU
a = torch.FloatTensor([2,2])
b = a.cuda()  # 將張量重新在 GPU 所管理的記憶體中建立
print(b)
# a + b  ❌ 錯誤！不能跨裝置直接運算

# 直接在 GPU 上定義
a = torch.tensor([4], device="cuda")
print(a)

# 使用 to() 方法
a = torch.FloatTensor([4])
print(a)
print(a.to("cuda:0"))

tensor([2., 2.], device='cuda:0')
tensor([4], device='cuda:0')
tensor([4.])
tensor([4.], device='cuda:0')


## 生成隨機值張量

#### 設定隨機值種子

In [45]:
torch.initial_seed()
torch.manual_seed(2)
torch.initial_seed()

2

#### 按照指定形狀生成

In [46]:
torch.randn(2,3)

tensor([[ 0.3923, -0.2236, -0.3195],
        [-1.2050,  1.0445, -0.6332]])

#### 生成線性空間的隨機值

In [50]:
print(torch.arange(1, 10, step=2))  # 在 1 到 10 間，每 2 步進行取值，arange() 包含起始點但不包含終點 
print(torch.linspace(1, 9, steps=5))  # 在 1 到 9 間，均勻取出 5 個數，linspace() 包含起點與終點

tensor([1, 3, 5, 7, 9])
tensor([1., 3., 5., 7., 9.])


#### 生成為初始化的隨機矩陣

In [56]:
print(torch.empty(1,2))

tensor([[-1.8891e+26,  6.0240e-01]])


## 張量的數學運算

In [68]:
d = torch.FloatTensor([2,2])
print(d,d + d)
b = torch.add(a,a)
print(b)
c = torch.Tensor([0,0])
print(torch.add(a,a,out = c))

d.add_(b)
print(d)

tensor([2., 2.]) tensor([4., 4.])
tensor([4., 4.])
tensor([4., 4.])
tensor([6., 6.])


## 張量的資料操作

#### torch.reshape()

In [78]:
a = torch.tensor([[1,2],[3,4]])   # 定義一個二維張量
print(torch.reshape(a,(-1,1)))    
print(torch.reshape(a,(1,-1)))
print(torch.reshape(a,(2,2)))

tensor([[1],
        [2],
        [3],
        [4]])
tensor([[1, 2, 3, 4]])
tensor([[1, 2],
        [3, 4]])


#### torch.t()、torch.transpose()

In [81]:
b = torch.tensor([[5,6,7],[2,8,0]])
print(b)
print(torch.t(b))
print(torch.transpose(b, dim0 = 1,dim1 = 0))

tensor([[5, 6, 7],
        [2, 8, 0]])
tensor([[5, 2],
        [6, 8],
        [7, 0]])
tensor([[5, 2],
        [6, 8],
        [7, 0]])


#### torch.cat()

In [109]:
a = torch.tensor([[1,2],[3,4]])
b = torch.tensor([[5,6],[7,8]])
print(a)
print(b)
print(torch.cat([a,b],dim = 0))
print(torch.cat([a,b],dim = 1))


tensor([[1, 2],
        [3, 4]])
tensor([[5, 6],
        [7, 8]])
tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])
tensor([[1, 2, 5, 6],
        [3, 4, 7, 8]])


In [107]:
a = torch.tensor([1,2])    # shape: (1,2)
b = torch.tensor([5,6])    # shape: (1,2)
print(torch.cat([a,b], dim = 0), torch.cat([a,b], dim = 0).shape)  # shape: (4,)
print(torch.stack([a,b], dim = 0), torch.stack([a,b], dim = 0).shape)   # shape: (2,2)

tensor([1, 2, 5, 6]) torch.Size([4])
tensor([[1, 2],
        [5, 6]]) torch.Size([2, 2])


#### torch.chunk()

In [111]:
a = torch.tensor([[1,2],[3,4]])
# 將張量 a 沿著第 0 維度分割成 2 等份
print(torch.chunk(a, chunks = 2,dim = 0))
print(torch.chunk(a, chunks = 2,dim = 1))

(tensor([[1, 2]]), tensor([[3, 4]]))
(tensor([[1],
        [3]]), tensor([[2],
        [4]]))


#### pytorch.split()

In [112]:
a = torch.tensor([[5,6,7],[2,8,0]])
# 將張量依照第 1 維度分割成 (1,2) 等分的子張量
torch.split(a, split_size_or_sections = (1,2),dim = 1)

(tensor([[5],
         [2]]),
 tensor([[6, 7],
         [8, 0]]))

#### 按照設定值對張量過濾

In [114]:
a = torch.tensor([[1,2],[3,4]])
mask = a.ge(2)   # 找出大於或等於 2 的數
print(mask)      # mask 呈現的是每個元素經過比較後的結果，以 0/1 表示
torch.masked_select(a, mask)  # 將 a 張量依照 mask 遮罩結果進行過濾

tensor([[False,  True],
        [ True,  True]])


tensor([2, 3, 4])

#### 獲取資料的最大值、最小值

In [None]:
# 使用 argmax()、argmin()
a = torch.tensor([[1,2],[3,4]])
torch.argmax(a,dim = 0)
torch.argmin(a,dim = 0)

tensor([0, 0])

#### 根據設定值進行資料截斷

In [115]:
a = torch.tensor([[1,2],[3,4]])
torch.clamp(a, min = 2, max = 3)  # 定義最小值 2，最大值 3 進行截斷

tensor([[2, 2],
        [3, 3]])

## Variable 類型與自動微分模組

In [118]:
import torch
from torch.autograd import Variable
a = torch.FloatTensor([4])
print(Variable(a))  # 張量轉成支援計算梯度的 Variable 物件
print(Variable(a, requires_grad = True))  # 使用 requires_grad 指定該張量是否需要梯度計算
print(a.data)       # Variable 轉為張量物件

tensor([4.])
tensor([4.], requires_grad=True)
tensor([4.])


#### torch.no_grad()

In [123]:
x = torch.ones(2,2, requires_grad = True)
# 暫時關閉 Autograd 的追蹤功能，因此 y 不會被標記需要梯度計算
with torch.no_grad():
    y = x * 2
print(y.requires_grad)

False


In [124]:
@torch.no_grad()   # 用裝飾器的方式修飾函數
def double(x):     # 將張量的計算封裝到函數中
    return x * 2
z = double(x)
print(z.requires_grad)

False


#### torch.enable_grad()

In [125]:
x = torch.ones(2,2, requires_grad = True)
with torch.no_grad():
    with torch.enable_grad():
        y = x * 2
print(x.requires_grad)

True


In [128]:
@torch.enable_grad()
def double(x):
    return x * 2
with torch.no_grad():
    z = double(x)
print(z.requires_grad)

True


#### torch.set_grad_enabled()  對梯度計算進行統一管理

In [131]:
x = torch.ones(2,2, requires_grad = True)
torch.set_grad_enabled(False)   # 統一關閉梯度計算功能
y = x * 2
print(y.requires_grad)          # False
torch.set_grad_enabled(True)    # 統一開啟梯度計算功能
y = x * 2
print(y.requires_grad)          # True

False
True


#### grad_fn 與 is_leaf 屬性

In [138]:
x = torch.ones(2,2,requires_grad = True)
m = x + 2  # 經過計算得到的 Variable 物件
print(m.grad_fn)
# 對 x 變數求關於 m 的梯度
print(m.grad_fn(x))

<AddBackward0 object at 0x00000200F710E740>
(tensor([[1., 1.],
        [1., 1.]], requires_grad=True), None)


In [139]:
x = torch.ones(2,2,requires_grad = True)
print(x.is_leaf)
m = x + 2
print(m.is_leaf)

True
False


#### backward() 方法自動求導

In [148]:
w = torch.randn(3, requires_grad=True)
b = torch.randn(1, requires_grad=True)
x = torch.tensor([1.0,2.0,3.0])
y_pred = w @ x + b  # 內積 + 偏差，預測值
y_true = torch.tensor(10.0)  # 實際值
loss = (y_pred - y_true) ** 2  # 損失函數，為一個純量
loss.backward()  # 反向自動求導
# 注意一定要先用 backward() 求導，grad 屬性才有意義
print(w.grad)

tensor([-20.0948, -40.1896, -60.2843])


## 鉤子函數

#### Forward

In [18]:
import torch
import torch.nn as nn

model = nn.Sequential(
    nn.Linear(4, 3),
    nn.ReLU(),
    nn.Linear(3, 2)
)

# 定義 hook_function
def hook_fn(module, input, output):
    print("模型 : ",module)
    for val in input:
        print("輸入 : ",val)
    for out_val in output:
        print("輸出 : ",out_val)

# 掛在第 1 層（Linear(4, 3)）
handle = model[0].register_forward_hook(hook_fn)
x = torch.randn(1, 4)
y = model(x)  # 執行時 hook 會印出中間輸出

模型 :  Linear(in_features=4, out_features=3, bias=True)
輸入 :  tensor([[-1.8132, -1.7156,  1.2718,  0.0168]])
輸出 :  tensor([-0.5636, -1.0509,  0.5583], grad_fn=<UnbindBackward0>)


#### Backward

In [21]:
import torch
import torch.nn as nn

model = nn.Sequential(
    nn.Linear(4, 3),
    nn.ReLU(),
    nn.Linear(3, 2)
)

# 定義反向鉤子函數
def backward_hook(module, grad_input, grad_output):
    print("模型層：", module)
    print("傳入該層的梯度（grad_input）：", grad_input)
    print("傳出該層的梯度（grad_output）：", grad_output)

# 掛在第 0 層（Linear(4, 3)）
handle = model[0].register_full_backward_hook(backward_hook)

# 輸入與目標張量（注意：模型第一層輸入是 4 維）
x = torch.randn(1, 4)
target = torch.randn(1, 2)

criterion = nn.MSELoss()
# 前向傳播
output = model(x)
loss = criterion(output, target)
# backward() 計算梯度，此時 hook 被觸發
loss.backward()

模型層： Linear(in_features=4, out_features=3, bias=True)
傳入該層的梯度（grad_input）： (None,)
傳出該層的梯度（grad_output）： (tensor([[0.0296, 0.0000, 0.1728]]),)


## 神經網路

### Activation Function

In [10]:
import torch.nn as nn
# 創建一個包含 2 個隨機數的張量，並將其包裝成 Variable
input = torch.autograd.Variable(torch.randn(2))
print(input)
print(nn.Sigmoid()(input))
print(nn.LogSigmoid()(input))
print(nn.Tanh()(input))

tensor([ 1.9245, -0.6642])
tensor([0.8726, 0.3398])
tensor([-0.1362, -1.0794])
tensor([ 0.9583, -0.5811])


#### Swish & Mish

In [12]:
swish_activation = nn.SiLU()
print(swish_activation(input))
print(nn.Mish()(input))

tensor([ 1.6794, -0.2257])
tensor([ 1.8631, -0.2609])


In [13]:
print(nn.GELU()(input))

tensor([ 1.8723, -0.1682])


#### Softmax

In [None]:
# 分別是兩筆樣本各自對三個類別的輸出值
logits = x = torch.tensor([[2,0.5,6],[0.1,0,3]], requires_grad=True)
labels = torch.LongTensor([2,1])
print(logits)
print(labels)
# 計算 Softmax 輸出機率與 CE loss
print('Softmax :',torch.nn.Softmax(dim = 1)(logits))
print('Cross Entropy loss:',torch.nn.CrossEntropyLoss()(logits,labels))

tensor([[2.0000, 0.5000, 6.0000],
        [0.1000, 0.0000, 3.0000]], requires_grad=True)
tensor([2, 1])
Softmax : tensor([[0.0179, 0.0040, 0.9781],
        [0.0498, 0.0451, 0.9051]], grad_fn=<SoftmaxBackward0>)
Cross Entropy loss: tensor(1.5609, grad_fn=<NllLossBackward0>)


### Optimizer

In [None]:
model = nn.Sequential(
    nn.Linear(4, 3),
    nn.ReLU(),
    nn.Linear(3, 2)
)
# 建構 Adam 最佳化器物件
optimizer = torch.optim.Adam(model.parameters(),lr = 0.01)

In [35]:
# param_groups 屬性可以查看最佳化器中的參數
print(optimizer.param_groups[0].keys())

dict_keys(['params', 'lr', 'betas', 'eps', 'weight_decay', 'amsgrad', 'maximize', 'foreach', 'capturable', 'differentiable', 'fused', 'decoupled_weight_decay'])
