In [11]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torchvision
from torch.nn import Conv2d
from torch.utils.data import DataLoader

# Gradient descent
## Computation graphs

In [9]:
# Leaf tensors: requires_grad=True asks PyTorch to keep the graph for them.
a = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)

# Tensors derived from tracked leaves are tracked as well, even though
# requires_grad=True was never set on them explicitly.
c = torch.add(a, b)
d = torch.add(b, 1)
e = torch.mul(c, d)

for label, node in (('c', c), ('d', d), ('e', e)):
    print(label, node)

c tensor(3., grad_fn=<AddBackward0>)
d tensor(2., grad_fn=<AddBackward0>)
e tensor(6., grad_fn=<MulBackward0>)
<AddBackward0 object at 0x000001CA1FF92C10>


In [18]:
def f(x):
    """Quadratic objective ``(x - 2)^2``."""
    offset = x - 2
    return offset ** 2

def fp(x):
    """Closed-form derivative of ``(x - 2)^2``, i.e. ``2 * (x - 2)``."""
    offset = x - 2
    return 2 * offset

# Evaluate f at x = 1 and let autograd differentiate it.
x = torch.tensor([1.0], requires_grad=True)
y = f(x)
y.backward()  # computes the derivative and stores it in x.grad

# The hand-derived derivative and autograd's value are printed side by side.
print("Analytical f'(x):", fp(x))
print("PyTorch's f'(x):", x.grad)

Analytical f'(x): tensor([-2.], grad_fn=<MulBackward0>)
PyTorch's f'(x): tensor([-2.])


In [19]:
def g(w):
    """Evaluate ``2*w0*w1 + w1*cos(w0)`` for a two-element tensor ``w``."""
    w0 = w[0]
    w1 = w[1]
    return 2 * w0 * w1 + w1 * torch.cos(w0)

def grad_g(w):
    """Closed-form gradient of ``2*w0*w1 + w1*cos(w0)`` with respect to ``w``.

    Args:
        w: Two-element tensor ``[w0, w1]``; it may require grad.

    Returns:
        A tensor ``[2*w1 - w1*sin(w0), 2*w0 + cos(w0)]`` that is detached
        from the autograd graph.
    """
    # Detach first: this is a reference value, not part of the graph being
    # differentiated. Building the result with torch.tensor([...]) from
    # tracked 0-d tensors would convert each one to a Python scalar
    # (NOTE(review): recent PyTorch releases appear to warn about that).
    w0 = w[0].detach()
    w1 = w[1].detach()
    return torch.stack([2 * w1 - w1 * torch.sin(w0), 2 * w0 + torch.cos(w0)])

# Evaluate g at w = (pi, 1) and backpropagate to obtain the gradient.
w = torch.tensor([np.pi, 1], requires_grad=True)
z = g(w)
z.backward()  # computes the gradient and stores it in w.grad

# Print the closed-form gradient next to the one autograd computed.
print('Analytical grad g(w)', grad_g(w))
print("PyTorch's grad g(w)", w.grad)

Analytical grad g(w) tensor([2.0000, 5.2832])
PyTorch's grad g(w) tensor([2.0000, 5.2832])


In [21]:
def f(x):
    """Objective to minimise: ``(x - 2)^2``."""
    shifted = x - 2
    return shifted ** 2

def fp(x):
    """Analytic derivative of ``(x - 2)^2``: ``2 * (x - 2)``."""
    shifted = x - 2
    return 2 * shifted

# Plain gradient descent on f, starting from x = 5.
x = torch.tensor([5.0], requires_grad=True)
step_size = 0.25

print('iter,\tx,\tf(x),\tf\'(x),\tf\'(x) pytorch')
for i in range(15):
    y = f(x)
    y.backward()  # compute the gradient; the result is stored in x.grad

    print('{},\t{:.3f},\t{:.3f},\t{:.3f},\t{:.3f}'.format(
        i, x.item(), y.item(), fp(x).item(), x.grad.item()))

    # Perform the GD update in place under no_grad() so the step itself is
    # not recorded by autograd. This replaces the legacy `x.data = ...`
    # idiom, which hides the modification from autograd's safety checks.
    with torch.no_grad():
        x -= step_size * x.grad

    # backward() accumulates into .grad instead of overwriting it, so the
    # gradient has to be cleared before the next iteration.
    x.grad.zero_()

iter,	x,	f(x),	f'(x),	f'(x) pytorch
0,	5.000,	9.000,	6.000,	6.000
1,	3.500,	2.250,	3.000,	3.000
2,	2.750,	0.562,	1.500,	1.500
3,	2.375,	0.141,	0.750,	0.750
4,	2.188,	0.035,	0.375,	0.375
5,	2.094,	0.009,	0.188,	0.188
6,	2.047,	0.002,	0.094,	0.094
7,	2.023,	0.001,	0.047,	0.047
8,	2.012,	0.000,	0.023,	0.023
9,	2.006,	0.000,	0.012,	0.012
10,	2.003,	0.000,	0.006,	0.006
11,	2.001,	0.000,	0.003,	0.003
12,	2.001,	0.000,	0.001,	0.001
13,	2.000,	0.000,	0.001,	0.001
14,	2.000,	0.000,	0.000,	0.000


# Activation functions

In [4]:
# Input with a negative, a positive and a zero entry.
example_tensor = torch.tensor([-1.0, 1.0, 0.0])

# nn.ReLU is a module: instantiate it once, then call the instance.
activation_fn = nn.ReLU()
activated = activation_fn(example_tensor)

for label, tensor in (('example_tensor', example_tensor), ('activated', activated)):
    print(label, tensor)

example_tensor tensor([-1.,  1.,  0.])
activated tensor([0., 1., 0.])


# Loss functions

In [11]:
loss_fn = nn.MSELoss()
input = torch.zeros(3)  # dtype: torch.float32
target = torch.tensor([1, 0, -1], dtype=torch.int64)
loss = loss_fn(input, target)
print(loss)  # (1 + 0 + 1) / 3 = 0.6667

tensor(0.6667)


# Optimizer

In [12]:
# A linear model with a single input feature and a single output.
model = nn.Linear(1, 1)

# A one-sample dataset: x = 1, y = 2.
X_simple = torch.tensor([[1.]])
y_simple = torch.tensor([[2.]])

# MSE objective, optimised with plain SGD over the model's parameters.
mse_loss_fn = nn.MSELoss()
optim = torch.optim.SGD(model.parameters(), lr=1e-2)

# One training step: forward pass, loss, clear old gradients, backward, update.
print('model params before:', model.weight)
y_hat = model(X_simple)
loss = mse_loss_fn(y_hat, y_simple)
optim.zero_grad()
loss.backward()
optim.step()
print('model params after:', model.weight)

model params before: Parameter containing:
tensor([[0.6623]], requires_grad=True)
model params after: Parameter containing:
tensor([[0.6925]], requires_grad=True)


# torch.nn
## 取得Module的weight和bias

In [6]:
def _print_parameters(module):
    """Print the weight and bias parameters held by ``module``."""
    print('W:', module.weight)
    print('b:', module.bias)


d_in, d_out = 3, 4
linear_module = nn.Linear(d_in, d_out)
_print_parameters(linear_module)

# Two rows with d_in values each.
example_tensor = torch.tensor([[1., 2, 3], [4, 5, 6]])
# Applies a linear transformation to the data.
transformed = linear_module(example_tensor)
print('example_tensor', example_tensor.shape)
print('transormed', transformed.shape)
print()
print('We can see that the weights exist in the background\n')
_print_parameters(linear_module)

W: Parameter containing:
tensor([[-0.1211,  0.4893,  0.0330],
        [-0.1761, -0.1207,  0.1824],
        [-0.3230,  0.4867,  0.4097],
        [-0.5031, -0.3502,  0.5532]], requires_grad=True)
b: Parameter containing:
tensor([ 0.1155, -0.1199,  0.1164, -0.3833], requires_grad=True)
example_tensor torch.Size([2, 3])
transormed torch.Size([2, 4])

We can see that the weights exist in the background

W: Parameter containing:
tensor([[-0.1211,  0.4893,  0.0330],
        [-0.1761, -0.1207,  0.1824],
        [-0.3230,  0.4867,  0.4097],
        [-0.5031, -0.3502,  0.5532]], requires_grad=True)
b: Parameter containing:
tensor([ 0.1155, -0.1199,  0.1164, -0.3833], requires_grad=True)


## 模型搭建

In [9]:
class Hank(nn.Module):
    """Minimal ``nn.Module`` example whose forward pass adds one to its input."""

    def __init__(self):
        # No attributes are defined here; only the base class is initialised.
        super().__init__()

    def forward(self, input):
        """Return ``input + 1``; this is what runs when the instance is called."""
        return input + 1


x = torch.tensor(1.0)
hank = Hank()
output = hank(x)  # calling the module instance runs its forward()
print(hank, output, sep='\n')

Hank()
tensor(2.)


In [None]:
# The operands are created as float32 on purpose: bare integer literals would
# give int64 tensors, and F.conv2d is not reliably implemented for integer
# dtypes across PyTorch versions and backends.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)

kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]], dtype=torch.float32)

# F.conv2d expects a 4-D input of shape (minibatch, in_channels, iH, iW) and a
# 4-D weight of shape (out_channels, in_channels / groups, kH, kW).
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))

print(input.shape)
print(kernel.shape)

# stride=1, no padding: 5x5 input with a 3x3 kernel gives a 3x3 output.
output = F.conv2d(input, kernel, stride=1)
print(output)

# stride=2: the window moves two cells at a time, giving a 2x2 output.
output2 = F.conv2d(input, kernel, stride=2)
print(output2)

# padding=1 adds a border of zeros, so the output keeps the 5x5 size.
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)


In [17]:
# CIFAR-10 with train=False, stored under Data/dataset (download=True fetches
# it if needed); ToTensor() converts every image to a tensor.
# Image size: torch.Size([3, 32, 32])
dataset = torchvision.datasets.CIFAR10(
    root="Data/dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
dataloader = DataLoader(dataset, batch_size=64)  # batches of up to 64 samples


class Hank(nn.Module):
    """Single-layer network: one 3x3 convolution mapping 3 channels to 6."""

    def __init__(self):
        super().__init__()
        # stride 1 and no padding, so each spatial dimension shrinks by 2.
        self.conv1 = Conv2d(3, 6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Apply ``conv1`` to ``x`` and return the result."""
        return self.conv1(x)


hank = Hank()
print(hank)
for data in dataloader:
    imgs, targets = data  # imgs: torch.Size([64, 3, 32, 32]) -> batch size, C, H, W
    output = hank(imgs)   # output: torch.Size([64, 6, 30, 30])
    # Regroup the 6 output channels into 3-channel images:
    # torch.Size([64, 6, 30, 30]) -> [xxx, 3, 30, 30]
    output = output.reshape(-1, 3, 30, 30)

Files already downloaded and verified
Hank(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
)
