In [9]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torchvision
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential

# Gradient descent
## Computation graphs

In [10]:
# Leaf tensors: requires_grad=True asks PyTorch to record the operations applied to them.
a = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)

# Derived tensors: requires_grad is never set explicitly here, yet each result
# still carries a grad_fn because it was computed from tensors that require gradients.
c = a + b
d = b + 1
e = c * d

for label, node in (('c', c), ('d', d), ('e', e)):
    print(label, node)

c tensor(3., grad_fn=<AddBackward0>)
d tensor(2., grad_fn=<AddBackward0>)
e tensor(6., grad_fn=<MulBackward0>)


In [11]:
def f(x):
    """Evaluate the quadratic (x - 2)^2."""
    offset = x - 2
    return offset ** 2

def fp(x):
    """Closed-form derivative of (x - 2)^2 with respect to x, i.e. 2 * (x - 2)."""
    offset = x - 2
    return 2 * offset

# Differentiate f at x = 1 with autograd and compare against the closed-form derivative.
x = torch.tensor([1.0], requires_grad=True)

y = f(x)      # forward pass records the graph
y.backward()  # backward pass stores dy/dx in x.grad

analytic_derivative, autograd_derivative = fp(x), x.grad
print('Analytical f\'(x):', analytic_derivative)
print('PyTorch\'s f\'(x):', autograd_derivative)

Analytical f'(x): tensor([-2.], grad_fn=<MulBackward0>)
PyTorch's f'(x): tensor([-2.])


In [12]:
def g(w):
    """Evaluate g(w) = 2*w[0]*w[1] + w[1]*cos(w[0]) for an indexable tensor w."""
    w0, w1 = w[0], w[1]
    return 2 * w0 * w1 + w1 * torch.cos(w0)

def grad_g(w):
    """Closed-form gradient of 2*w[0]*w[1] + w[1]*cos(w[0]), returned as a new tensor.

    Component 0 is 2*w[1] - w[1]*sin(w[0]); component 1 is 2*w[0] + cos(w[0]).
    """
    w0, w1 = w[0], w[1]
    dg_dw0 = 2 * w1 - w1 * torch.sin(w0)
    dg_dw1 = 2 * w0 + torch.cos(w0)
    return torch.tensor([dg_dw0, dg_dw1])

# Evaluate g at w = (pi, 1) and backpropagate to obtain its gradient.
w = torch.tensor([np.pi, 1], requires_grad=True)

z = g(w)
z.backward()  # stores dz/dw in w.grad

closed_form_grad, autograd_grad = grad_g(w), w.grad
print('Analytical grad g(w)', closed_form_grad)
print('PyTorch\'s grad g(w)', autograd_grad)

Analytical grad g(w) tensor([2.0000, 5.2832])
PyTorch's grad g(w) tensor([2.0000, 5.2832])


In [13]:
def f(x):
    """Evaluate the quadratic (x - 2)^2."""
    offset = x - 2
    return offset ** 2

def fp(x):
    """Closed-form derivative of (x - 2)^2 with respect to x, i.e. 2 * (x - 2)."""
    offset = x - 2
    return 2 * offset

# Minimise f with plain gradient descent, starting from x = 5.
x = torch.tensor([5.0], requires_grad=True)
step_size = 0.25

print('iter,\tx,\tf(x),\tf\'(x),\tf\'(x) pytorch')
for i in range(15):
    y = f(x)
    y.backward()  # compute dy/dx; the result is accumulated into x.grad

    # Columns: iteration, current x, f(x), closed-form derivative, autograd derivative.
    # y already holds f(x), so it is reused instead of evaluating f a second time.
    print('{},\t{:.3f},\t{:.3f},\t{:.3f},\t{:.3f}'.format(
        i, x.item(), y.item(), fp(x).item(), x.grad.item()))

    # Gradient-descent step. The update is applied in place under no_grad() so it is
    # not recorded in the graph; this replaces the legacy `x.data = ...` assignment,
    # which bypasses autograd's correctness checks.
    with torch.no_grad():
        x -= step_size * x.grad

    # backward() accumulates into .grad instead of overwriting it, so the gradient
    # must be reset before the next iteration.
    x.grad.zero_()

iter,	x,	f(x),	f'(x),	f'(x) pytorch
0,	5.000,	9.000,	6.000,	6.000
1,	3.500,	2.250,	3.000,	3.000
2,	2.750,	0.562,	1.500,	1.500
3,	2.375,	0.141,	0.750,	0.750
4,	2.188,	0.035,	0.375,	0.375
5,	2.094,	0.009,	0.188,	0.188
6,	2.047,	0.002,	0.094,	0.094
7,	2.023,	0.001,	0.047,	0.047
8,	2.012,	0.000,	0.023,	0.023
9,	2.006,	0.000,	0.012,	0.012
10,	2.003,	0.000,	0.006,	0.006
11,	2.001,	0.000,	0.003,	0.003
12,	2.001,	0.000,	0.001,	0.001
13,	2.000,	0.000,	0.001,	0.001
14,	2.000,	0.000,	0.000,	0.000


# Activation functions

In [14]:
example_tensor = torch.tensor([-1.0, 1.0, 0.0])

# nn.ReLU is a module: build an instance once, then call it like a function.
activation_fn = nn.ReLU()
activated = activation_fn(example_tensor)

print('example_tensor', example_tensor)
print('activated', activated)

example_tensor tensor([-1.,  1.,  0.])
activated tensor([0., 1., 0.])


# Loss functions

In [15]:
loss_fn = nn.MSELoss()
# NOTE: the name `input` shadows Python's built-in input(); it is kept unchanged here
# so that anything referring to this variable by name keeps working.
input = torch.tensor([0., 0., 0.])    # dtype: torch.float32
# The target is created as float32 to match the input. An int64 target only works
# through implicit type promotion and can raise a dtype-mismatch error once
# gradients are propagated through the loss.
target = torch.tensor([1., 0., -1.])  # dtype: torch.float32
loss = loss_fn(input, target)
print(loss)  # (1 + 0 + 1) / 3 = 0.6667

tensor(0.6667)


# Optimizer

In [16]:
# create a simple model
# A one-sample dataset: a single input 1.0 paired with the target 2.0.
X_simple = torch.tensor([[1.]])
y_simple = torch.tensor([[2.]])

# Model, loss and optimizer. SGD receives the model's parameters so step() can update them.
model = nn.Linear(1, 1)
mse_loss_fn = nn.MSELoss()
optim = torch.optim.SGD(model.parameters(), lr=1e-2)

# Forward pass.
y_hat = model(X_simple)
print('model params before:', model.weight)
loss = mse_loss_fn(y_hat, y_simple)

# Backward pass followed by one SGD update.
optim.zero_grad()  # clear existing gradients before backward() accumulates new ones
loss.backward()
optim.step()
print('model params after:', model.weight)

model params before: Parameter containing:
tensor([[0.7994]], requires_grad=True)
model params after: Parameter containing:
tensor([[0.8297]], requires_grad=True)


# torch.nn
## 取得Module的weight和bias

In [17]:
d_in, d_out = 3, 4
linear_module = nn.Linear(d_in, d_out)
print('W:', linear_module.weight)
print('b:', linear_module.bias)

# Two rows of d_in features each; calling the module applies its linear transformation.
example_tensor = torch.tensor([[1., 2, 3], [4, 5, 6]])
transformed = linear_module(example_tensor)
for label, tensor in (('example_tensor', example_tensor), ('transormed', transformed)):
    print(label, tensor.shape)

print()
print('We can see that the weights exist in the background\n')
for label, param in (('W:', linear_module.weight), ('b:', linear_module.bias)):
    print(label, param)

W: Parameter containing:
tensor([[-0.1791, -0.2872, -0.0266],
        [-0.3199, -0.1829,  0.5222],
        [ 0.1710,  0.2210, -0.0474],
        [ 0.5580,  0.1654, -0.2440]], requires_grad=True)
b: Parameter containing:
tensor([ 0.1062, -0.3651,  0.4903,  0.4130], requires_grad=True)
example_tensor torch.Size([2, 3])
transormed torch.Size([2, 4])

We can see that the weights exist in the background

W: Parameter containing:
tensor([[-0.1791, -0.2872, -0.0266],
        [-0.3199, -0.1829,  0.5222],
        [ 0.1710,  0.2210, -0.0474],
        [ 0.5580,  0.1654, -0.2440]], requires_grad=True)
b: Parameter containing:
tensor([ 0.1062, -0.3651,  0.4903,  0.4130], requires_grad=True)


## 模型搭建

In [18]:
class Hank(nn.Module):
    """CNN made of three conv + max-pool stages followed by two linear layers.

    Every layer is stored as its own attribute and applied in declaration order
    by forward(); the final layer produces 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # The padding value can be derived from the output-size formula in the official docs.
        self.conv1 = Conv2d(3, 32, kernel_size=5, padding=2)
        self.maxpool1 = MaxPool2d(kernel_size=2)
        self.conv2 = Conv2d(32, 32, kernel_size=5, padding=2)
        self.maxpool2 = MaxPool2d(kernel_size=2)
        self.conv3 = Conv2d(32, 64, kernel_size=5, padding=2)
        self.maxpool3 = MaxPool2d(kernel_size=2)
        self.flatten = Flatten()
        # If the flattened size is hard to work out by hand, temporarily inspect the
        # tensor shape inside forward() to find the right in_features value.
        self.linear1 = Linear(in_features=1024, out_features=64)
        self.linear2 = Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Pass x through every layer in declaration order and return the result."""
        pipeline = (
            self.conv1, self.maxpool1,
            self.conv2, self.maxpool2,
            self.conv3, self.maxpool3,
            self.flatten,
            self.linear1, self.linear2,
        )
        for layer in pipeline:
            x = layer(x)
        return x
hank = Hank()
print(hank)

# Smoke test: push a batch of dummy data through the network to confirm it runs without errors.
input = torch.ones(64, 3, 32, 32)
output = hank(input)
print(output.shape)


Hank(
  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=1024, out_features=64, bias=True)
  (linear2): Linear(in_features=64, out_features=10, bias=True)
)
torch.Size([64, 10])


## Sequential

In [19]:
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential

class Hank2(nn.Module):
    """CNN whose conv, max-pool, flatten and linear layers live in one Sequential container."""

    def __init__(self):
        super().__init__()
        layers = [
            # The padding value can be derived from the output-size formula in the official docs.
            Conv2d(3, 32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(32, 32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(32, 64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        ]
        self.model1 = Sequential(*layers)

    def forward(self, x):
        """Run x through the Sequential stack and return its output."""
        return self.model1(x)


# Building the network with Sequential gives the same result as listing the layers
# one by one; the benefit is a much shorter forward().
hank2 = Hank2()
print(hank2)

input = torch.ones(64, 3, 32, 32)
output = hank2(input)
print(output.shape)

Hank2(
  (model1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=1024, out_features=64, bias=True)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)
torch.Size([64, 10])


# 引用修改現有模型

In [24]:
# import torchvision
# from torch import nn
# # train_data = torchvision.datasets.ImageNet("Data/ImageNet", split='train', download=True,
# #                                            transform=torchvision.transforms.ToTensor())

# vgg16_false = torchvision.models.vgg16(pretrained=False)
# vgg16_true = torchvision.models.vgg16(pretrained=True)

# # print(vgg16_true)

# # train_data = torchvision.datasets.CIFAR10('Data/data', train=True, transform=torchvision.transforms.ToTensor(),
# #                                           download=True)

# vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
# # print('---'*30)
# # print(vgg16_true)
# # print('---'*30)
# #
# print(vgg16_false)
# print('---'*30)
# vgg16_false.classifier[6] = nn.Linear(4096, 10)
# print(vgg16_false)