<a href="https://colab.research.google.com/github/RicottaChz/Pytorch_Youtube/blob/main/backpropagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import warnings
# Silence every Python warning for the rest of the session: no category or
# module filter is given, so the "ignore" action applies to all of them.
warnings.filterwarnings(action = "ignore")

In [28]:
# Define input: one sample with two features.
input_tensor = torch.tensor([0.2, 0.1], dtype=torch.float64)

# Define weights: w1, w2, b1, b2.
# nn.Embedding serves only as a holder for a trainable `.weight` parameter;
# the cells below read `.weight` directly and never perform a lookup.
w1 = nn.Embedding(2, 2, dtype=torch.float64)
w2 = nn.Embedding(2, 2, dtype=torch.float64)
# Each bias is a single scalar shared by both units of its layer, so the
# table is declared as (1, 1) to match the value it is initialised with.
b1 = nn.Embedding(1, 1, dtype=torch.float64)
b2 = nn.Embedding(1, 1, dtype=torch.float64)

# Init weights: w1, w2, b1, b2.
# Each row of a literal holds the incoming weights of one unit; the transpose
# turns them into columns so `input @ weight` can be used as-is.
# Copying in place under no_grad keeps the existing Parameter objects (which
# already have requires_grad=True) instead of swapping storage via `.data`.
with torch.no_grad():
    w1.weight.copy_(torch.tensor([[0.2, 0.1], [0.4, 0.15]], dtype=torch.float64).t())
    w2.weight.copy_(torch.tensor([[0.65, 0.7], [0.45, 0.3]], dtype=torch.float64).t())
    b1.weight.copy_(torch.tensor([[0.3]], dtype=torch.float64))
    b2.weight.copy_(torch.tensor([[0.5]], dtype=torch.float64))

In [29]:
# Show the input and every parameter before any forward/backward pass.
banner = '*' * 30
print(banner)
print('input_tensor:', input_tensor)
print(banner)
# `.grad` is still None here: it is only filled in once backward() has been
# called on a loss that depends on the parameter. optimizer.step() does not
# compute gradients; it only consumes the ones already stored in `.grad`.
for tag, layer in (('w1', w1), ('b1', b1)):
    print(tag + '.weight:', layer.weight)
    print(tag + '.weight.grad:', layer.weight.grad)
print(banner)
for tag, layer in (('w2', w2), ('b2', b2)):
    print(tag + '.weight:', layer.weight)
    print(tag + '.weight.grad:', layer.weight.grad)
print(banner)

******************************
input_tensor: tensor([0.2000, 0.1000], dtype=torch.float64)
******************************
w1.weight: Parameter containing:
tensor([[0.2000, 0.4000],
        [0.1000, 0.1500]], dtype=torch.float64, requires_grad=True)
w1.weight.grad: None
b1.weight: Parameter containing:
tensor([[0.3000]], dtype=torch.float64, requires_grad=True)
b1.weight.grad: None
******************************
w2.weight: Parameter containing:
tensor([[0.6500, 0.4500],
        [0.7000, 0.3000]], dtype=torch.float64, requires_grad=True)
w2.weight.grad: None
b2.weight: Parameter containing:
tensor([[0.5000]], dtype=torch.float64, requires_grad=True)
b2.weight.grad: None
******************************


In [30]:
# Hidden layer (MLP): affine map of the input, then ReLU.
net_h1_h2 = input_tensor @ w1.weight + b1.weight
out_h1_h2 = torch.relu(net_h1_h2)

In [31]:
# Pre-activation values, laid out as [[net_h1, net_h2]].
print('net_h1_h2:', net_h1_h2)
# Activations [[out_h1, out_h2]] and their .grad, which prints as None:
# no backward pass has run yet at this point in the notebook.
for caption, shown in (('out_h1_h2:', out_h1_h2), ('out_h1_h2.grad:', out_h1_h2.grad)):
    print(caption, shown)

# The grad_fn attribute stores the tracked operation that produced the tensor.

net_h1_h2: tensor([[0.3500, 0.3950]], dtype=torch.float64, grad_fn=<AddBackward0>)
out_h1_h2: tensor([[0.3500, 0.3950]], dtype=torch.float64, grad_fn=<ReluBackward0>)
out_h1_h2.grad: None


In [32]:
# Output layer (MLP): affine map of the hidden activations, then ReLU.
net_o1_o2 = out_h1_h2 @ w2.weight + b2.weight
out_o1_o2 = torch.relu(net_o1_o2)

In [33]:
# Pre-activation values, laid out as [[net_o1, net_o2]].
print('net_o1_o2:', net_o1_o2)
# Activations [[out_o1, out_o2]] and their .grad, which prints as None:
# no backward pass has run yet at this point in the notebook.
for caption, shown in (('out_o1_o2:', out_o1_o2), ('out_o1_o2.grad:', out_o1_o2.grad)):
    print(caption, shown)

net_o1_o2: tensor([[1.0040, 0.7760]], dtype=torch.float64, grad_fn=<AddBackward0>)
out_o1_o2: tensor([[1.0040, 0.7760]], dtype=torch.float64, grad_fn=<ReluBackward0>)
out_o1_o2.grad: None


In [34]:
# Target values for the two output units. The target is a fixed constant and
# is not handed to the optimizer, so it does not need gradient tracking.
label = torch.tensor([0.99, 0.01], dtype=torch.float64)

In [35]:
# Loss function: half the sum of squared errors over the output units.
# Note that this sums the per-unit terms; it does not divide by their count.
residual = label - out_o1_o2
loss = torch.sum(0.5 * torch.square(residual))
print('loss:', loss)

loss: tensor(0.2935, dtype=torch.float64, grad_fn=<SumBackward0>)


In [36]:
# Get gradient of each weight & bias.
# Backpropagates from the scalar loss; the results are stored in the `.grad`
# attribute of the parameters (printed in the next cell).
loss.backward()

In [37]:
# Gradients
# backward() has stored them in each parameter's weight.grad attribute.
for tag, layer in (('w1', w1), ('b1', b1), ('w2', w2), ('b2', b2)):
    print(tag + '.weight.grad:', layer.weight.grad)

w1.weight.grad: tensor([[0.0708, 0.0479],
        [0.0354, 0.0240]], dtype=torch.float64)
b1.weight.grad: tensor([[0.5934]], dtype=torch.float64)
w2.weight.grad: tensor([[0.0049, 0.2681],
        [0.0055, 0.3026]], dtype=torch.float64)
b2.weight.grad: tensor([[0.7800]], dtype=torch.float64)


In [38]:
# State before the optimizer update
# Weights and biases
for tag, layer in (('w1', w1), ('b1', b1), ('w2', w2), ('b2', b2)):
    print(tag + '.weight:', layer.weight)
# Loss, recomputed with a fresh forward pass through the current parameters
h1 = torch.relu(input_tensor @ w1.weight + b1.weight)
output = torch.relu(h1 @ w2.weight + b2.weight)
print('loss:', torch.sum(0.5 * torch.square(label - output)))
# Target next to the network output
print('label:', label)
print('output:', output)

w1.weight: Parameter containing:
tensor([[0.2000, 0.4000],
        [0.1000, 0.1500]], dtype=torch.float64, requires_grad=True)
b1.weight: Parameter containing:
tensor([[0.3000]], dtype=torch.float64, requires_grad=True)
w2.weight: Parameter containing:
tensor([[0.6500, 0.4500],
        [0.7000, 0.3000]], dtype=torch.float64, requires_grad=True)
b2.weight: Parameter containing:
tensor([[0.5000]], dtype=torch.float64, requires_grad=True)
loss: tensor(0.2935, dtype=torch.float64, grad_fn=<SumBackward0>)
label: tensor([0.9900, 0.0100], dtype=torch.float64, requires_grad=True)
output: tensor([[1.0040, 0.7760]], dtype=torch.float64, grad_fn=<ReluBackward0>)


In [39]:
# Learning rate (single place to tune the SGD step size)
lr = 0.5
# Optimizer: plain SGD over the four trainable tensors, driven by `lr` above
# rather than a second hard-coded copy of the value.
optimizer = optim.SGD([w1.weight, w2.weight, b1.weight, b2.weight], lr=lr)

In [40]:
# Optimization: apply one SGD update to the parameters registered with the
# optimizer, using the gradients currently stored in their `.grad` attributes.
optimizer.step()

In [41]:
# State after a single optimizer step
# Updated weights and biases
for tag, layer in (('w1', w1), ('b1', b1), ('w2', w2), ('b2', b2)):
    print(tag + '.weight:', layer.weight)
# Loss after the update.
# NOTE(review): in the saved run this value is higher than the pre-update
# loss, so one step at this learning rate does not decrease it here.
h1 = torch.relu(input_tensor @ w1.weight + b1.weight)
output = torch.relu(h1 @ w2.weight + b2.weight)
print('loss:', torch.sum(0.5 * torch.square(label - output)))
# Target next to the updated network output
print('label:', label)
print('output:', output)

w1.weight: Parameter containing:
tensor([[0.1646, 0.3760],
        [0.0823, 0.1380]], dtype=torch.float64, requires_grad=True)
b1.weight: Parameter containing:
tensor([[0.0033]], dtype=torch.float64, requires_grad=True)
w2.weight: Parameter containing:
tensor([[0.6476, 0.3160],
        [0.6972, 0.1487]], dtype=torch.float64, requires_grad=True)
b2.weight: Parameter containing:
tensor([[0.1100]], dtype=torch.float64, requires_grad=True)
loss: tensor(0.3177, dtype=torch.float64, grad_fn=<SumBackward0>)
label: tensor([0.9900, 0.0100], dtype=torch.float64, requires_grad=True)
output: tensor([[0.2031, 0.1378]], dtype=torch.float64, grad_fn=<ReluBackward0>)


In [42]:
# 1000 training iterations on the single (input_tensor, label) pair
for epoch in range(1000):

    # Reset the stored gradients first.
    # Without this call, backward() adds the new gradients onto the old ones.
    optimizer.zero_grad()

    # Forward pass
    h1 = torch.relu(input_tensor @ w1.weight + b1.weight)
    output = torch.relu(h1 @ w2.weight + b2.weight)

    # Loss
    loss = torch.sum(0.5 * torch.square(label - output))

    # Report the loss on every 10th iteration
    if not epoch % 10:
        print('loss:', loss)

    # Backward pass, compute gradient
    loss.backward()

    # Optimization: update the parameters from the fresh gradients
    optimizer.step()

loss: tensor(0.3177, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0031, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(2.9206e-05, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(2.3337e-06, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(1.8680e-07, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(1.4961e-08, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(1.1984e-09, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(9.5996e-11, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(7.6899e-12, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(6.1601e-13, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(4.9346e-14, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(3.9529e-15, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(3.1666e-16, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(2.5366e-17, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(2.0320e-18, dty

In [43]:
# Validation of output after the 1000 iterations above
h1 = torch.relu(input_tensor @ w1.weight + b1.weight)
output = torch.relu(h1 @ w2.weight + b2.weight)
# Print the target and the network output side by side for comparison
for caption, shown in (('label:', label), ('output:', output)):
    print(caption, shown)

label: tensor([0.9900, 0.0100], dtype=torch.float64, requires_grad=True)
output: tensor([[0.9900, 0.0100]], dtype=torch.float64, grad_fn=<ReluBackward0>)
