# PyTorch Brief Notes

## 1. Matrix Multiply(shape)

In [111]:
import torch

In [112]:
# 1 dimension
x = torch.tensor([2., 3.], requires_grad=True)
w = torch.tensor([4., 5.], requires_grad=True)
b = torch.tensor(5., requires_grad=True)
x, w, b

(tensor([2., 3.], requires_grad=True),
 tensor([4., 5.], requires_grad=True),
 tensor(5., requires_grad=True))

In [113]:
# 1 dimension only need to ensure the len(x) == len(w)
y = torch.matmul(x, w) + b
y

tensor(28., grad_fn=<AddBackward0>)

In [114]:
# more than 1 dimension
x1 = torch.reshape(torch.tensor(range(1, 25), dtype=torch.float32, requires_grad=True), (2, 3, 4))
w1 = torch.reshape(torch.tensor(range(1, 21), dtype=torch.float32, requires_grad=True), (4, 5))
b1 = torch.tensor([5., 6., 7.], requires_grad=True)

In [115]:
y1 = torch.matmul(x1, w1)
x1.shape, w1.shape, y1.shape

(torch.Size([2, 3, 4]), torch.Size([4, 5]), torch.Size([2, 3, 5]))

## 2. Auto gradient

In [116]:
x, w, b, y

(tensor([2., 3.], requires_grad=True),
 tensor([4., 5.], requires_grad=True),
 tensor(5., requires_grad=True),
 tensor(28., grad_fn=<AddBackward0>))

In [117]:
y.backward()
y

tensor(28., grad_fn=<AddBackward0>)

In [118]:
# The gradient of each element will be calculate and save
w.grad, b.grad, x.grad

(tensor([2., 3.]), tensor(1.), tensor([4., 5.]))

## 2. no_grad()
> Drag out from the computational graph, don't calculate gradient

In [119]:
z = x ** 2
with torch.no_grad():
    z1 = x ** 2

z, z1

(tensor([4., 9.], grad_fn=<PowBackward0>), tensor([4., 9.]))

In [120]:
# See how "grad_fn" changed
z.add_(torch.tensor([3., 4.]))

tensor([ 7., 13.], grad_fn=<AddBackward0>)

In [121]:
with torch.no_grad():
    z.mul_(torch.tensor([3., 4.]))
z

tensor([21., 52.], grad_fn=<AddBackward0>)

In [122]:
z.mul_(torch.tensor([3., 4.]))

tensor([ 63., 208.], grad_fn=<MulBackward0>)

## 3. Build a ANN

In [123]:
# feature and label
x = torch.randn(4, 20)
y = torch.randn(4, 5)

In [124]:
# ANN: 20 x 10 x 5
net = torch.nn.Sequential(
    torch.nn.Linear(20, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 5)
)
net, net.parameters()

(Sequential(
   (0): Linear(in_features=20, out_features=10, bias=True)
   (1): ReLU()
   (2): Linear(in_features=10, out_features=5, bias=True)
 ),
 <generator object Module.parameters at 0x113131890>)

In [125]:
loss = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [126]:
for epoch in range(5):
    # Get the prediction
    y_hat = net(x)

    # Calculate loss
    loss_value = loss(y_hat, y)
    print(epoch, loss_value)

    # clear the optimizer
    optimizer.zero_grad()
    # backward
    loss_value.backward()

    # gradient descent begin
    optimizer.step()

0 tensor(0.9745, grad_fn=<MseLossBackward>)
1 tensor(0.9626, grad_fn=<MseLossBackward>)
2 tensor(0.9519, grad_fn=<MseLossBackward>)
3 tensor(0.9418, grad_fn=<MseLossBackward>)
4 tensor(0.9319, grad_fn=<MseLossBackward>)
