In [60]:
import torch
import numpy as np

# TENSOR

In [None]:
x = torch.ones(1,2, dtype=torch.float32)
y = torch.empty(2,2)
a = torch.rand(2,2, dtype=torch.float32)
z = y.add_(x)
z = y.sub_(x)
z = torch.mul(x,y)
print(a[1,1].item())   # slicing
b = a.view(-1)  # reshape
print(b.size()) # shape

c = b.numpy()
b.add_(1)
print(b,c)
b = torch.from_numpy(c)
c += 1
print(b,c)

device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
    x = x.to(device)
    x = torch.ones(5, device=device)
    # x.numpy() # wrong, gpu cannot handle numpy
    x = x.to("cpu").numpy()

# GRADIENCE

In [57]:
# you need model to optimize
x = torch.ones(3, requires_grad=True, device=device) # tell the pytorch need to calculate the gradient later in the optimization steps
y = x + 2 # computation graph for optimization
# first we do a forward pass and calculate the output y,
# pytorch will automatically store a function
# the function then use the backpropagation to get the gradients using dy/dx (backward pass).
z = y*y*2
z = z.mean() # it is a scalar value
print(y, z)
z.backward() # dz/dx -> create a vector jacobain product to get the gradients (jacobian matrix must multiply a vector, chain rule)
print(x.grad) # print the gradients

z = y*y*2 # not the scalar value
v = torch.tensor([1,1.0,0.01], dtype=torch.float32) # give a gradient argument, vector of the same size
z.backward(v) # grad can be implicitly created only for scalar outputs
print(x.grad) # print the gradients

# prevent pytorch checking the gradients: 3 ways
# x.requires_grad_(False)
# x.detach()  # create a new tensor with the same value without gradient
# with torch.no_grad():

weights = torch.ones(4, requires_grad=True)
for epoch in range(2):
    model_output = (weights*3).sum()
    model_output.backward() # will accumulate the gradients
    print(weights.grad)
    # weights.grad.zero_() # empty the gradient

# optimizer = torch.optim.SGD(weights, lr=0.01)
# optimizer.step()
# optimizer.zero_grad()

tensor([3., 3., 3.], grad_fn=<AddBackward0>) tensor(18., grad_fn=<MeanBackward0>)
tensor([4., 4., 4.])
tensor([16.0000, 16.0000,  4.1200])
tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])


# BACKPROPAGATION

chain rule: $\frac{dz}{dx} = \frac{dz}{dy} \cdot \frac{dy}{dx}$
Computational graph: &nbsp; function &nbsp; $\rightarrow$ &nbsp; local gradients &nbsp; $\rightarrow$ &nbsp; final gradients (chain rule)
e.g. $z = x \cdot y$ $\rightarrow$ $\frac{dz}{dx} = \frac{dx \cdot y}{dx} = y$ & $\frac{dz}{dy} = \frac{dx \cdot y}{dy} = x$ &nbsp; $\rightarrow$  &nbsp; $\frac{d(loss)}{dx} = \frac{d(loss)}{dz} \cdot \frac{dz}{dx}$
Three steps:
1. Forward pass: &nbsp; Compute loss
2. Compute local gradients
3. Backward pass: &nbsp; Compute $\frac{d(loss)}{d(weights)}$ using chain rule

In [58]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)
# forward pass and compute the loss
y_hat = w * x
loss = (y_hat - y) ** 2
#backward pass
loss.backward()
print(w.grad)
# update weights and next forward and backward pass

tensor(-2.)


# Optimize model with automatic gradient computation using pytorch autograd package
1. Prediction &nbsp; $\rightarrow$ &nbsp; pytorch model
2. Gradients computation &nbsp; $\rightarrow$ &nbsp; Autograd
3. loss computation &nbsp; $\rightarrow$ &nbsp; pytorch loss
4. Parameter updates &nbsp; $\rightarrow$ &nbsp; pytorch optimizer

In [66]:
# Manually linear regression
# f = w * x
# e.g. f = 2 * x
lr = 0.01
n_iters = 20
# model prediction
def forward(x):
    return w * x
# loss
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()
# gradients
# MSE = 1/N * (w*x - y) ** 2
# dJ/dw = 1/N * 2x * (w*x - y)
def gradient(x,y,y_pred):
    return np.dot(2*x, y_pred-y).mean()

In [72]:
x = np.array([1,2,3,4], dtype=np.float32)
y = np.array([2,4,6,8], dtype=np.float32)
w = 0.0
print(f'Prediction before training: f(5) = {forward(5):.3f}')
#Training
for epoch in range(n_iters):
    y_pred = forward(x)     # forward pass
    l = loss(y, y_pred)     # loss
    dw = gradient(x,y,y_pred)      # gradients
    w -= lr * dw    #update weights
    if epoch % 2 == 0:
        print(f'Epoch: {epoch+1}, Weights: {w:.3f}, Loss: {l:.8f}')

Prediction before training: f(5) = 0.000
-120.0
Epoch: 1, Weights: 1.200, Loss: 30.00000000
-48.0
-19.200003
Epoch: 3, Weights: 1.872, Loss: 0.76800019
-7.68
-3.0720026
Epoch: 5, Weights: 1.980, Loss: 0.01966083
-1.2287936
-0.49152374
Epoch: 7, Weights: 1.997, Loss: 0.00050332
-0.1966095
-0.07864165
Epoch: 9, Weights: 1.999, Loss: 0.00001288
-0.031455517
-0.012580633
Epoch: 11, Weights: 2.000, Loss: 0.00000033
-0.0050354004
-0.0020105839
Epoch: 13, Weights: 2.000, Loss: 0.00000001
-0.00080895424
-0.00032258034
Epoch: 15, Weights: 2.000, Loss: 0.00000000
-0.00013017654
-4.9352646e-05
Epoch: 17, Weights: 2.000, Loss: 0.00000000
-2.0742416e-05
-7.867813e-06
Epoch: 19, Weights: 2.000, Loss: 0.00000000
0.0


In [69]:
x = torch.tensor([1,2,3,4], dtype=torch.float32)
y = torch.tensor([2,4,6,8], dtype=torch.float32)
w = torch.tensor(0.0, requires_grad=True)

print(f'Prediction before training: f(5) = {forward(5):.3f}')
#Training
for epoch in range(100):
    y_pred = forward(x)     # forward pass
    l = loss(y, y_pred)     # loss
    l.backward()     # gradients = backward pass;  dl/dw
    with torch.no_grad():
        w -= lr * w.grad
    w.grad.zero_()    # zero gradients
    if epoch % 10 == 0:
        print(f'Epoch: {epoch+1}, Weights: {w:.3f}, Loss: {l:.8f}')

Prediction before training: f(5) = 0.000
Epoch: 1, Weights: 0.300, Loss: 30.00000000
Epoch: 11, Weights: 1.665, Loss: 1.16278565
Epoch: 21, Weights: 1.934, Loss: 0.04506890
Epoch: 31, Weights: 1.987, Loss: 0.00174685
Epoch: 41, Weights: 1.997, Loss: 0.00006770
Epoch: 51, Weights: 1.999, Loss: 0.00000262
Epoch: 61, Weights: 2.000, Loss: 0.00000010
Epoch: 71, Weights: 2.000, Loss: 0.00000000
Epoch: 81, Weights: 2.000, Loss: 0.00000000
Epoch: 91, Weights: 2.000, Loss: 0.00000000


(0, 0)