Import the libraries and select the compute device (GPU if available)

In [None]:
import torch
import torch.nn as nn
import numpy as np

# Device used by the cells that pass `device=GPU` or call `.to(GPU)`.
# It is always a torch.device: CUDA when available, otherwise the CPU. A bare
# `False` is not a valid device argument, so the CPU fallback keeps those
# cells runnable on machines without a GPU.
GPU = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Create a Tensor and basic operations

In [None]:
# A 2x2 tensor of ones with an explicit 64-bit float dtype.
x = torch.ones(2, 2, dtype=torch.float64)
print(x, x.dtype, sep='\n')

# Uniform random values in [0, 1); the default dtype is 32-bit float.
y = torch.rand((2, 2))
print(y)
print(f'{y.dtype}\n')

# Out-of-place alternatives, each returning a new tensor:
#   z = x + y
#   y = y.add(x)
#   y = torch.add(x, y)   # likewise sub(-), mul(*), div(/)
# A trailing underscore marks the in-place variant: `y += x` dispatches to
# y.add_(x) and modifies y's own storage.
y += x
print(y)

Index, slice, and reshape tensors

In [None]:
x = torch.rand(5, 4)
print(x, x.shape)
print(x[..., 0])  # first column
print(x[1])  # second row
print(x[0, 0])  # a single element is still a (0-dim) tensor
print(x[0, 0].item())  # .item() extracts the plain Python number

# Flatten all 20 elements into a 1-D view of the same data.
y = x.view(x.numel())
print(y, y.shape)

# -1 lets PyTorch infer that dimension: 20 elements / 10 columns = 2 rows.
y = x.view(-1, 10)
print(y, y.shape)

Convert from torch to numpy and vice-versa

In [None]:
# Tensor -> ndarray
x = torch.rand(5)
print(x, type(x))
# For a CPU tensor the array is a view on the same memory, so an in-place
# change to x is visible through y as well.
y = x.numpy()
print(f'{y} {type(y)}\n')

# ndarray -> Tensor
x = np.ones((5,))
print(x, type(x))
# from_numpy also shares memory: an in-place change to x is visible through y.
y = torch.from_numpy(x)
print(y, type(y))

Create a Tensor on the GPU

In [None]:
# Use the notebook-level GPU setting when it is a real torch.device; otherwise
# (e.g. GPU is False because CUDA is unavailable) fall back to the CPU so the
# cell still runs.
device = GPU if isinstance(GPU, torch.device) else torch.device("cpu")

x = torch.rand(5, device=device)  # created directly on the chosen device
y = torch.rand(5)
y = y.to(device)  # move an existing CPU tensor to the chosen device
z = x + y
z = z.to("cpu")  # bring the result back to the CPU

# Tensor.numpy() needs a CPU tensor; for a CUDA tensor the TypeError branch
# runs. On the CPU fallback the conversion succeeds and x becomes an ndarray.
try:
    x = x.numpy()
except TypeError:
    print("Cannot convert a GPU tensor to NumPy")

# z was moved to the CPU above, so this conversion is expected to succeed.
try:
    z = z.numpy()
    print(z, type(z))
except TypeError:
    print("Cannot convert a GPU tensor to NumPy")

AutoGrad

In [None]:
# requires_grad=True makes autograd record every operation applied to x.
x = torch.randn(3, requires_grad=True)
print(x)

# Ways to stop gradient tracking:
#   x.requires_grad_(False)      # in place
#   x.detach_()                  # in place
#   with torch.no_grad():        # temporarily, for the enclosed operations
#       y = x + 2

y = x + 2
print(y)

# backward() without arguments needs a scalar, hence the mean.
z = (y * y * 2).mean()
print(z)

z.backward()  # computes dz/dx
print(x.grad)  # the result is stored on the leaf tensor x

Dummy training loop: gradients accumulate unless they are cleared

In [None]:
weights = torch.randn(4, requires_grad=True)

for epoch in range(3):
    # Stand-in for a model: a scalar that depends on the weights.
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

    # backward() adds to .grad, so reset it before the next iteration.
    weights.grad.zero_()

Backpropagation

In [None]:
# One training sample (x, y) and one trainable weight w.
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

# Forward pass: prediction, then the squared-error loss.
y_hat = w * x
loss = (y_hat - y).pow(2)
print(loss)

# Backward pass: d(loss)/dw = 2 * (w*x - y) * x = -2 for these values.
loss.backward()
print(w.grad)

# Next steps in a real training loop:
#   - update the weights
#   - repeat the forward and backward pass

Manual Gradient Descent

In [None]:
# Model: f = w * x
# Data:  generated by f = 2 * x, so training should drive w towards 2

X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0

# model prediction
def forward(x):
    """Return the model prediction w * x using the cell-level weight w."""
    return w * x

# loss = MSE
def loss(y, y_predicted):
    """Return the mean squared error between y_predicted and y."""
    return ((y_predicted - y)**2).mean()

# gradient
# MSE   = 1/N * sum((w*x - y)**2)
# dJ/dw = 1/N * sum(2*x * (w*x - y))
def gradiant(x, y, y_predicted):
    """Return dJ/dw of the MSE loss for the model f = w * x.

    The per-sample terms 2*x*(y_predicted - y) are averaged over the samples.
    np.dot would already sum them into a scalar, and .mean() of a scalar is a
    no-op, which gives a gradient N times larger than the formula above.
    """
    return (2 * x * (y_predicted - y)).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
# With this data and learning rate each step shrinks the error (w - 2) by a
# factor of 0.85, so 20 steps reach w of about 1.92; raise n_iters for a
# tighter fit.
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss
    L = loss(Y, y_pred)

    # gradients
    dw = gradiant(X, Y, y_pred)

    # update weights
    w -= learning_rate * dw

    if epoch % 2 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {L:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Automatic Gradient Descent

In [None]:
# Model: f = w * x
# Data:  generated by f = 2 * x, so training should drive w towards 2

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

# The single trainable parameter; autograd tracks the operations applied to it.
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


def forward(x):
    """Return the model prediction w * x using the cell-level weight w."""
    return w * x


def loss(y, y_predicted):
    """Return the mean squared error between y_predicted and y."""
    squared_error = (y_predicted - y) ** 2
    return squared_error.mean()


print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 80

for epoch in range(n_iters):
    # forward pass
    y_pred = forward(X)

    # loss
    L = loss(Y, y_pred)

    # backward pass: autograd stores dL/dw in w.grad
    L.backward()

    # The update itself must not be recorded in the computation graph.
    with torch.no_grad():
        w.sub_(learning_rate * w.grad)

    # backward() accumulates into w.grad, so clear it for the next iteration.
    w.grad.zero_()

    # report ten times over the whole run
    if epoch % (n_iters // 10) == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {L:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Training Pipeline (Manual Prediction)

In [None]:
# 1.) Design model (input, output size, forward pass)
# 2.) Construct loss and optimizer
# 3.) Training loop
#       - forward pass: compute prediction
#       - backward pass: gradients
#       - update weights

# Model: f = w * x
# Data:  generated by f = 2 * x, so training should drive w towards 2

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model prediction
def forward(x):
    """Return the model prediction w * x using the cell-level weight w."""
    return w * x

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 80

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss: nn.MSELoss is called as (input, target), i.e. prediction first
    L = loss(y_pred, Y)

    # gradients = backward pass
    L.backward() # dL/dw

    # update weights
    optimizer.step()

    # zero the gradients so they do not accumulate into the next iteration
    optimizer.zero_grad()

    if epoch % int(n_iters/10) == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {L:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Training Pipeline (Automatic Prediction)

In [70]:
# 1.) Design model (input, output size, forward pass)
# 2.) Construct loss and optimizer
# 3.) Training loop
#       - forward pass: compute prediction
#       - backward pass: gradients
#       - update weights

# f = w * x
# f = 2 * x

# nn.Linear expects 2-D input of shape (n_samples, n_features), so every
# sample is its own row here.
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32) #, device=GPU)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32) #, device=GPU)

# A single unseen sample used to probe the model before and after training.
X_test = torch.tensor([5], dtype=torch.float32) #, device=GPU)

n_samples, n_features = X.shape
print(n_samples, n_features)

# The input width comes from X and the output width from the targets Y; they
# are both 1 here, but the output size is not tied to the number of input
# features in general.
input_size = n_features
output_size = Y.shape[1]

#model = nn.Linear(input_size, output_size) #, device=GPU)
class LinearRegression(nn.Module):
    """Linear regression model wrapping a single nn.Linear layer."""

    def __init__(self, input_dim, output_dim):
        """Create the linear layer mapping input_dim features to output_dim outputs."""
        super().__init__()
        # The only layer of the model; it is registered under the name `lin`.
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the output of the linear layer applied to x."""
        return self.lin(x)

model = LinearRegression(input_size, output_size)

print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

# Training
learning_rate = 0.001
n_iters = 30000

loss = nn.MSELoss()
# The optimizer updates every parameter of the model (weight and bias).
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    # loss: nn.MSELoss is called as (input, target), i.e. prediction first
    L = loss(y_pred, Y)

    # gradients = backward pass
    L.backward() # dL/dw

    # update weights
    optimizer.step()

    # zero the gradients so they do not accumulate into the next iteration
    optimizer.zero_grad()

    if epoch % int(n_iters/10) == 0:
        # model.parameters() yields the linear layer's weight, then its bias
        w, b = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {L:.8f}')

print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')


4 1
Prediction before training: f(5) = 3.121
epoch 1: w = 0.570, loss = 13.21897507
epoch 3001: w = 1.893, loss = 0.01659381
epoch 6001: w = 1.956, loss = 0.00275232
epoch 9001: w = 1.982, loss = 0.00045657
epoch 12001: w = 1.993, loss = 0.00007581
epoch 15001: w = 1.997, loss = 0.00001261
epoch 18001: w = 1.999, loss = 0.00000211
epoch 21001: w = 2.000, loss = 0.00000035
epoch 24001: w = 2.000, loss = 0.00000006
epoch 27001: w = 2.000, loss = 0.00000001
Prediction after training: f(5) = 10.000
