In [1]:
import torch # entire torch library
import torch.nn as nn
import numpy as np
# import torch.nn as nn # all the neural network modules and loss functions
# import torch.optim as optim # all the optimization algorithms
# import torch.nn.functional as F # functions that do not have parameters like activation functions
# from torch.utils.data import DataLoader # helps us create minibatches to train on
# import torchvision.datasets as datasets # used to import the MNIST dataset
# import torchvision.transforms as transforms # contains transformations we can perform on the dataset

# Tensor Basics

In [None]:
# torch.empty only allocates storage; the printed values are whatever was
# already in that memory.

# 1-d tensor holding 3 uninitialised values
x = torch.empty((3,))
print(x)

# 2-d tensor (2 rows x 3 columns) holding 6 uninitialised values
x = torch.empty((2, 3))
print(x)

In [None]:
# Each constructor result gets its own name and is printed, so nothing is
# overwritten before it can be inspected.

# values drawn uniformly from [0, 1)
x_rand = torch.rand(2, 2)
print(x_rand)

# every element is 0
x_zeros = torch.zeros(2, 2)
print(x_zeros)

# uninitialised storage: the contents are arbitrary
x_empty = torch.empty(2, 2)
print(x_empty)

# the datatype is chosen at creation time through the `dtype` argument
x = torch.ones(2, 2, dtype=torch.int)
print(x.dtype)

In [None]:
test = torch.rand(4, 4)
# view exposes the same data under a new shape; -1 asks PyTorch to infer that
# dimension from the number of elements (16 here)
test2 = test.view(-1)
print(test2)
# a column of 16 rows, again with the length inferred
print(test.view(-1, 1))

In [None]:
# Switching on requires_grad asks autograd to record the operations applied to
# this tensor so that gradients with respect to it can be computed later.
x = torch.ones(5).requires_grad_()
print(x)

# Autograd

In [None]:
x = torch.ones(3, requires_grad=True)
print(x)

# every operation on x is recorded, so y and z each carry a grad_fn
y = x + 2
print(y)
z = y * y * 2
# z = z.mean()
print(z)

# z is not a scalar, so backward() needs a vector of the same shape; autograd
# then computes the vector-Jacobian product of v with dz/dx
v = torch.tensor([0.1, 1.0, 10.0])
z.backward(gradient=v)
x.grad  # the result is stored on the leaf tensor x


In [None]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(1):
    # backward() ADDS into weights.grad rather than replacing it, so clear
    # whatever a previous epoch left behind. Without this, raising the epoch
    # count prints 3, 6, 9, ... instead of 3 every time.
    if weights.grad is not None:
        weights.grad.zero_()

    # dummy "model": d(sum(3 * w)) / dw is 3 for every element
    model_output = (weights * 3).sum()
    model_output.backward()

    print(weights.grad)

# Backpropagation

In [None]:
# x is the input and y the target; w is the only value we want a gradient for
x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

# forward pass: prediction followed by the squared-error loss
y_hat = w * x
loss = (y_hat - y).pow(2)
print(loss)

# backward pass: a single call runs the whole gradient computation
loss.backward()
print(w.grad)  # d(loss)/dw after one complete forward and backward pass


### update weights
### next forward and backward pass


# Gradient Descent

In [None]:
# IMPLEMENT LINEAR REGRESSION MANUALLY FIRST:

# f = w * x
# In our example, let's say f = 2 * x
# training sample: X

# inputs 1..4 and targets produced by the true relationship y = 2 * x
X = np.arange(1, 5, dtype=np.float32)
Y = 2 * X
# the single model weight starts at zero
w = 0.0

# model prediction
def forward(x):
    """Return the linear model's prediction for x, using the module-level weight w."""
    return x * w

# loss = MSE
def loss(y, y_predicted):
    """Mean squared error between the targets y and the predictions y_predicted."""
    residual = y_predicted - y
    return (residual * residual).mean()
    
# gradient (compute gradient of loss w/r/t parameters)

# MSE   = 1/N * sum((w*x - y)**2)
# dJ/dw = 1/N * sum(2*x * (w*x - y))
def gradient(x, y, y_predicted):
    """Return dJ/dw for the mean-squared-error loss of the model f(x) = w * x.

    x, y and y_predicted are arrays of the same shape (inputs, targets and
    predictions). The per-sample terms 2*x*(y_predicted - y) are averaged,
    which matches the 1/N factor in the formula above.

    A dot product followed by .mean() would not do this: the dot product has
    already summed the terms into one scalar, so .mean() leaves it unchanged
    and the result is N times too large. With the correct, smaller gradient
    the same learning rate needs more iterations to converge.
    """
    return np.mean(2 * x * (y_predicted - y))


print(f"Prediction before training: f(5) = {forward(5): .3f}")

# Training hyper-parameters
learning_rate = 0.01
n_iters = 10

for epoch in range(n_iters):
    # forward pass, then the loss it produces
    y_pred = forward(X)
    l = loss(Y, y_pred)

    # analytic gradient of the loss with respect to w
    dw = gradient(X, Y, y_pred)

    # gradient-descent step
    w = w - learning_rate * dw

    # report progress after every epoch
    print(f'epoch {epoch + 1}: w = {w: .3f}, loss = {l:.8f}')


print(f"Prediction after training: f(5) = {forward(5): .3f}")


In [None]:
# IMPLEMENT LINEAR REGRESSION MANUALLY BUT USE PYTORCH GRADIENT:

# f = w * x
# In our example, let's say f = 2 * x
# training sample: X

# inputs 1..4 and targets produced by y = 2 * x, stored as float32 tensors
X = torch.arange(1, 5, dtype=torch.float32)
Y = 2 * X
# w is the only trainable value, so it alone asks autograd for a gradient
w = torch.zeros((), dtype=torch.float32, requires_grad=True)

# model prediction
def forward(x):
    """Return the linear model's prediction for x, using the module-level tensor w."""
    return x * w

# loss = MSE
def loss(y, y_predicted):
    """Mean squared error: the average of the squared prediction errors."""
    error = y_predicted - y
    return (error * error).mean()
    

print(f"Prediction before training: f(5) = {forward(5): .3f}")

# Training hyper-parameters
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # forward pass, then the loss it produces
    y_pred = forward(X)
    l = loss(Y, y_pred)

    # backward pass: autograd stores dl/dw in w.grad
    l.backward()

    # the update itself must not be recorded in the computational graph
    with torch.no_grad():
        w.sub_(learning_rate * w.grad)

    # backward() accumulates, so reset w.grad before the next epoch
    w.grad.zero_()

    # report progress after every epoch
    print(f'epoch {epoch + 1}: w = {w: .3f}, loss = {l:.8f}')


print(f"Prediction after training: f(5) = {forward(5): .3f}")


In [None]:
# Pytorch typical work-flow
# step 1.) Design model (input, output size, forward pass)
# step 2.) construct loss and optimizer
# step 3.) Training Loop
    # - forward pass: compute prediction
    # - backward pass: gradients (backpropagation)
    # - update weights

In [7]:
# IMPLEMENT LINEAR REGRESSION with all pytorch pieces:

# f = w * x
# In our example, let's say f = 2 * x
# training sample: X

# 2-d tensors: one row per sample, one column per feature
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

# input used to check the model's prediction before and after training
X_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = X.shape
input_size = n_features # 1
# The output width is a property of the targets, not of the inputs; the two
# are only equal here because both X and Y have a single column.
output_size = Y.shape[1] # 1

# model = nn.Linear(input_size, output_size) # only 1 layer but typically need to come up with model here

# if we needed a custom model:

class LinearRegression(nn.Module):
    """Minimal custom model consisting of a single fully connected layer."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # the only layer: maps input_dim features to output_dim outputs
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to x and return its output."""
        return self.lin(x)
    
model = LinearRegression(input_size, output_size)


print(f"Prediction before training: f(5) = {model(X_test).item(): .3f}")

# Training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    # loss: nn.MSELoss is called as loss(input, target), i.e. the model's
    # prediction first and the ground truth second
    l = loss(y_pred, Y)

    # gradients = backward pass
    l.backward() # fills .grad on every model parameter

    # update weights
    optimizer.step()

    # zero gradients (so they don't accumulate in the parameters' .grad)
    optimizer.zero_grad()

    # print every 10th epoch
    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item(): .3f}, loss = {l:.8f}')

print(f"Prediction after training: f(5) = {model(X_test).item(): .3f}")

Prediction before training: f(5) = -1.097
