In [None]:
"""
We will continue where we left off: we will now replace the manually computed loss and parameter
updates with the loss and optimizer classes in PyTorch.
Then we will also replace the manually computed model prediction by implementing a PyTorch model.
After that, PyTorch can do the complete pipeline for us.
"""

In [1]:
import numpy as np
import torch

In [4]:
# Now we are doing the things detailed above

"""
General training pipeline in PyTorch:
    1. Design model (input_size, output_size, forward pass (operations / layers))
    2. Construct loss and optimizer
    3. Training loop:
        - Forward pass: compute prediction
        - Backward pass: gradients
        - Update weights
"""

import torch.nn as nn # Neural Network module

# Ground truth used to generate the data: f(x) = w * x with w = 2.
# Training should therefore move w from its starting value towards 2.

# Training samples (each target is 2 * the corresponding input)
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)  # inputs
Y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)  # targets

# The weight has to be a tensor as well. requires_grad=True asks autograd to track
# operations on w so the gradient of the loss with respect to w can be computed.
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)  # starts at 0

# Model Prediciton
def forward(x):
    """Return the model prediction ``w * x`` using the module-level weight ``w``.

    Named ``forward`` to follow the PyTorch convention for the forward pass.
    """
    prediction = w * x
    return prediction

# We are no longer manually calculating loss

# Remove manually computed gradient

# Main program
# Sanity check before training: the target for f(5) is 10, but w starts at 0.
# .item() extracts the Python float from the 0-dim tensor so it can be formatted.
print(f'Prediction before training: f(5) =  {forward(5).item():.3f}')

# Training hyperparameters
learning_rate = 0.01
n_iters = 100  # Number of iterations

# Mean squared error loss provided by PyTorch; replaces the manually implemented loss.
# The resulting object is called as loss(prediction, target).
loss = nn.MSELoss()

# SGD = stochastic gradient descent.
# It receives the list of tensors to update (here only w) and the learning rate.
optimizer = torch.optim.SGD([w], lr=learning_rate)

# Training loop
for epoch in range(n_iters):
    # Prediction = forward pass
    y_pred = forward(X)

    # Loss: nn.MSELoss expects (input, target), i.e. the prediction comes first.
    # Note that nn.MSELoss(y_pred, Y) would NOT work: that constructs a new loss module
    # instead of evaluating one. The functional form is torch.nn.functional.mse_loss(y_pred, Y).
    l = loss(y_pred, Y)

    # Gradients = backward pass (backpropagation): autograd accumulates dl/dw into w.grad
    l.backward()

    # Update weights: the optimizer applies the gradient step, so no manual update is needed
    optimizer.step()

    # Reset the gradients; otherwise the next l.backward() would keep adding to w.grad
    optimizer.zero_grad()

    if epoch % 10 == 0:
        print(f'Epoch {epoch + 1}: w = {w.item():.3f}, loss = {l.item():.8f}')

print(f'Prediction after training: f(5) =  {forward(5).item():.3f}')
"""
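With these settings w converges to 2.000 and f(5) to 10.000 within the 100 iterations.
Note that autograd computes the exact analytic gradient of the loss, so backpropagation is not
less exact than a hand-written gradient. If an earlier manual version converged differently, that
would have to come from how its gradient/update was written or from the number of iterations.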
"""

Prediction before training: f(5) =  0.000
Epoch 1: w = 0.300, loss = 30.00000000
Epoch 11: w = 1.665, loss = 1.16278565
Epoch 21: w = 1.934, loss = 0.04506890
Epoch 31: w = 1.987, loss = 0.00174685
Epoch 41: w = 1.997, loss = 0.00006770
Epoch 51: w = 1.999, loss = 0.00000262
Epoch 61: w = 2.000, loss = 0.00000010
Epoch 71: w = 2.000, loss = 0.00000000
Epoch 81: w = 2.000, loss = 0.00000000
Epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) =  10.000


'\nIt ends up not as good as the one above becaus the back propogation is not as exact as the numerical\ngradient compuation\n'

In [8]:
# Last step is to replace the forward() method with a PyTorch model

import torch.nn as nn # Neural Network module

# Ground truth used to generate the data: f(x) = w * x with w = 2.
# Training should therefore move the model's weight towards 2.

# Training samples
# nn.Linear treats the last dimension of its input as the features, so the data is stored
# as a 2-D tensor of shape (n_samples, n_features): one row per sample, one column per feature.
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)  # Training input
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)  # Training output (2 * x)

n_samples, n_features = X.shape
# n_samples (number of inputs) = 4
# n_features (number of values in each input) = 1
#     - e.g. if each row were [1, 2, 3] instead of [1], n_features would be 3

# There is no manually created weight tensor any more; the model owns its parameters.

# Model prediction is now done by a PyTorch model.
# A linear model is the simplest case: a single layer.
input_size = n_features
# The output size is the number of target values per sample, so take it from Y rather than
# from X; the two only happen to be equal (1) for this data set.
output_size = Y.shape[1]

# BELOW IS FOR THE DEFAULT ONE, WE CREATE A CUSTOM ONE BELOW THAT
# model = nn.Linear(input_size, output_size) # Parameters are input size and output size of features

# Say we need a custom model
# We do it here:
class LinearRegression(nn.Module):
    """Minimal custom model: a single linear layer wrapped in an ``nn.Module``.

    It computes the same thing as using ``nn.Linear`` directly; it is here as an
    example of how a custom model is written (layers are created in ``__init__``,
    the computation is described in ``forward``).
    """

    def __init__(self, input_dim, output_dim):
        # Run the nn.Module constructor first so the layer below is registered on the model
        super().__init__()

        # Define layers: one linear layer mapping input_dim features to output_dim outputs
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        prediction = self.lin(x)
        return prediction
model = LinearRegression(input_size, output_size)

# Main program
# The prediction is now obtained by calling the model itself instead of a forward() function.
X_test = torch.tensor([5], dtype=torch.float32)  # Test tensor - needed for testing
# .item() is used because the output holds a single value and we want the plain number.
print(f'Prediction before training: f(5) =  {model(X_test).item():.3f}')  # f(5) should be 10

# Training hyperparameters
learning_rate = 0.01
n_iters = 100  # Number of iterations

# Mean squared error loss provided by PyTorch; replaces the manually implemented loss.
# The resulting object is called as loss(prediction, target).
loss = nn.MSELoss()

# SGD = stochastic gradient descent.
# The tensors to update are now supplied by model.parameters(), followed by the learning rate.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(n_iters):
    # Prediction = forward pass; calling the model runs its forward() method
    y_pred = model(X)

    # Loss: nn.MSELoss expects (input, target), i.e. the prediction comes first.
    # Note that nn.MSELoss(y_pred, Y) would NOT work: that constructs a new loss module
    # instead of evaluating one. The functional form is torch.nn.functional.mse_loss(y_pred, Y).
    l = loss(y_pred, Y)

    # Gradients = backward pass (backpropagation): autograd accumulates the gradient of the
    # loss into the .grad attribute of every model parameter
    l.backward()

    # Update weights: the optimizer applies the gradient step, so no manual update is needed
    optimizer.step()

    # Reset the gradients; otherwise the next l.backward() would keep adding to the .grad values
    optimizer.zero_grad()

    if epoch % 10 == 0:
        # Unpack the parameters to print them: w is the linear layer's weight, b is its bias
        w, b = model.parameters()
        # The weight is a 2-D tensor, so index [0][0] to reach the single value it holds here
        print(f'Epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l.item():.8f}')

print(f'Prediction after training: f(5) =  {model(X_test).item():.3f}')  # f(5) should be 10
"""
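This run ends up less accurate than the previous cell (f(5) is about 9.8 instead of 10) because
nn.Linear also learns a bias term and both parameters start from random values, so 100 iterations
at this learning rate are not enough to converge. It is not caused by backpropagation being
inexact: autograd computes exact gradients. The loss is still decreasing, so training for more
iterations should bring the prediction closer to 10.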
"""

Prediction before training: f(5) =  -4.271
Epoch 1: w = -0.314, loss = 64.07299805
Epoch 11: w = 1.522, loss = 1.68022776
Epoch 21: w = 1.821, loss = 0.06466764
Epoch 31: w = 1.872, loss = 0.02163561
Epoch 41: w = 1.883, loss = 0.01936034
Epoch 51: w = 1.888, loss = 0.01820717
Epoch 61: w = 1.891, loss = 0.01714674
Epoch 71: w = 1.895, loss = 0.01614869
Epoch 81: w = 1.898, loss = 0.01520874
Epoch 91: w = 1.901, loss = 0.01432352
Prediction after training: f(5) =  9.801


'\nIt ends up not as good as the one above becaus the back propogation is not as exact as the numerical\ngradient compuation\n'