The training pipeline typically consists of three steps:
- 1) Design the model (input size, output size, forward pass)
- 2) Construct loss and optimizer
- 3) Training loop (iterate through this a few times until we are done): 
    - forward pass: compute the prediction
    - backward pass: compute gradients
    - update weights
  

In [3]:
# continuing the code from the "PyTorch_Gradient_Descent" file
import torch
import torch.nn as nn # importing the neural network module

# Training data for y = 2 * x, stored as 2-D tensors (one row per sample, one
# column per feature) so the sizes for nn.Linear can be read from the shapes.
X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

n_samples, n_features = X.shape
print(n_samples, n_features)


# The model needs the number of input features and the number of outputs per
# sample. The output width is read from Y instead of being assumed to equal
# the number of input features.
input_size = n_features
output_size = Y.shape[1]
model = nn.Linear(input_size, output_size)

X_test = torch.tensor([5], dtype=torch.float32)
# .item() pulls the Python float out of the single-element prediction tensor;
# no_grad() skips gradient tracking because this is inference only.
with torch.no_grad():
  print(f'Prediction before training is f(5) = {model(X_test).item():.3f}')

# Training parameters
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
# Instead of passing the weights by hand, give the optimizer model.parameters().
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
  # forward pass: compute the prediction
  y_pred = model(X)

  # loss functions are called as loss(input, target): prediction goes first
  l = loss(y_pred, Y)

  # backward pass: compute the gradients of the loss w.r.t. the parameters
  l.backward()

  # optimization step replaces the manual weight update
  optimizer.step()

  # clear the gradients so they do not accumulate into the next iteration
  optimizer.zero_grad()

  if epoch % 10 == 0: # report progress every 10th epoch
    [w, b] = model.parameters() # the model weight along with the bias
    # w[0][0] is the single weight of the 1-in / 1-out linear layer
    print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l.item():.8f}')

with torch.no_grad():
  print(f'Prediction after training is f(5) = {model(X_test).item():.3f}')

4 1
Prediction before training is f(5) = 0.188
epoch 1: w = 0.193, loss = 26.51708603
epoch 11: w = 1.379, loss = 0.90803540
epoch 21: w = 1.579, loss = 0.23254812
epoch 31: w = 1.621, loss = 0.20290354
epoch 41: w = 1.637, loss = 0.19067675
epoch 51: w = 1.648, loss = 0.17956752
epoch 61: w = 1.659, loss = 0.16911556
epoch 71: w = 1.669, loss = 0.15927213
epoch 81: w = 1.679, loss = 0.15000163
epoch 91: w = 1.688, loss = 0.14127083
Prediction after training is f(5) = 9.375


In [5]:
# In our model above, we just had one layer (the 'nn.Linear()'), but for more complex models consisting of multiple
# layers, we can do the following:

# create a custom class for our model representation:

class LinearRegression(nn.Module):
  """Linear-regression model made of a single ``nn.Linear`` layer.

  Follows the custom-module pattern: layers are created in ``__init__`` and
  ``forward`` describes how an input is passed through them.
  """

  def __init__(self, input_dim, output_dim):
    """Create the layer mapping ``input_dim`` features to ``output_dim`` outputs."""
    super().__init__()  # run nn.Module's own constructor first
    self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

  def forward(self, x):
    """Return the linear layer's output for ``x``."""
    prediction = self.lin(x)
    return prediction
  

# Now let's test it out (using the same training code as for the nn.Linear model)

X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)
n_samples, n_features = X.shape

# model needs input and output sizes; the output width is read from Y
input_size = n_features
output_size = Y.shape[1]

# Build the model only after the sizes are defined, so this cell does not
# depend on input_size / output_size left over from an earlier cell.
model = LinearRegression(input_size, output_size)

X_test = torch.tensor([5], dtype=torch.float32)
# no_grad() skips gradient tracking because this is inference only
with torch.no_grad():
  print(f'Prediction before training is f(5) = {model(X_test).item():.3f}')

# Training parameters
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
  # forward pass
  y_pred = model(X)

  # loss functions are called as loss(input, target): prediction goes first
  l = loss(y_pred, Y)

  # backward pass, parameter update, then clear the accumulated gradients
  l.backward()
  optimizer.step()
  optimizer.zero_grad()

  if epoch % 10 == 0: # report progress every 10th epoch
    [w, b] = model.parameters() # weight and bias of the wrapped linear layer
    print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l.item():.8f}')

with torch.no_grad():
  print(f'Prediction after training is f(5) = {model(X_test).item():.3f}')

Prediction before training is f(5) = -3.312
epoch 1: w = -0.310, loss = 52.03084564
epoch 11: w = 1.349, loss = 1.50452352
epoch 21: w = 1.624, loss = 0.18806896
epoch 31: w = 1.676, loss = 0.14532833
epoch 41: w = 1.693, loss = 0.13604680
epoch 51: w = 1.703, loss = 0.12810701
epoch 61: w = 1.712, loss = 0.12064995
epoch 71: w = 1.720, loss = 0.11362746
epoch 81: w = 1.729, loss = 0.10701378
epoch 91: w = 1.737, loss = 0.10078500
Prediction after training is f(5) = 9.472
