# Gradient Descent

## Training Pipeline

In [11]:
# 1) design model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#   - forward pass: compute prediction
#   - backward pass: gradients
#   - update weights

import torch
import torch.nn as nn

In [13]:
# Target function to learn: f(x) = w * x, with true weight w = 2 (f = 2 * x).

# Training data laid out as one sample per row and one feature per column,
# which is the 2-D layout x.shape is unpacked into below.
x = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

n_samples, n_features = x.shape
print(n_samples, n_features)

# Single input used to inspect the model's prediction for x = 5.
x_test = torch.tensor([5], dtype=torch.float32)

# Layer dimensions: the input width comes from x's columns and the output
# width from y's columns, so the two stay correct even if they ever differ.
input_size = n_features
output_size = y.shape[1]

# Simplest possible model: a single built-in linear layer.
model = nn.Linear(input_size, output_size)

class LinearRegression(nn.Module):
  """Linear regression model backed by a single ``nn.Linear`` layer.

  Args:
    input_dim: number of input features handed to ``nn.Linear``.
    output_dim: number of output features handed to ``nn.Linear``.
  """

  def __init__(self, input_dim, output_dim):
    super().__init__()
    # The model's only layer, stored on the ``lin`` attribute.
    self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

  def forward(self, x):
    """Apply the linear layer to ``x`` and return its output."""
    prediction = self.lin(x)
    return prediction

model = LinearRegression(input_size, output_size)

# Prediction of the untrained model, for comparison with the one printed
# after the training loop.
print(f'Prediction before training: f(5) = {model(x_test).item():.3f}')

# training hyperparameters
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  # stochastic gradient descent

for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = model(x)

  # loss: nn.MSELoss is called as (input, target), i.e. prediction first
  l = loss(y_pred, y)

  # gradients = backward pass
  l.backward()  # dl/dw

  # update weights
  optimizer.step()

  # zero gradients so they do not accumulate into the next iteration
  optimizer.zero_grad()

  if epoch % 10 == 0:
    # w is read after this iteration's step; l was computed before it
    w, b = model.parameters()
    print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l.item():.8f}')

# This runs after the loop, so it reports the trained model's prediction.
print(f'Prediction after training: f(5) = {model(x_test).item():.3f}')


4 1
Prediction before training: f(5) = -4.877
epoch 1: w = -0.444, loss = 68.74407959
epoch 11: w = 1.459, loss = 1.82273746
epoch 21: w = 1.770, loss = 0.08875199
epoch 31: w = 1.824, loss = 0.04146870
epoch 41: w = 1.837, loss = 0.03796528
epoch 51: w = 1.843, loss = 0.03572734
epoch 61: w = 1.848, loss = 0.03364708
epoch 71: w = 1.852, loss = 0.03168865
epoch 81: w = 1.857, loss = 0.02984422
epoch 91: w = 1.861, loss = 0.02810709
Prediction before training: f(5) = 9.721
