# Gradient Descendent with Autograd and Backpropagation

## Training Loop with Numpy

In [1]:
import numpy as np

In [2]:
#f= w * x

In [3]:
#f= 2 * x

In [4]:
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

In [5]:
w=0.0

### Model Prediction

In [6]:
def forward(x):
    return w * x

### Loss

In [7]:
def loss(y, y_predicted):
    return((y_predicted-y)**2).mean()

### Gradient

In [8]:
def gradient(x,y,y_predicted):
    """
    gradient
    MSE = 1/N ((w*x)-y)**2
    dJ/dw = 1/N 2x ((w*x)-y)
    """
    return np.dot(2*x, y_predicted-y).mean() #1/N 2x ((w*x)-y)

In [9]:
# Forward Pass
print(f"prediction before training: f(5){forward(5):.3f}")

prediction before training: f(5)0.000


### Training

In [10]:
learning_rate = 0.01
n_iters = 10
for epoch in range(n_iters):
    # prediction = forward pass
    y_pred =forward(X)

    # loss
    l = loss(Y, y_pred)

    # gradients
    dw = gradient(X, Y, y_pred)
    #update weights into negative direction of gradients
    w -= learning_rate * dw

    if epoch % 1 ==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f} ')

print(f"prediction after training: f(5){forward(5):.3f}")

epoch 1: w = 1.200, loss = 30.00000000 
epoch 2: w = 1.680, loss = 4.79999924 
epoch 3: w = 1.872, loss = 0.76800019 
epoch 4: w = 1.949, loss = 0.12288000 
epoch 5: w = 1.980, loss = 0.01966083 
epoch 6: w = 1.992, loss = 0.00314574 
epoch 7: w = 1.997, loss = 0.00050331 
epoch 8: w = 1.999, loss = 0.00008053 
epoch 9: w = 1.999, loss = 0.00001288 
epoch 10: w = 2.000, loss = 0.00000206 
prediction after training: f(5)9.999


## Training Loop with pytorch

In [11]:
import torch

In [12]:
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

In [13]:
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [14]:
def forward(x):
    return w * x

In [15]:
def loss(y, y_predicted):
    return((y_predicted-y)**2).mean()

In [16]:
learning_rate = 0.01
n_iters = 100
for epoch in range(n_iters):
    # prediction = forward pass
    y_pred =forward(X)

    # loss
    l = loss(Y, y_pred)

    # gradients = backward pass
    l.backward() # dl/dw gradient of loss respect to w
    
    
    #update weights. Weights calculation should not be part of computational graph
    with torch.no_grad():
        w -= learning_rate * w.grad

    # zero gradients. 
    # Empty or convert to zero the gradients
    # to avoid acumulate w in w.grad every time l.backward() is calculated
    w.grad.zero_()
    
    if epoch % 10 ==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f} ')

print(f"prediction after training: f(5){forward(5):.3f}")

epoch 1: w = 0.300, loss = 30.00000000 
epoch 11: w = 1.665, loss = 1.16278565 
epoch 21: w = 1.934, loss = 0.04506890 
epoch 31: w = 1.987, loss = 0.00174685 
epoch 41: w = 1.997, loss = 0.00006770 
epoch 51: w = 1.999, loss = 0.00000262 
epoch 61: w = 2.000, loss = 0.00000010 
epoch 71: w = 2.000, loss = 0.00000000 
epoch 81: w = 2.000, loss = 0.00000000 
epoch 91: w = 2.000, loss = 0.00000000 
prediction after training: f(5)10.000


## Training loop with pytorch and torch.nn

**Steps**
1. Design model (input, output size, forward pass)
2. Construct loss and optimizer
3. Training loop
   - Forward pass: compute predictions
   - backward pass: gradients
   - update weights 

In [17]:
import torch
import torch.nn as nn

In [18]:
#f= w * x
#f= 2 * x
X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

In [19]:
X_test = torch.tensor([5], dtype=torch.float32)

In [20]:
n_samples, n_features = X.shape
print(n_samples, n_features)

4 1


In [21]:
input_size = n_features
output_size = n_features

In [22]:
# model = nn.Linear(input_size, output_size)
# class LinearRegression is the equivalent to model = nn.Linear(input_size, output_size) 
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        #define layers
        self.lin = nn.Linear(input_dim, output_dim)
    def forward(self, x):
        return self.lin(x)
model = LinearRegression(input_size, output_size)

In [23]:
print(f"prediction before training: f({X_test.item()}){model(X_test).item():.3f}") #use item to get the float between them

prediction before training: f(5.0)5.113


In [34]:
learning_rate = 0.01
n_iters = 100
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred =model(X)

    # loss
    l = loss(Y, y_pred)

    # gradients = backward pass
    l.backward() # dl/dw gradient of loss respect to w
    
    
    #update weights. Weights calculation should not be part of computational graph
    optimizer.step()
    
    # zero gradients. 
    optimizer.zero_grad()
    
    if epoch % 10 ==0:
        [w, b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f"prediction after training: f(5){model(X_test).item():.3f}")

epoch 1: w = 1.989, loss = 0.00016451
epoch 11: w = 1.990, loss = 0.00015493
epoch 21: w = 1.990, loss = 0.00014591
epoch 31: w = 1.990, loss = 0.00013742
epoch 41: w = 1.991, loss = 0.00012942
epoch 51: w = 1.991, loss = 0.00012189
epoch 61: w = 1.991, loss = 0.00011479
epoch 71: w = 1.991, loss = 0.00010811
epoch 81: w = 1.992, loss = 0.00010182
epoch 91: w = 1.992, loss = 0.00009589
prediction after training: f(5)9.984
