In [2]:
import numpy as np

In [3]:
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

In [4]:
def forward(x):
    return w*x

In [5]:
def loss(y,y_pred):
    """MSE"""
    return ((y-y_pred)**2).mean()

In [6]:
def gradient(x,y,y_pred):
    return np.dot(2*x, y_pred-y).mean()

In [7]:
print(f"prediction before training: f(5)={forward(5):.3f}")

prediction before training: f(5)=0.000


In [9]:
lr=.01
n_iter=10
for epoch in range(n_iter):
    y_pred = forward(X)
    l = loss(Y, y_pred)
    
    dw = gradient(X,Y,y_pred)
    
    w -= lr * dw
    
    if epoch % 2 == 0:
        print(f"epoch {epoch}: w = {w:.3f}, loss = {l:.8f}")

epoch 0: w = 1.200, loss = 30.00000000
epoch 2: w = 1.872, loss = 0.76800019
epoch 4: w = 1.980, loss = 0.01966083
epoch 6: w = 1.997, loss = 0.00050331
epoch 8: w = 1.999, loss = 0.00001288


In [10]:
print(f"prediction after training: f(5)={forward(5):.3f}")

prediction after training: f(5)=9.999


### Let's do the same thing with pytorch

In [11]:
import torch

In [12]:
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [13]:
print(f"prediction before training: f(5)={forward(5):.3f}")

prediction before training: f(5)=0.000


In [14]:
## forward and loss are the same

for epoch in range(n_iter):
    y_pred = forward(X)
    l = loss(Y, y_pred)
    l.backward() # dl/dw
    with torch.no_grad():
        w -= lr*w.grad
    
    w.grad.zero_()
    
    if epoch % 2 == 0:
        print(f"epoch {epoch}: w = {w:.3f}, loss = {l:.8f}")

epoch 0: w = 0.300, loss = 30.00000000
epoch 2: w = 0.772, loss = 15.66018772
epoch 4: w = 1.113, loss = 8.17471695
epoch 6: w = 1.359, loss = 4.26725292
epoch 8: w = 1.537, loss = 2.22753215


In [15]:
print(f"prediction after training: f(5)={forward(5):.3f}")

prediction after training: f(5)=8.031


### ML Pipeline

1. Design model (input_size, output_size, froward steps)
2. Construct loss and optimizer
3. Training loop
    - forward pass: compute predictions
    - backward pass: compute gradients
    - update weights

In [23]:
import torch.nn as nn

In [34]:
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = X.shape
print(f"samples: {n_samples}, features: {n_features}")

samples: 4, features: 1


In [35]:
model = nn.Linear(1, 1)

In [36]:
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [39]:
for epoch in range(1000):
    # forward pass
    y_pred = model(X)
    
    # compute loss
    l = loss(Y, y_pred)
    
    # backward pass
    l.backward() # dl/dw
    
    # update weights
    optimizer.step()
    
    # empty gradients
    optimizer.zero_grad()
    
    if epoch % 2 == 0:
        [w, b] = model.parameters()
        print(f"epoch {epoch}: w = {w[0].item():.3f}, loss = {l:.8f}")

epoch 0: w = 1.614, loss = 1.13462031
epoch 2: w = 1.703, loss = 0.55323672
epoch 4: w = 1.765, loss = 0.27323592
epoch 6: w = 1.808, loss = 0.13834271
epoch 8: w = 1.838, loss = 0.07331493
epoch 10: w = 1.859, loss = 0.04192632
epoch 12: w = 1.874, loss = 0.02673458
epoch 14: w = 1.884, loss = 0.01934218
epoch 16: w = 1.892, loss = 0.01570586
epoch 18: w = 1.897, loss = 0.01387885
epoch 20: w = 1.901, loss = 0.01292392
epoch 22: w = 1.904, loss = 0.01238972
epoch 24: w = 1.906, loss = 0.01205903
epoch 26: w = 1.908, loss = 0.01182712
epoch 28: w = 1.909, loss = 0.01164370
epoch 30: w = 1.910, loss = 0.01148447
epoch 32: w = 1.911, loss = 0.01133772
epoch 34: w = 1.912, loss = 0.01119783
epoch 36: w = 1.912, loss = 0.01106204
epoch 38: w = 1.913, loss = 0.01092905
epoch 40: w = 1.914, loss = 0.01079824
epoch 42: w = 1.914, loss = 0.01066925
epoch 44: w = 1.915, loss = 0.01054192
epoch 46: w = 1.915, loss = 0.01041619
epoch 48: w = 1.916, loss = 0.01029198
epoch 50: w = 1.916, loss = 0.

epoch 838: w = 1.992, loss = 0.00009016
epoch 840: w = 1.992, loss = 0.00008909
epoch 842: w = 1.992, loss = 0.00008803
epoch 844: w = 1.992, loss = 0.00008698
epoch 846: w = 1.992, loss = 0.00008594
epoch 848: w = 1.992, loss = 0.00008492
epoch 850: w = 1.992, loss = 0.00008390
epoch 852: w = 1.992, loss = 0.00008290
epoch 854: w = 1.992, loss = 0.00008192
epoch 856: w = 1.993, loss = 0.00008094
epoch 858: w = 1.993, loss = 0.00007997
epoch 860: w = 1.993, loss = 0.00007902
epoch 862: w = 1.993, loss = 0.00007808
epoch 864: w = 1.993, loss = 0.00007715
epoch 866: w = 1.993, loss = 0.00007623
epoch 868: w = 1.993, loss = 0.00007532
epoch 870: w = 1.993, loss = 0.00007442
epoch 872: w = 1.993, loss = 0.00007354
epoch 874: w = 1.993, loss = 0.00007266
epoch 876: w = 1.993, loss = 0.00007179
epoch 878: w = 1.993, loss = 0.00007094
epoch 880: w = 1.993, loss = 0.00007009
epoch 882: w = 1.993, loss = 0.00006926
epoch 884: w = 1.993, loss = 0.00006843
epoch 886: w = 1.993, loss = 0.00006761


In [40]:
print(f"prediction after training: f(5)={model(X_test).item():.3f}")

prediction after training: f(5)=9.990
