https://www.youtube.com/watch?v=VVDHU_TWwUg


# General pytorch training pipeline

1. Design the model (input size, output size, forward pass)
2. Construct loss and optimizer
3. Training loop

- forward pass: compute prediction
- backward pass: compute gradients
- update weights
- repeat


In [1]:
import torch
import torch.nn as nn

In [2]:
# Linear regression on a toy dataset.
# Target function: f(x) = 2 * x. The target itself has no bias term, but the
# nn.Linear layer used by the model below still fits a bias alongside the weight.

# Training data: one sample per row, one feature per column.
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)  # Y = 2 * X
# Single held-out input used to monitor the prediction f(5); expected value is 10.
X_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = X.shape
print("n_samples", n_samples, "n_features", n_features)

input_size = n_features  # 1
# Take the output width from the targets rather than from the input features:
# the two are both 1 here, but they are independent quantities in general.
output_size = Y.shape[1]  # 1


# model = nn.Linear(input_size, output_size)


class LinearRegression(nn.Module):
    """Linear regression model backed by a single ``nn.Linear`` layer."""

    def __init__(self, input_dim: int, output_dim: int) -> None:
        """Build the one linear layer mapping ``input_dim`` inputs to ``output_dim`` outputs."""
        super().__init__()
        # The attribute name is kept as ``lin`` so parameter names stay "lin.weight"/"lin.bias".
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the linear layer applied to ``x``."""
        prediction = self.lin(x)
        return prediction


# Instantiate the model with the sizes computed from the data above.
model = LinearRegression(input_size, output_size)

# Earlier hand-written version, kept for comparison with the nn.Module approach:
# w = torch.tensor(
#     0.0, dtype=torch.float32, requires_grad=True
# )  # requires_grad=True replaces the numpy gradient function :)


# # model prediction
# def forward(x):
#     return w * x


# Pure inference: no_grad() avoids recording an autograd graph for a value
# that is only printed. The printed number is unchanged.
with torch.no_grad():
    print(f"Prediction before training: f(5) = {model(X_test).item():.3f} (should be 10)")

n_samples 4 n_features 1
Prediction before training: f(5) = -2.222 (should be 10)


In [3]:
# Training
learning_rate = 0.01
n_iters = 100

# Mean squared error between predictions and targets.
loss = nn.MSELoss()
# SGD - Stochastic Gradient Descent over all parameters of the model.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    # loss: nn.MSELoss is called as loss(input, target), so the prediction
    # goes first. MSE is symmetric, so the value is the same either way, but
    # this order stays correct if the criterion is swapped for another one.
    l = loss(y_pred, Y)

    # gradient: backpropagate to populate .grad on the model parameters
    l.backward()

    # update weights
    optimizer.step()

    # zero gradients so the next backward() does not accumulate onto them
    optimizer.zero_grad()

    # Report progress every 10th epoch. Note that `l` was computed before
    # optimizer.step(), while w and b are read after it.
    if epoch % 10 == 0:
        w, b = model.parameters()
        print(
            f"epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l:.8f}, b (bias) = {b.item():.3f}"
        )
        # Monitoring only: no_grad() skips building an autograd graph.
        with torch.no_grad():
            print(
                f"Prediction during training: f(5) = {model(X_test).item():.3f} (should be 10)"
            )

# Final check on the held-out input; again inference only.
with torch.no_grad():
    print(f"Prediction after training: f(5) = {model(X_test).item():.3f} (should be 10)")

epoch 1: w = -0.081, loss = 44.73395538, b (bias) = 0.138
Prediction during training: f(5) = -0.268 (should be 10)
epoch 11: w = 1.457, loss = 1.24575353, b (bias) = 0.636
Prediction during training: f(5) = 7.919 (should be 10)
epoch 21: w = 1.710, loss = 0.11546580, b (bias) = 0.698
Prediction during training: f(5) = 9.248 (should be 10)
epoch 31: w = 1.757, loss = 0.08137792, b (bias) = 0.690
Prediction during training: f(5) = 9.474 (should be 10)
epoch 41: w = 1.770, loss = 0.07593330, b (bias) = 0.672
Prediction during training: f(5) = 9.523 (should be 10)
epoch 51: w = 1.778, loss = 0.07149521, b (bias) = 0.652
Prediction during training: f(5) = 9.542 (should be 10)
epoch 61: w = 1.785, loss = 0.06733333, b (bias) = 0.633
Prediction during training: f(5) = 9.556 (should be 10)
epoch 71: w = 1.791, loss = 0.06341418, b (bias) = 0.614
Prediction during training: f(5) = 9.570 (should be 10)
epoch 81: w = 1.797, loss = 0.05972315, b (bias) = 0.596
Prediction during training: f(5) = 9.