> Implement linear regression from scratch, relying only on PyTorch's autograd.

In [1]:
import os
import random
import pickle
import torch

from typing import Dict, Tuple, List, Iterator, Any

# Load Data

In [2]:
# Location of the pickled dataset, relative to this notebook's working directory.
linreg_data_path: str = os.path.join('..', '..', 'data', 'linreg_data', 'linreg_data.pkl')

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open this file if it comes from a trusted source.
with open(linreg_data_path, 'rb') as f:
    data: Dict[str, torch.Tensor] = pickle.load(f)

# Show which entries the file holds and the Python type of each one.
print(data.keys())
print({key: type(value) for key, value in data.items()})

dict_keys(['true_w', 'true_b', 'features', 'labels'])
{'true_w': <class 'torch.Tensor'>, 'true_b': <class 'torch.Tensor'>, 'features': <class 'torch.Tensor'>, 'labels': <class 'torch.Tensor'>}


In [3]:
# Reference parameters stored with the dataset; the learned w and b are
# compared against them once training has finished.
true_w = data['true_w']
true_b = data['true_b']
# Inputs and targets used for training.
features = data['features']
labels = data['labels']

In [4]:
def data_iter(batch_size: int, features: torch.Tensor, labels: torch.Tensor) -> Iterator[Tuple[torch.Tensor, torch.Tensor]]:
    """Yield shuffled mini-batches of ``(features, labels)``.

    The sample order is reshuffled with Python's ``random`` module every time
    the generator is started, so seed ``random`` to get reproducible batches.

    Args:
        batch_size: Maximum number of samples per batch.
        features: Tensor indexed by sample along its first dimension.
        labels: Tensor indexed with the same sample indices as ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for consecutive slices ``idx`` of the
        shuffled sample order. The last batch is shorter when the number of
        samples is not a multiple of ``batch_size``.
    """
    sample_count: int = len(features)
    shuffled: List[int] = list(range(sample_count))
    random.shuffle(shuffled)
    for start in range(0, sample_count, batch_size):
        # List slicing clamps at the end, so no explicit upper bound is needed.
        picked: torch.Tensor = torch.tensor(shuffled[start:start + batch_size])
        yield features[picked], labels[picked]

# Build Model Architecture

In [5]:
# Build linear regression model
def linreg(X: torch.Tensor, w: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """Apply the affine map of a linear regression model.

    Args:
        X: Input features.
        w: Weights, matrix-multiplied with ``X``.
        b: Bias, added (with broadcasting) to the product.

    Returns:
        The predictions ``X @ w + b``.
    """
    weighted: torch.Tensor = torch.matmul(X, w)
    return weighted + b

In [6]:
# Initialize model parameters
# Weights start as small Gaussian noise (mean 0, std 0.1); the bias starts at
# zero. Both are flagged for autograd so that backward() fills in their .grad.
w: torch.Tensor = torch.normal(mean=0.0, std=0.1, size=(2, 1), requires_grad=True)
b: torch.Tensor = torch.zeros(1, requires_grad=True)

# Train Model

In [7]:
# Use MSE as the loss function
def mean_squared_loss(y_hat: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Halved mean squared error between predictions and targets.

    Args:
        y_hat: Predicted values.
        y: Target values. If its shape differs from ``y_hat`` but it holds the
            same number of elements (e.g. ``(N,)`` against ``(N, 1)``), it is
            reshaped to ``y_hat``'s shape before the subtraction.

    Returns:
        Scalar tensor: the mean over all elements of ``(y_hat - y) ** 2 / 2``.
    """
    # Without this, an (N, 1) prediction minus an (N,) target would silently
    # broadcast to an (N, N) matrix and average over the wrong pairs.
    # Shapes with a different element count are left alone so that ordinary
    # broadcasting (e.g. against a scalar target) still works.
    if y.shape != y_hat.shape and y.numel() == y_hat.numel():
        y = y.reshape(y_hat.shape)
    return ((y_hat - y) ** 2 / 2).mean()

In [8]:
# Use sgd as the optimizer
def sgd(params: List[torch.Tensor], lr: float) -> None:
    """Apply one in-place gradient-descent step and clear the gradients.

    Args:
        params: Tensors to update. Each one with a populated ``.grad`` is
            moved by ``-lr * grad`` and then has its gradient zeroed.
            Parameters whose ``.grad`` is ``None`` are left untouched.
        lr: Learning rate (step size).
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # No gradient has been computed for this parameter (it did not
                # take part in the last backward pass); nothing to apply.
                continue
            param -= lr * param.grad
            # Gradients accumulate across backward() calls, so reset them.
            param.grad.zero_()

In [9]:
# Setup hyper-parameters
# Pluggable pieces of the training loop: model, loss function and optimizer step.
net: Any = linreg
loss: Any = mean_squared_loss
trainer: Any = sgd
# Numeric settings.
num_epochs: int = 5    # full passes over the training data
batch_size: int = 10   # samples per mini-batch
lr: float = 0.03       # learning rate handed to the trainer

In [None]:
# Build model training procedure
for epoch in range(num_epochs):
    # One optimizer step per mini-batch.
    for X, y in data_iter(batch_size, features, labels):
        # The loss is a tensor (not a float): backward() is called on it.
        l: torch.Tensor = loss(net(X, w, b), y)
        l.backward()
        trainer([w, b], lr)
    # After each epoch, report the loss over the full data set; no gradients
    # are needed for this evaluation.
    with torch.no_grad():
        train_loss: torch.Tensor = loss(net(features, w, b), labels)
        # NOTE(review): the separator after the epoch number is a full-width
        # comma (U+FF0C); left unchanged so the printed log stays the same.
        print(f'epoch {epoch + 1}， loss {float(train_loss):f}')

epoch 1， loss 0.037818
epoch 2， loss 0.000136
epoch 3， loss 0.000049
epoch 4， loss 0.000048
epoch 5， loss 0.000049


# Verify Training Result

In [11]:
# Show the learned parameters next to the reference values loaded with the
# data. w is reshaped to true_w's shape so both print in the same layout.
print(f"w estimated: {w.reshape(true_w.shape)}, true w: {true_w}")
print(f"b estimated: {b}, true b: {true_b}")

# Element-wise absolute error of each learned parameter.
# w and b require gradients, so the printed tensors carry autograd metadata.
print(f"w and true_w difference: {torch.abs(true_w - w.reshape(true_w.shape))}")
print(f"b and true_b difference: {torch.abs(true_b - b)}")

w estimated: tensor([ 1.9999, -3.4002], grad_fn=<ViewBackward0>), true w: tensor([ 2.0000, -3.4000])
b estimated: tensor([4.2001], requires_grad=True), true b: tensor([4.2000])
w and true_w difference: tensor([0.0001, 0.0002], grad_fn=<AbsBackward0>)
b and true_b difference: tensor([0.0001], grad_fn=<AbsBackward0>)
