In [66]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np


In [67]:
class LinearRegression(nn.Module):
    """Single-output linear model: ``y = x @ weights + bias``.

    Attributes:
        weights: Learnable parameter of shape ``(input_features, 1)``, drawn
            from a standard normal distribution at construction time.
        bias: Learnable parameter of shape ``(1,)``, initialised to zero.
    """

    def __init__(self, input_features):
        """Create the parameters for a model with ``input_features`` inputs."""
        super().__init__()
        initial_weights = torch.randn(input_features, 1)
        initial_bias = torch.zeros(1)
        # Registration order (weights, then bias) determines the order in
        # which ``parameters()`` yields them.
        self.weights = nn.Parameter(initial_weights)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x @ weights + bias``; the bias is broadcast over rows."""
        projection = x @ self.weights
        return projection + self.bias

In [68]:
class MSELoss:
    """Mean squared error with hand-derived gradients for a linear model.

    Calling the instance returns the loss value. ``compute_gradients`` returns
    the closed-form derivatives of that loss with respect to the weights and
    bias of a model of the form ``y_pred = x @ w + b``.
    """

    def __call__(self, y_true, y_pred):
        """Return the mean of the element-wise squared differences."""
        return torch.mean((y_true - y_pred) ** 2)

    def compute_gradients(self, x, y_true, y_pred):
        """Return ``(grad_weights, grad_bias)`` for ``y_pred = x @ w + b``.

        Args:
            x: Input batch; its first dimension is the number of samples.
            y_true: Target values, same shape as ``y_pred``.
            y_pred: Model predictions for ``x``.

        Returns:
            A tuple ``(grad_weights, grad_bias)`` where ``grad_weights`` is
            ``-2/N * x.T @ (y_true - y_pred)`` and ``grad_bias`` is the scalar
            ``-2/N * sum(y_true - y_pred)``. Both are plain tensors that are
            not attached to any autograd graph.

        Raises:
            ValueError: If ``y_true`` and ``y_pred`` have different shapes.
        """
        # Without this check, shapes such as (N,) vs (N, 1) broadcast to an
        # (N, N) error matrix and yield wrong gradients with no error raised.
        if y_true.shape != y_pred.shape:
            raise ValueError(
                "y_true and y_pred must have the same shape, got "
                f"{tuple(y_true.shape)} and {tuple(y_pred.shape)}."
            )

        N = x.shape[0]
        # The derivatives are analytic, so autograd tracking is switched off:
        # otherwise the results would carry a grad_fn and keep the forward
        # graph (and its saved tensors) alive for as long as they are held.
        with torch.no_grad():
            error = y_true - y_pred
            grad_weights = (-2 / N) * torch.matmul(x.T, error)
            grad_bias = (-2 / N) * torch.sum(error)
        return grad_weights, grad_bias

In [69]:
class MomentumOptimizer:
    """Gradient descent with classical momentum, fed explicit gradients.

    For each parameter ``p`` with velocity ``v`` and supplied gradient ``g``,
    one step performs::

        v = momentum_coefficient * v + learning_rate * g
        p = p - v

    Attributes:
        parameters: List of tensors updated in place by ``step``.
        learning_rate: Scale applied to each incoming gradient.
        momentum_coefficient: Decay applied to the previous velocity.
        velocities: One tensor per parameter, initialised to zeros.
    """

    def __init__(self, parameters, learning_rate=0.01, momentum_coefficient=0.9):
        """Store the parameters and create a zero velocity for each one."""
        self.parameters = list(parameters)
        self.learning_rate = learning_rate
        self.momentum_coefficient = momentum_coefficient
        self.velocities = [torch.zeros_like(p) for p in self.parameters]

    def step(self, gradients):
        """Apply one momentum update using the supplied gradients.

        Args:
            gradients: Iterable of gradient tensors, one per parameter and in
                the same order as ``self.parameters``.

        Raises:
            ValueError: If the number of gradients differs from the number of
                parameters.
        """
        # Materialise so that generators work with len() as well.
        gradients = list(gradients)
        if len(self.parameters) != len(gradients):
            raise ValueError("Number of parameters and gradients must match.")

        # The update must not be recorded by autograd. If a gradient carries a
        # grad_fn, a tracked update would chain every new velocity onto the
        # previous one, growing the graph (and retaining old batches) on every
        # step. no_grad also permits the in-place update of leaf parameters.
        with torch.no_grad():
            for i, (param, grad) in enumerate(zip(self.parameters, gradients)):
                self.velocities[i] = (self.momentum_coefficient * self.velocities[i] +
                                      self.learning_rate * grad)
                param.sub_(self.velocities[i])

In [70]:
def train_model(model, x_train, y_train, x_val, y_val,
                          loss_fn, optimizer, num_epochs, batch_size):
    """Train ``model`` with shuffled mini-batches and report losses per epoch.

    Each epoch draws a fresh random permutation of the training set, walks it
    in slices of ``batch_size`` (the last slice may be shorter), obtains the
    gradients from ``loss_fn.compute_gradients`` and passes them to
    ``optimizer.step``. After every epoch the loss on the full training and
    validation sets is computed and printed.

    Args:
        model: Callable mapping an input batch to predictions.
        x_train, y_train: Training inputs and targets, indexed along dim 0.
        x_val, y_val: Validation inputs and targets.
        loss_fn: Object that is callable as ``loss_fn(y_true, y_pred)`` and
            provides ``compute_gradients(x, y_true, y_pred)`` returning
            ``(grad_weights, grad_bias)``.
        optimizer: Object whose ``step`` accepts ``[grad_weights, grad_bias]``.
        num_epochs: Number of passes over the training set.
        batch_size: Number of samples per mini-batch; must be positive.

    Returns:
        A dict ``{"train_loss": [...], "val_loss": [...]}`` holding one float
        per epoch.

    Raises:
        ValueError: If ``batch_size`` is not positive or if ``x_train`` and
            ``y_train`` differ in their number of samples.
    """
    # A negative step would make the batch loop silently run zero times.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer.")

    num_samples = x_train.shape[0]
    if y_train.shape[0] != num_samples:
        raise ValueError("x_train and y_train must contain the same number of samples.")

    history = {"train_loss": [], "val_loss": []}

    for epoch in range(num_epochs):
        permutation = torch.randperm(num_samples)
        x_shuffled = x_train[permutation]
        y_shuffled = y_train[permutation]

        for start in range(0, num_samples, batch_size):
            x_batch = x_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]

            # Call the module itself rather than .forward() so that any
            # registered hooks are honoured.
            y_pred = model(x_batch)

            grad_weights, grad_bias = loss_fn.compute_gradients(x_batch, y_batch, y_pred)
            optimizer.step([grad_weights, grad_bias])

        # Evaluation only needs the loss values, so no autograd graph is built.
        with torch.no_grad():
            train_loss = float(loss_fn(y_train, model(x_train)))
            val_loss = float(loss_fn(y_val, model(x_val)))

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    return history


In [71]:
if __name__ == "__main__":
    # End-to-end demo: fit the linear model to noisy synthetic data and compare
    # the learned parameters with the ones used to generate the data.

    # Seed both random sources. NumPy generates the synthetic data; PyTorch
    # draws the initial weights (torch.randn) and the per-epoch shuffling
    # order (torch.randperm), so without seeding it runs are not reproducible.
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Synthetic data: y = X @ true_weights + true_bias + Gaussian noise (std 2),
    # with features drawn uniformly from [0, 10).
    num_samples = 1000
    input_features = 2
    X = np.random.rand(num_samples, input_features) * 10
    true_weights = np.array([[2.5], [-1.5]])
    true_bias = np.array([5.0])
    y = np.dot(X, true_weights) + true_bias + np.random.randn(num_samples, 1) * 2

    # Hold out the last 20% of the rows for validation.
    split_ratio = 0.8
    split_index = int(num_samples * split_ratio)
    X_train_np, X_val_np = X[:split_index], X[split_index:]
    y_train_np, y_val_np = y[:split_index], y[split_index:]

    # NumPy produces float64; convert to float32 to match the model parameters.
    X_train = torch.tensor(X_train_np, dtype=torch.float32)
    y_train = torch.tensor(y_train_np, dtype=torch.float32)
    X_val = torch.tensor(X_val_np, dtype=torch.float32)
    y_val = torch.tensor(y_val_np, dtype=torch.float32)

    model = LinearRegression(input_features)
    loss_fn = MSELoss()

    optimizer = MomentumOptimizer(parameters=[model.weights, model.bias],
                                  learning_rate=0.001,
                                  momentum_coefficient=0.9)

    num_epochs = 200
    batch_size = 32
    print("Starting training...")
    train_model(model, X_train, y_train, X_val, y_val,
                              loss_fn, optimizer, num_epochs, batch_size)

    print("\nTraining finished.")
    # Detach before printing so the output shows plain values instead of
    # autograd-tracked tensors (no ``grad_fn=...`` suffix).
    print("Learned Weights:", model.weights.detach().flatten())
    print("Learned Bias:", model.bias.item())
    print("True Weights:", true_weights.flatten())
    print("True Bias:", true_bias[0])

Starting training...
Epoch 1/200, Train Loss: 28.5707, Val Loss: 30.0011
Epoch 2/200, Train Loss: 6.5866, Val Loss: 7.0309
Epoch 3/200, Train Loss: 6.1730, Val Loss: 6.5312
Epoch 4/200, Train Loss: 6.1608, Val Loss: 6.7087
Epoch 5/200, Train Loss: 5.5742, Val Loss: 6.0088
Epoch 6/200, Train Loss: 5.2785, Val Loss: 5.6592
Epoch 7/200, Train Loss: 5.4317, Val Loss: 5.9211
Epoch 8/200, Train Loss: 4.9257, Val Loss: 5.2940
Epoch 9/200, Train Loss: 4.7660, Val Loss: 5.1180
Epoch 10/200, Train Loss: 4.6290, Val Loss: 4.8964
Epoch 11/200, Train Loss: 4.5039, Val Loss: 4.7637
Epoch 12/200, Train Loss: 4.4921, Val Loss: 4.7040
Epoch 13/200, Train Loss: 4.3659, Val Loss: 4.6736
Epoch 14/200, Train Loss: 4.3106, Val Loss: 4.5036
Epoch 15/200, Train Loss: 4.1872, Val Loss: 4.4338
Epoch 16/200, Train Loss: 4.1339, Val Loss: 4.3636
Epoch 17/200, Train Loss: 4.1485, Val Loss: 4.3202
Epoch 18/200, Train Loss: 4.1709, Val Loss: 4.4720
Epoch 19/200, Train Loss: 4.0235, Val Loss: 4.2293
Epoch 20/200, Tra