In [8]:
import numpy as np

class LinearRegressionGD:
    """Least-squares linear regression trained with full-batch gradient descent.

    The model is ``y_pred = X @ w + b``. The bias ``b`` is stored as the LAST
    entry of ``weights`` (a column of ones is appended to ``X`` internally), so
    after fitting ``weights`` has shape ``(D + 1,)``. Before fitting it is ``None``.
    """

    def __init__(self):
        # Learned parameters [w_1, ..., w_D, bias]; None until fit() has run.
        self.weights = None

    @staticmethod
    def _with_bias_column(X):
        """Return ``X`` as a float ``(B, D + 1)`` array whose last column is all ones.

        Raises:
            ValueError: if ``X`` is not 2-D.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D with shape (B, D), got shape {X.shape}")
        return np.concatenate([X, np.ones((X.shape[0], 1))], axis=1)

    def fit(self, X, y, lr=0.1, num_epochs=1000, log_every=100):
        """Fit the weights by minimizing mean squared error.

        Args:
            X: array-like of shape ``(B, D)`` with the training features.
            y: array-like with ``B`` targets. Any shape holding exactly ``B``
                values (e.g. ``(B,)`` or ``(B, 1)``) is accepted and flattened.
            lr: gradient-descent step size.
            num_epochs: number of full-batch update steps.
            log_every: print the loss every ``log_every`` epochs; ``None`` or
                ``0`` disables logging.

        Raises:
            ValueError: if ``X`` is not 2-D, the dataset is empty, or the
                number of targets does not match the number of rows in ``X``.
        """
        X = self._with_bias_column(X)  # (B, D + 1)
        B = X.shape[0]
        if B == 0:
            raise ValueError("cannot fit on an empty dataset")

        # Flatten the targets. A (B, 1) column would otherwise broadcast
        # against the (B,) predictions into a (B, B) residual matrix.
        y = np.asarray(y, dtype=float).reshape(-1)
        if y.shape[0] != B:
            raise ValueError(
                f"y has {y.shape[0]} values but X has {B} rows"
            )

        self.weights = np.zeros(X.shape[1])  # (D + 1,), last entry is the bias

        for epoch in range(num_epochs):
            # forward pass
            residual = X @ self.weights - y  # (B,)

            # backward pass: d/dw mean(residual ** 2) = (2 * residual) @ X / B
            grad = (2 * residual) @ X  # (D + 1,)
            grad /= B

            # logging (loss is evaluated at the weights BEFORE this step)
            if log_every and epoch % log_every == 0:
                loss = np.mean(residual ** 2)
                print(f"[{epoch}] {loss=}")

            # optimizer step
            self.weights -= lr * grad

    def predict(self, X):
        """Predict targets for ``X``.

        Args:
            X: array-like of shape ``(B, D)``; ``D`` must match the training data.

        Returns:
            Array of shape ``(B,)`` with the predicted targets.

        Raises:
            RuntimeError: if called before ``fit``.
            ValueError: if ``X`` is not 2-D or has the wrong number of features.
        """
        if self.weights is None:
            raise RuntimeError("predict() called before fit()")

        X = self._with_bias_column(X)  # (B, D + 1)
        if X.shape[1] != self.weights.shape[0]:
            raise ValueError(
                f"X has {X.shape[1] - 1} features but the model was fitted "
                f"with {self.weights.shape[0] - 1}"
            )
        # (B, D + 1) @ (D + 1,) -> (B,)
        return X @ self.weights

In [9]:
# Toy training set: three points lying exactly on the line y = 0.5 * x - 0.5.
X = np.array([1, 2, 3]).reshape(-1, 1)  # (3, 1) feature column
y = np.array([0, 0.5, 1.0])

# Train with the default learning rate and number of epochs.
linear_regression = LinearRegressionGD()
linear_regression.fit(X, y)

# Weights are stored as [slope, bias]; should be around [0.5, -0.5].
print(linear_regression.weights)

# Extrapolate to inputs outside the training range.
X_pred = np.array([5, 6, 10]).reshape(-1, 1)  # (3, 1)
print(linear_regression.predict(X_pred))

[0] loss=np.float64(0.4166666666666667)
[100] loss=np.float64(0.00040191591113936787)
[200] loss=np.float64(3.094567568025492e-06)
[300] loss=np.float64(2.3826746261246055e-08)
[400] loss=np.float64(1.834549819708184e-10)
[500] loss=np.float64(1.4125189415293045e-12)
[600] loss=np.float64(1.0875745851353692e-14)
[700] loss=np.float64(8.373823890849441e-17)
[800] loss=np.float64(6.447459083887788e-19)
[900] loss=np.float64(4.964251992539865e-21)
[ 0.5 -0.5]
[2.  2.5 4.5]
