In [94]:
from typing import List, Tuple, NewType
import random

Matrix2D = NewType("Matrix2D", List[List[float]])
Vector = NewType("Vector", List[float])

def shape(xs: Vector | Matrix2D) -> Tuple[int, ...]:
    if isinstance(xs, list):
        nrows = len(xs)
        ncols = 0
        if nrows == 0:
            return (0,)
        if isinstance(xs[0], float):
            return (nrows,)
        ncols = len(xs[0])
        return (nrows, ncols)
    else:
        raise TypeError(f"Invalid type, expected either Vector | Matrix2D, got: {type(xs)}")
        
def dot_product(xs: Vector, ys: Vector) -> float:
    assert len(xs) == len(ys), f"Dimension for both vectors should be same, got: dim(xs) = {len(xs)}, dim(ys) = {len(ys)}"
    return sum(x*y for x, y in zip(xs, ys))

class LinearRegressionModel:
    def __init__(self, theta: Vector, b: float):
        self.theta = theta
        self.b = b

    def __repr__(self) -> str:
        return f"LinearRegressionModel <theta: {self.theta}, b: {self.b}"
    __str__ = __repr__

    @staticmethod
    def random(dim: int) -> "LinearRegressionModel":
        return LinearRegressionModel([random.random() for _ in range(dim)], random.random())
        
    def predict(self, X: Matrix2D) -> Vector:
        return [
            dot_product(x, self.theta) + self.b
            for x in X
        ]
    
    def update_weights(
        self,
        X_train: Matrix2D,
        y_train: Vector,
        learning_rate: float,
    ) -> Tuple[Vector, float]:
        nrows, ncols = shape(X_train)
        weight_factor = learning_rate * 2 / nrows
        y_train_hat = self.predict(X_train)
        diffs = [(y_hat - y) for y_hat, y in zip(y_train_hat, y_train)]
        self.theta = [
            t - weight_factor * sum(
                x[col] * diff
                for x, diff in zip(X_train, diffs)
            )
            for col, t in enumerate(self.theta)
        ]
        self.b = self.b - weight_factor * sum(diffs)

class LinearRegressionSGDTrainer:
    @staticmethod
    def fit(
        X_train: Matrix2D,
        y_train: Vector,
        num_iterations: int,
        learning_rate: float
    ) -> LinearRegressionModel:
        nrows, ncols = shape(X_train)
        model = LinearRegressionModel.random(ncols)
        for _ in range(num_iterations):
            model.update_weights(X_train, y_train, learning_rate)
        return model

In [95]:
# let's try to fit a model to predict y = 2x + 1

actual_m = 2
actual_c = 1

num_samples = 1000

xs = [[random.random()] for _ in range(num_samples)]
ys = [actual_m*x + actual_c for [x] in xs]

print(f"{xs=}")
print(f"{ys=}")

xs=[[0.41958156091619436], [0.627775801566388], [0.23739838027394866], [0.8091178815346228], [0.7637175071556188], [0.46265015794601816], [0.5925982194657372], [0.8719399056441556], [0.7817959089423784], [0.047487211672611274], [0.7220133011079504], [0.7657903379693889], [0.7972464871231519], [0.11630243880969982], [0.38980584947586705], [0.867201868988144], [0.7139729868331344], [0.45688034925767096], [0.21561852932053593], [0.21589408512887265], [0.05062565335382907], [0.8312156702591597], [0.09264437091909372], [0.9314333985645148], [0.1711852880656053], [0.9090938523712851], [0.897017205037253], [0.9549345645067234], [0.5125492136699803], [0.020145460964390183], [0.8232714426346222], [0.7862153968424321], [0.15634685340580579], [0.23627142984276206], [0.4228244987716474], [0.43227752373184236], [0.12171742554625475], [0.13318355538792082], [0.9752418717952167], [0.019679272275266668], [0.17260607978987963], [0.4217904943377502], [0.6203635018009834], [0.8112047726051673], [0.746622

In [96]:
model = LinearRegressionSGDTrainer.fit(xs, ys, 1000, 0.1)
model

LinearRegressionModel <theta: [1.9999980795108716], b: 1.0000010262272023

In [98]:
model.predict([[1], [2], [3]])

[2.999999105738074, 4.9999971852489455, 6.999995264759818]

In [99]:
# let's try to fit a model to predict y = 2x1 + 3x2 + 1

actual_m1 = 2
actual_m2 = 3
actual_c = 1

num_samples = 1000

xs = [[random.random(), random.random()] for _ in range(num_samples)]
ys = [actual_m1*x1 + actual_m2*x2 + actual_c for [x1, x2] in xs]

print(f"{xs=}")
print(f"{ys=}")

xs=[[0.8808496122956923, 0.9975133177845458], [0.9016766058732095, 0.041632164680390216], [0.7933381515842157, 0.4095451948496719], [0.37617224902272084, 0.5131062699000982], [0.6888274525503376, 0.5850086973574775], [0.5703816084308352, 0.7602792451677417], [0.590183885474211, 0.1905979832263215], [0.25947913147809354, 0.26437719789338765], [0.5269100730034368, 0.9887996065910415], [0.8691430595879411, 0.19979244437590093], [0.002456682589327852, 0.6440480973206525], [0.07332611373427633, 0.41262830301782105], [0.4308926752309459, 0.7902080433080055], [0.599643968223512, 0.15489877871538316], [0.32574227975451286, 0.3524585506683944], [0.7108953771857949, 0.24080144877312726], [0.8881876959588555, 0.6548423007650641], [0.2663281375885158, 0.727645657007287], [0.6515874264930618, 0.2495916348297268], [0.918660053038258, 0.3989219015261758], [0.5209650531298975, 0.3661912969456722], [0.5835218210371876, 0.007518156311083546], [0.662499646186168, 0.0700372763090863], [0.02012291044318603

In [100]:
model = LinearRegressionSGDTrainer.fit(xs, ys, 1000, 0.1)
model

LinearRegressionModel <theta: [1.99998349985357, 2.9999842943682262], b: 1.0000166886114583

In [101]:
model.predict([[1, 1], [1, 2], [1, 3]])

[5.999984482833254, 8.999968777201481, 11.999953071569708]