In [4]:
import numpy as np

In [5]:
def fit_lr(X, y):
    X_aug = np.c_[np.ones(len(X)), X] # [X_aug] = [1, X]  -> shape: (n, 1 + d)
    beta = np.linalg.inv(X_aug.T @ X_aug) @ (X_aug.T @ y)     # ((1+d) x (1+d))^{-1} @ (1+d, k)
    W, b = beta[1:], beta[1]
    return W, b

def fit_lr_sgd(X, y, lr=0.01, epochs=1000, batch_size=32, seed=42):
    np.random.seed(seed)
    n, d = X.shape
    W, b = np.random.randn(d), np.random.randn()
    for epoch in range(epochs):
        idx = np.random.permutation(n)
        for start in range(0, n, batch_size):
            X_batch = X[idx[start:start+batch_size]]
            y_batch = y[idx[start:start+batch_size]]

            y_hat = X_batch @ W + b

            err = y_hat - y_batch

            m = len(X_batch)
            grad_W = X_batch.T @ err / m
            grad_b = np.sum(err) / m

            W -= lr * grad_W
            b -= lr * grad_b
    return W, b


def predict_lr(X, W, b):
    return X @ W + b



In [6]:
# Synthetic data: y = 3 + 2*x1 - x2 + noise
rng = np.random.default_rng(42)
X = rng.normal(size=(300, 2))
true_W = np.array([2.0, -1.0])
true_b = 3.0
y = X @ true_W + true_b + rng.normal(scale=0.3, size=300)

# normal
W1, b1 = fit_lr(X, y)
print("normal  -> W:", W1, "b:", b1)

# sgd
W2, b2 = fit_lr_sgd(X, y, lr=0.05, epochs=200, batch_size=64)
print("sgd    -> W:", W2, "b:", b2)

# quick check
from numpy.linalg import norm
print("||W1-W2||:", norm(W1 - W2), "| b diff |:", abs(b1 - b2))


normal  -> W: [ 1.96790081 -0.98838336] b: 1.9679008071430562
sgd    -> W: [ 1.96707982 -0.99039957] b: 2.9780327763812835
||W1-W2||: 0.002176954686210135 | b diff |: 1.0101319692382273
