In [2]:
import numpy as np

class LinearRegressionGD:
    """Linear regression fitted by full-batch gradient descent on the MSE.

    The model is ``y_hat = X @ w + b``. ``fit`` starts from zero weights and
    a zero bias and applies ``n_iters`` gradient steps of size ``lr``.

    Attributes:
        lr: Step size applied to every gradient update.
        n_iters: Number of full-batch updates performed by ``fit``.
        w: Weight vector of shape ``(n_features,)``; ``None`` until ``fit``.
        b: Scalar bias; ``None`` until ``fit``.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        """Store the hyperparameters; the model itself is created by ``fit``."""
        self.lr = lr
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        """Fit weights and bias to the training data.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like with ``n_samples`` targets; it is flattened, so a
                column vector of shape ``(n_samples, 1)`` is accepted.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not 2-D or ``y`` does not hold exactly
                one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        # Without this check a length-1 y would be broadcast against every
        # prediction and the model would silently train on wrong targets.
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        self.w = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.n_iters):
            y_pred = X @ self.w + self.b
            error = y_pred - y

            # Gradients of mean((X @ w + b - y) ** 2) w.r.t. w and b.
            dw = (2 / n_samples) * (X.T @ error)
            db = (2 / n_samples) * np.sum(error)

            self.w -= self.lr * dw
            self.b -= self.lr * db

        return self

    def predict(self, X):
        """Return ``X @ w + b`` for array-like ``X``.

        ``fit`` must have been called first; ``w`` and ``b`` are ``None``
        until then.
        """
        X = np.asarray(X, dtype=float)
        return X @ self.w + self.b


class RidgeRegressionGD(LinearRegressionGD):
    """Linear regression with an L2 (ridge) penalty, fitted by gradient descent.

    Each weight update uses the MSE gradient plus ``2 * alpha * w``, i.e. the
    gradient of ``MSE + alpha * sum(w ** 2)``. The bias is not penalized.

    Attributes:
        alpha: Strength of the L2 penalty on the weights.
    """

    def __init__(self, lr=0.01, n_iters=1000, alpha=0.1):
        """Store the step size, iteration count and penalty strength."""
        super().__init__(lr, n_iters)
        self.alpha = alpha

    def fit(self, X, y):
        """Fit weights and bias with the ridge-penalized gradient.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like with ``n_samples`` targets; it is flattened, so a
                column vector of shape ``(n_samples, 1)`` is accepted.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not 2-D or ``y`` does not hold exactly
                one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        # Without this check a length-1 y would be broadcast against every
        # prediction and the model would silently train on wrong targets.
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        self.w = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.n_iters):
            y_pred = X @ self.w + self.b
            error = y_pred - y

            # Ridge gradient: MSE gradient plus 2*alpha*w (bias not penalized).
            dw = (2 / n_samples) * (X.T @ error) + (2 * self.alpha) * self.w
            db = (2 / n_samples) * np.sum(error)

            self.w -= self.lr * dw
            self.b -= self.lr * db

        return self


class LassoRegressionGD(LinearRegressionGD):
    """Linear regression with an L1 (lasso) penalty, fitted by subgradient descent.

    Each weight update uses the MSE gradient plus ``alpha * sign(w)``, a
    subgradient of ``MSE + alpha * sum(abs(w))``. The bias is not penalized.

    Attributes:
        alpha: Strength of the L1 penalty on the weights.
    """

    def __init__(self, lr=0.01, n_iters=1000, alpha=0.1):
        """Store the step size, iteration count and penalty strength."""
        super().__init__(lr, n_iters)
        self.alpha = alpha

    def fit(self, X, y):
        """Fit weights and bias with the lasso-penalized subgradient.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like with ``n_samples`` targets; it is flattened, so a
                column vector of shape ``(n_samples, 1)`` is accepted.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not 2-D or ``y`` does not hold exactly
                one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        # Without this check a length-1 y would be broadcast against every
        # prediction and the model would silently train on wrong targets.
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        self.w = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.n_iters):
            y_pred = X @ self.w + self.b
            error = y_pred - y

            # Lasso subgradient: MSE gradient plus alpha*sign(w) (bias not
            # penalized). np.sign is 0 for a weight that is exactly zero, so
            # such a weight receives no penalty term on that step.
            dw = (2 / n_samples) * (X.T @ error) + self.alpha * np.sign(self.w)
            db = (2 / n_samples) * np.sum(error)

            self.w -= self.lr * dw
            self.b -= self.lr * db

        return self


# Toy training set: four samples with two features each. The targets satisfy
# y = 3*x1 + 2*x2 + 1 exactly. X_new is the single point to predict.
X = [[1, 2], [2, 1], [3, 4], [4, 3]]
y = [8, 9, 18, 19]
X_new = [[5, 2]]

# Train both regularized models with identical hyperparameters.
ridge = RidgeRegressionGD(lr=0.01, n_iters=2000, alpha=0.1)
ridge.fit(X, y)

lasso = LassoRegressionGD(lr=0.01, n_iters=2000, alpha=0.1)
lasso.fit(X, y)

# Report the learned parameters and the prediction for X_new: ridge first.
print("Ridge Weights:", ridge.w)
print("Ridge Bias:", ridge.b)
print("Ridge Prediction:", ridge.predict(X_new))

# Then lasso.
print("Lasso Weights:", lasso.w)
print("Lasso Bias:", lasso.b)
print("Lasso Prediction:", lasso.predict(X_new))


Ridge Weights: [2.79816595 1.96483262]
Ridge Bias: 1.5920738718233847
Ridge Prediction: [19.51256888]
Lasso Weights: [2.97523627 1.97523627]
Lasso Bias: 1.12364194075655
Lasso Prediction: [19.95029582]


Ridge adds a "penalty for large weights".

Just add this to dw:

$$dw = \text{MSE gradient} + 2\alpha w$$

Lasso adds a "penalty for absolute weights".

Just add:

$$dw = \text{MSE gradient} + \alpha \cdot \operatorname{sign}(w)$$

The bias b is not penalized: its gradient db is the same as in plain linear regression.