## MISA (2024-2025)
- Alohan'ny mamerina dia avereno atao Run ny notebook iray manontolo. Ny fanaovana azy dia redémarrena mihitsy ny kernel aloha (jereo menubar, safidio **Kernel$\rightarrow$Restart Kernel and Run All Cells**).

- Izay misy hoe `YOUR CODE HERE` na `YOUR ANSWER HERE` ihany no fenoina. Afaka manampy cells vaovao raha ilaina. Aza adino ny mameno references eo ambany raha ilaina.

## References
Eto ilay references rehetra no apetraka

---

In [4]:
from random import randrange
import numpy as np
from sklearn.metrics import mean_squared_error, log_loss
from sklearn.datasets import load_breast_cancer, load_diabetes
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.preprocessing import normalize


def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5, error=1e-9):
    """
    sample a few random elements and only return numerical
    in this dimensions
    """

    for i in range(num_checks):
        ix = tuple([randrange(m) for m in x.shape])

        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # increment by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic)
        )
        print(
            "numerical: %f analytic: %f, relative error: %e"
            % (grad_numerical, grad_analytic, rel_error)
        )
        assert rel_error < error

def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

In [5]:
data = load_diabetes()
X, y = data.data, data.target

In [6]:
def mse_loss_vectorized(w, b, X, y):
    """
    MSE loss function WITHOUT FOR LOOPS, NO REGULARIZATION.
    
    Inputs:
    - w: weights, numpy array of shape (D,)
    - b: bias, scalar
    - X: input data, numpy array of shape (N, D)
    - y: true labels, numpy array of shape (N,)
    
    Returns:
    - loss: scalar, mean squared error loss
    - dw: gradient of the loss with respect to weights w, shape (D,)
    - db: gradient of the loss with respect to bias b, scalar
    """
    # Number of samples
    N = X.shape[0]

    # Predicted values: y_pred = X @ w + b
    y_pred = X.dot(w) + b

    # Compute loss: (1/N) * Σ(y_pred - y)^2
    loss = np.mean((y_pred - y) ** 2)

    # Compute gradients
    diff = y_pred - y  # Difference between predicted and actual values
    dw = (2 / N) * X.T.dot(diff)  # Gradient with respect to weights
    db = (2 / N) * np.sum(diff)   # Gradient with respect to bias

    return loss, dw, np.array(db).reshape(1,)


In [7]:
def soft_threshold(x, delta):
    return np.sign(x) * np.maximum(np.abs(x) - delta, 0)

# Lasso Subgradient Descent

In [8]:
def l1_subgradient(w):
    """
    Subgradient of the L1 loss
    """
    dw = np.zeros_like(w)
    return dw
    

def lasso_subgradient_mse_loss_vectorized(w, b, X, y, alpha):
    """
    MSE loss function adding the subgradient for w
    """
    loss, dw, db = mse_loss_vectorized(w, b, X, y)
    # Add the subgradient to dw
    dw += alpha * l1_subgradient(w)
    return loss, dw, db


In [9]:
class LassolSubgradientDescent():
    def __init__(self, alpha=0.1):
        """
        Initialize the Lasso model.

        Inputs:
        - alpha: Regularization strength.
        """
        self.w = None
        self.b = None
        self.alpha = alpha

    def train(self, X, y, learning_rate=1e-3, num_iters=100, batch_size=200, verbose=False):
        """
        Train the model using stochastic gradient descent.

        Inputs:
        - X: Training data of shape (N, d)
        - y: Training labels of shape (N,)
        - learning_rate: Learning rate for optimization.
        - num_iters: Number of iterations to run SGD.
        - batch_size: Size of each minibatch.
        - verbose: Boolean; if true, print progress during optimization.
        """
        N, d = X.shape
        
        if self.w is None:  # Initialize weights and bias
            self.w = 0.001 * np.random.randn(d)
            self.b = 0.0

        loss_history = []
        for it in range(num_iters):
            # Sample batch_size elements for the minibatch
            indices = np.random.choice(N, batch_size, replace=True)
            X_batch = X[indices]
            y_batch = y[indices]
            
            # Compute loss and gradients
            loss, dw, db = self.loss(X_batch, y_batch)
            loss_history.append(loss)

            # Update weights and bias
            self.w -= learning_rate * dw
            self.b -= learning_rate * db
            
            if verbose and it % 100 == 0:
                print(f"Iteration {it} / {num_iters}: Loss {loss:.6f}")

        return loss_history

    def predict(self, X):
        """
        Predict using the trained Lasso model.

        Inputs:
        - X: Data of shape (N, d)

        Returns:
        - y_pred: Predicted labels of shape (N,)
        """
        y_pred = X.dot(self.w) + self.b
        return y_pred

    def loss(self, X_batch, y_batch):
        """
        Compute the loss and gradients using the lasso regularized MSE loss.

        Inputs:
        - X_batch: A minibatch of data of shape (batch_size, d)
        - y_batch: Corresponding labels of shape (batch_size,)

        Returns:
        - loss: Scalar loss value.
        - dw: Gradient with respect to weights w.
        - db: Gradient with respect to bias b.
        """
        return lasso_subgradient_mse_loss_vectorized(self.w, self.b, X_batch, y_batch, self.alpha)


def lasso_subgradient_mse_loss_vectorized(w, b, X, y, alpha):
    """
    Compute the MSE loss with L1 regularization (Lasso).

    Inputs:
    - w: Weights of shape (d,)
    - b: Bias (scalar)
    - X: Data of shape (N, d)
    - y: Labels of shape (N,)
    - alpha: Regularization strength

    Returns:
    - loss: Scalar value of the loss.
    - dw: Gradient with respect to weights w of shape (d,)
    - db: Gradient with respect to bias b (scalar)
    """
    N = X.shape[0]
    
    # Predictions and residuals
    y_pred = X.dot(w) + b
    residuals = y_pred - y

    # Compute loss
    mse_loss = np.sum(residuals ** 2) / (2 * N)
    l1_regularization = alpha * np.sum(np.abs(w))
    loss = mse_loss + l1_regularization

    # Compute gradients
    dw = X.T.dot(residuals) / N + alpha * np.sign(w)
    db = np.sum(residuals) / N

    return loss, dw, db


In [10]:
model = LassolSubgradientDescent(alpha=0.1)
model.train(X, y, learning_rate=1e-2,verbose=True, num_iters=200_000)
pred = model.predict(X)
mse = mean_squared_error(pred, y)

sk_model = Lasso(alpha=0.1, fit_intercept=True)
sk_model.fit(X, y)
sk_pred = sk_model.predict(X)
sk_mse = mean_squared_error(sk_pred, y)

print("MSE scikit-learn:", sk_mse)
print("MSE Coordinate descent model :", mse)
assert mse - sk_mse < 50

Iteration 0 / 200000: Loss 14213.492232
Iteration 100 / 200000: Loss 4812.793665
Iteration 200 / 200000: Loss 2965.665016
Iteration 300 / 200000: Loss 2588.628024
Iteration 400 / 200000: Loss 3067.621242
Iteration 500 / 200000: Loss 2808.373754
Iteration 600 / 200000: Loss 2917.277398
Iteration 700 / 200000: Loss 2470.478722
Iteration 800 / 200000: Loss 2863.515855
Iteration 900 / 200000: Loss 2930.303780
Iteration 1000 / 200000: Loss 2924.312484
Iteration 1100 / 200000: Loss 2958.061796
Iteration 1200 / 200000: Loss 2560.821555
Iteration 1300 / 200000: Loss 2928.643841
Iteration 1400 / 200000: Loss 2673.572735
Iteration 1500 / 200000: Loss 2909.296660
Iteration 1600 / 200000: Loss 2904.491011
Iteration 1700 / 200000: Loss 2764.108387
Iteration 1800 / 200000: Loss 2655.217668
Iteration 1900 / 200000: Loss 2758.991238
Iteration 2000 / 200000: Loss 2440.929563
Iteration 2100 / 200000: Loss 3088.496379
Iteration 2200 / 200000: Loss 3155.580082
Iteration 2300 / 200000: Loss 2646.324164
Ite

In [11]:
model = LassolSubgradientDescent(alpha=2)
model.train(X, y, learning_rate=1e-2,verbose=True, num_iters=200_000)
pred = model.predict(X)
mse = mean_squared_error(pred, y)

sk_model = Lasso(alpha=2, fit_intercept=True)
sk_model.fit(X, y)
sk_pred = sk_model.predict(X)
sk_mse = mean_squared_error(sk_pred, y)

print("MSE scikit-learn:", sk_mse)
print("MSE Coordinate descent model :", mse)
assert mse - sk_mse < 50

Iteration 0 / 200000: Loss 13659.777793
Iteration 100 / 200000: Loss 4265.190781
Iteration 200 / 200000: Loss 3549.492189
Iteration 300 / 200000: Loss 3014.960331
Iteration 400 / 200000: Loss 2874.255207
Iteration 500 / 200000: Loss 2609.894692
Iteration 600 / 200000: Loss 3093.332429
Iteration 700 / 200000: Loss 3546.707588
Iteration 800 / 200000: Loss 2741.856981
Iteration 900 / 200000: Loss 3469.006398
Iteration 1000 / 200000: Loss 3164.434047
Iteration 1100 / 200000: Loss 3355.305131
Iteration 1200 / 200000: Loss 2570.465610
Iteration 1300 / 200000: Loss 3115.653718
Iteration 1400 / 200000: Loss 2741.259456
Iteration 1500 / 200000: Loss 2820.315002
Iteration 1600 / 200000: Loss 2969.420516
Iteration 1700 / 200000: Loss 2827.562091
Iteration 1800 / 200000: Loss 3003.278087
Iteration 1900 / 200000: Loss 2972.033041
Iteration 2000 / 200000: Loss 3011.814884
Iteration 2100 / 200000: Loss 2908.856128
Iteration 2200 / 200000: Loss 3086.781790
Iteration 2300 / 200000: Loss 2625.020184
Ite

# Lasso Proximal Gradient Descent (iterative soft thresholding algorithm or ISTA)

In [12]:
class LassoProximalGradientDescent():
    def __init__(self, alpha=0.1):
        """
        Initialize the Lasso model.

        Inputs:
        - alpha: Regularization strength.
        """
        self.w = None
        self.b = None
        self.alpha = alpha

    def train(self, X, y, learning_rate=1e-3, num_iters=100, batch_size=200, verbose=False):
        """
        Train the model using proximal gradient descent.

        Inputs:
        - X: Training data of shape (N, d)
        - y: Training labels of shape (N,)
        - learning_rate: Learning rate for optimization.
        - num_iters: Number of iterations to run SGD.
        - batch_size: Size of each minibatch.
        - verbose: Boolean; if true, print progress during optimization.
        """
        N, d = X.shape
        
        if self.w is None:  # Initialize weights and bias
            self.w = 0.001 * np.random.randn(d)
            self.b = 0.0

        loss_history = []
        for it in range(num_iters):
            # Sample batch_size elements for the minibatch
            indices = np.random.choice(N, batch_size, replace=True)
            X_batch = X[indices]
            y_batch = y[indices]
            
            # Compute loss and gradients
            loss, dw, db = self.loss(X_batch, y_batch)
            loss_history.append(loss)

            # Update weights and bias
            self.w = soft_threshold(self.w - learning_rate * dw, self.alpha * learning_rate)
            self.b -= learning_rate * db
            
            if verbose and it % 100 == 0:
                print(f"Iteration {it} / {num_iters}: Loss {loss:.6f}")

        return loss_history

    def predict(self, X):
        """
        Predict using the trained Lasso model.

        Inputs:
        - X: Data of shape (N, d)

        Returns:
        - y_pred: Predicted labels of shape (N,)
        """
        y_pred = X.dot(self.w) + self.b
        return y_pred

    def loss(self, X_batch, y_batch):
        """
        Compute the loss and gradients using the MSE loss.

        Inputs:
        - X_batch: A minibatch of data of shape (batch_size, d)
        - y_batch: Corresponding labels of shape (batch_size,)

        Returns:
        - loss: Scalar loss value.
        - dw: Gradient with respect to weights w.
        - db: Gradient with respect to bias b.
        """
        return mse_loss_vectorized(self.w, self.b, X_batch, y_batch)

In [13]:
model = LassoProximalGradientDescent(alpha=0.1)
model.train(X, y, learning_rate=1e-2,verbose=True, num_iters=200_000)
pred = model.predict(X)
mse= mean_squared_error(pred, y)

sk_model = Lasso(alpha=0.1, fit_intercept=True)
sk_model.fit(X, y)
sk_pred = sk_model.predict(X)
sk_mse = mean_squared_error(sk_pred, y)

print("MSE scikit-learn:", sk_mse)
print("MSE Coordinate descent model :", mse)
assert mse - sk_mse < 50

Iteration 0 / 200000: Loss 28154.103601
Iteration 100 / 200000: Loss 6189.929994
Iteration 200 / 200000: Loss 6056.691967
Iteration 300 / 200000: Loss 5628.382181
Iteration 400 / 200000: Loss 5101.195019
Iteration 500 / 200000: Loss 5910.206032
Iteration 600 / 200000: Loss 6060.966300


Iteration 700 / 200000: Loss 5504.052970
Iteration 800 / 200000: Loss 5228.691546
Iteration 900 / 200000: Loss 5548.527897
Iteration 1000 / 200000: Loss 5432.642978
Iteration 1100 / 200000: Loss 4572.638486
Iteration 1200 / 200000: Loss 4869.732349
Iteration 1300 / 200000: Loss 4300.286933
Iteration 1400 / 200000: Loss 5555.592592
Iteration 1500 / 200000: Loss 4522.898247
Iteration 1600 / 200000: Loss 4636.494213
Iteration 1700 / 200000: Loss 4543.966652
Iteration 1800 / 200000: Loss 4875.801787
Iteration 1900 / 200000: Loss 4779.721183
Iteration 2000 / 200000: Loss 4540.829750
Iteration 2100 / 200000: Loss 4371.995470
Iteration 2200 / 200000: Loss 4235.339893
Iteration 2300 / 200000: Loss 4452.436023
Iteration 2400 / 200000: Loss 4554.919577
Iteration 2500 / 200000: Loss 4852.480596
Iteration 2600 / 200000: Loss 4779.603805
Iteration 2700 / 200000: Loss 4329.984839
Iteration 2800 / 200000: Loss 4578.963793
Iteration 2900 / 200000: Loss 4181.188994
Iteration 3000 / 200000: Loss 4256.97

In [14]:
model = LassoProximalGradientDescent(alpha=2)
model.train(X, y, learning_rate=1e-2,verbose=True, num_iters=200_000)
pred = model.predict(X)
mse= mean_squared_error(pred, y)

sk_model = Lasso(alpha=2, fit_intercept=True)
sk_model.fit(X, y)
sk_pred = sk_model.predict(X)
sk_mse = mean_squared_error(sk_pred, y)

print("MSE scikit-learn:", sk_mse)
print("MSE Coordinate descent model :", mse)
assert mse - sk_mse < 50

Iteration 0 / 200000: Loss 27740.692265
Iteration 100 / 200000: Loss 6711.597673
Iteration 200 / 200000: Loss 6542.154208
Iteration 300 / 200000: Loss 6386.712350
Iteration 400 / 200000: Loss 5595.091476
Iteration 500 / 200000: Loss 5969.024341
Iteration 600 / 200000: Loss 5856.983702
Iteration 700 / 200000: Loss 5339.687528
Iteration 800 / 200000: Loss 6012.678664
Iteration 900 / 200000: Loss 5868.410697
Iteration 1000 / 200000: Loss 5651.659079
Iteration 1100 / 200000: Loss 6089.016732
Iteration 1200 / 200000: Loss 5057.684368
Iteration 1300 / 200000: Loss 5291.152475
Iteration 1400 / 200000: Loss 5470.276062
Iteration 1500 / 200000: Loss 5831.304092
Iteration 1600 / 200000: Loss 5521.930276
Iteration 1700 / 200000: Loss 4859.783938
Iteration 1800 / 200000: Loss 5722.826295
Iteration 1900 / 200000: Loss 5927.260744
Iteration 2000 / 200000: Loss 6119.384153
Iteration 2100 / 200000: Loss 5553.309349
Iteration 2200 / 200000: Loss 5708.045417
Iteration 2300 / 200000: Loss 5450.109811
Ite

# Lasso Projected Gradient Descent

In [15]:
class LassoProjectedGradientDescent:
    def __init__(self, alpha=0.1):
        """
        Initialize the Lasso model with Projected Gradient Descent.

        Inputs:
        - alpha: Regularization strength.
        """
        self.w_p = None
        self.w_n = None
        self.b = None
        self.alpha = alpha

    def train(self, X, y, learning_rate=1e-3, num_iters=100, batch_size=200, verbose=False):
        """
        Train the model using projected gradient descent.

        Inputs:
        - X: Training data of shape (N, d)
        - y: Training labels of shape (N,)
        - learning_rate: Learning rate for optimization.
        - num_iters: Number of iterations to run SGD.
        - batch_size: Size of each minibatch.
        - verbose: Boolean; if true, print progress during optimization.
        """
        N, d = X.shape

        if self.w_p is None:  # Initialize weights and bias
            self.w_p = np.maximum(0, 0.001 * np.random.randn(d))  # Positive part
            self.w_n = np.maximum(0, -0.001 * np.random.randn(d))  # Negative part
            self.b = 0.0

        loss_history = []
        for it in range(num_iters):
            # Sample batch_size elements for the minibatch
            indices = np.random.choice(N, batch_size, replace=True)
            X_batch = X[indices]
            y_batch = y[indices]

            # Compute loss and gradients
            loss, dw, db = self.loss(X_batch, y_batch)
            loss_history.append(loss)

            # Update weights and bias with gradient step
            self.w_p = np.maximum(0, self.w_p - learning_rate * dw)  # Project to positive orthant
            self.w_n = np.maximum(0, self.w_n - learning_rate * (-dw))  # Project to positive orthant
            self.b -= learning_rate * db

            if verbose and it % 100 == 0:
                print(f"Iteration {it} / {num_iters}: Loss {loss:.6f}")

        return loss_history

    def predict(self, X):
        """
        Predict using the trained Lasso model.

        Inputs:
        - X: Data of shape (N, d)

        Returns:
        - y_pred: Predicted labels of shape (N,)
        """
        w = self.w_p - self.w_n  # Reconstruct the weights
        y_pred = X.dot(w) + self.b
        return y_pred

    def loss(self, X_batch, y_batch):
        """
        Compute the loss and gradients using the MSE loss with L1 regularization.

        Inputs:
        - X_batch: A minibatch of data of shape (batch_size, d)
        - y_batch: Corresponding labels of shape (batch_size,)

        Returns:
        - loss: Scalar loss value.
        - dw: Gradient with respect to weights w.
        - db: Gradient with respect to bias b.
        """
        N = X_batch.shape[0]
        w = self.w_p - self.w_n  # Reconstruct the weights

        # Predictions and residuals
        y_pred = X_batch.dot(w) + self.b
        residuals = y_pred - y_batch

        # Compute loss
        mse_loss = np.sum(residuals ** 2) / (2 * N)
        l1_loss = self.alpha * np.sum(np.abs(w))  # Regularization term
        loss = mse_loss + l1_loss

        # Gradients
        dw = X_batch.T.dot(residuals) / N + self.alpha * np.sign(w)
        db = np.sum(residuals) / N

        return loss, dw, db


In [16]:
model = LassoProjectedGradientDescent(alpha=0.1)
model.train(X, y, learning_rate=1e-2,verbose=True, num_iters=200_000)
pred = model.predict(X)
mse= mean_squared_error(pred, y)

sk_model = Lasso(alpha=0.1, fit_intercept=True)
sk_model.fit(X, y)
sk_pred = sk_model.predict(X)
sk_mse = mean_squared_error(sk_pred, y)

print("MSE scikit-learn:", sk_mse)
print("MSE Coordinate descent model :", mse)
assert mse - sk_mse < 50

Iteration 0 / 200000: Loss 16198.266699
Iteration 100 / 200000: Loss 4624.575357
Iteration 200 / 200000: Loss 3492.617124
Iteration 300 / 200000: Loss 2939.459351
Iteration 400 / 200000: Loss 2720.285732
Iteration 500 / 200000: Loss 2743.705474
Iteration 600 / 200000: Loss 2714.403036
Iteration 700 / 200000: Loss 2619.390624
Iteration 800 / 200000: Loss 2696.349421
Iteration 900 / 200000: Loss 2505.401425
Iteration 1000 / 200000: Loss 2601.508400
Iteration 1100 / 200000: Loss 2950.389666
Iteration 1200 / 200000: Loss 2974.229928
Iteration 1300 / 200000: Loss 2913.898584
Iteration 1400 / 200000: Loss 2434.263748
Iteration 1500 / 200000: Loss 2660.113637
Iteration 1600 / 200000: Loss 2731.708611
Iteration 1700 / 200000: Loss 2582.837076
Iteration 1800 / 200000: Loss 2558.390370
Iteration 1900 / 200000: Loss 2582.161304
Iteration 2000 / 200000: Loss 2501.298741
Iteration 2100 / 200000: Loss 2728.204337
Iteration 2200 / 200000: Loss 2756.092474
Iteration 2300 / 200000: Loss 2635.309544
Ite

In [17]:
model = LassoProjectedGradientDescent(alpha=2)
model.train(X, y, learning_rate=1e-2,verbose=True, num_iters=200_000)
pred = model.predict(X)
mse= mean_squared_error(pred, y)

sk_model = Lasso(alpha=2, fit_intercept=True)
sk_model.fit(X, y)
sk_pred = sk_model.predict(X)
sk_mse = mean_squared_error(sk_pred, y)

print("MSE scikit-learn:", sk_mse)
print("MSE Coordinate descent model :", mse)
assert mse - sk_mse < 50

Iteration 0 / 200000: Loss 15896.534514
Iteration 100 / 200000: Loss 4229.463591
Iteration 200 / 200000: Loss 2995.593154
Iteration 300 / 200000: Loss 2832.920036
Iteration 400 / 200000: Loss 3279.478709
Iteration 500 / 200000: Loss 2796.934578
Iteration 600 / 200000: Loss 3199.021761
Iteration 700 / 200000: Loss 2894.040378
Iteration 800 / 200000: Loss 3213.826223
Iteration 900 / 200000: Loss 2829.132627
Iteration 1000 / 200000: Loss 2732.477857
Iteration 1100 / 200000: Loss 3037.121792
Iteration 1200 / 200000: Loss 2665.697868
Iteration 1300 / 200000: Loss 3006.626652
Iteration 1400 / 200000: Loss 2910.194085
Iteration 1500 / 200000: Loss 3011.280893
Iteration 1600 / 200000: Loss 2428.747303
Iteration 1700 / 200000: Loss 2756.861059
Iteration 1800 / 200000: Loss 3167.067286
Iteration 1900 / 200000: Loss 2978.202113
Iteration 2000 / 200000: Loss 3144.819640
Iteration 2100 / 200000: Loss 3290.683548
Iteration 2200 / 200000: Loss 2932.458443
Iteration 2300 / 200000: Loss 2849.558578
Ite

# Lasso Coordinate Descent (no intercept)

In [18]:
class LassoCoordinateDescent:
    def __init__(self, alpha=0.1):
        """
        Initialiser le modèle Lasso avec descente par coordonnées.

        Paramètres :
        - alpha : Force de la régularisation L1.
        """
        self.w = None  # Poids (coefficients)
        self.alpha = alpha  # Paramètre de régularisation

    def train(self, X, y, num_iters=1000):
        """
        Entraîner le modèle en utilisant la descente par coordonnées.

        Paramètres :
        - X : Données d'entraînement de taille (N, d).
        - y : Labels d'entraînement de taille (N,).
        - num_iters : Nombre d'itérations pour la descente par coordonnées.
        """
        N, d = X.shape
        if self.w is None:  # Initialiser les poids
            self.w = np.zeros(d)
        
        # Pré-calcul pour accélérer les calculs
        X_squared_sum = np.sum(X ** 2, axis=0)  # Somme des carrés des colonnes de X
        
        for it in range(num_iters):
            for j in range(d):  # Mettre à jour chaque coordonnée (chaque poids)
                # Calculer la prédiction sans la coordonnée courante j
                y_pred_except_j = X.dot(self.w) - X[:, j] * self.w[j]
                
                # Calculer la somme résiduelle (corrélation avec la colonne j)
                rho_j = np.dot(X[:, j], y - y_pred_except_j)
                
                # Appliquer la mise à jour selon la régularisation L1
                if rho_j < -self.alpha:
                    self.w[j] = (rho_j + self.alpha) / X_squared_sum[j]
                elif rho_j > self.alpha:
                    self.w[j] = (rho_j - self.alpha) / X_squared_sum[j]
                else:
                    self.w[j] = 0.0

    def predict(self, X): 
        """
        Prédire les résultats en utilisant le modèle entraîné.

        Paramètres :
        - X : Données de taille (N, d).

        Retour :
        - y_pred : Prédictions de taille (N,).
        """
        return X.dot(self.w)


In [19]:
model = LassoCoordinateDescent(alpha=0.1)
model.train(X, y)
pred = model.predict(X)
mse= mean_squared_error(pred, y)

sk_model = Lasso(alpha=0.1, fit_intercept=False)
sk_model.fit(X, y)
sk_pred = sk_model.predict(X)
sk_mse = mean_squared_error(sk_pred, y)

print("MSE scikit-learn:", sk_mse)
print("MSE Coordinate descent model :", mse)
assert mse - sk_mse < 50

MSE scikit-learn: 26057.12449614575
MSE Coordinate descent model : 26004.303657948607


In [20]:
model = LassoCoordinateDescent(alpha=2)
model.train(X, y)
pred = model.predict(X)
mse= mean_squared_error(pred, y)

sk_model = Lasso(alpha=2, fit_intercept=False)
sk_model.fit(X, y)
sk_pred = sk_model.predict(X)
sk_mse = mean_squared_error(sk_pred, y)

print("MSE scikit-learn:", sk_mse)
print("MSE Coordinate descent model :", mse)
assert mse - sk_mse < 50

MSE scikit-learn: 28794.887776106683
MSE Coordinate descent model : 26006.422489676806
