# Elastic Net From Scratch

Elastic Net is a regularized regression method that linearly combines the **L1 (Lasso)** and **L2 (Ridge)** penalties. It is particularly useful when multiple features are correlated.

## Key Concepts:
- **L1 Penalty (Lasso)**: Encourages sparsity (feature selection)
- **L2 Penalty (Ridge)**: Encourages small weights (grouping effect)
- **L1 Ratio ($\rho$)**: Controls the balance between L1 and L2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet as SklearnElasticNet

## 1. Mathematical Foundation

### Cost Function
$$J(w, b) = \frac{1}{2n} \sum_{i=1}^n (y_i - (X_i w + b))^2 + \alpha \rho \sum_{j=1}^m |w_j| + \frac{\alpha (1-\rho)}{2} \sum_{j=1}^m w_j^2$$

### Coordinate Descent Update
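$$w_j = \frac{S(z_j, \alpha \rho n)}{\sum_{i=1}^n x_{ij}^2 + \alpha(1-\rho)n}$$
Where $z_j = \sum_{i=1}^n x_{ij}\,\bigl(y_i - \hat{y}_i + w_j x_{ij}\bigr)$ is the correlation of feature $j$ with the partial residual (the residual computed without feature $j$'s contribution; it is unrelated to the L1 ratio $\rho$), and $S(z, \lambda) = \operatorname{sign}(z)\max(|z| - \lambda, 0)$ is the soft thresholding operator.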

In [None]:
class ElasticNet:
    """Linear regression with a combined L1/L2 penalty, fitted by coordinate descent.

    The objective being minimised is::

        1 / (2 * n) * ||y - X w - b||^2
        + alpha * l1_ratio * ||w||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||w||_2^2

    Parameters
    ----------
    alpha : float, default=1.0
        Overall regularisation strength.
    l1_ratio : float, default=0.5
        Mixing parameter: 1 is a pure L1 penalty, 0 a pure L2 penalty.
    max_iter : int, default=1000
        Maximum number of full sweeps over the features.
    tol : float, default=1e-4
        A sweep that changes the weight vector by less than ``tol``
        (Euclidean norm) stops the optimisation.

    Attributes
    ----------
    w : numpy.ndarray or None
        Fitted weights, one per feature; ``None`` until ``fit`` is called.
    b : float
        Fitted intercept.
    """

    def __init__(self, alpha=1.0, l1_ratio=0.5, max_iter=1000, tol=1e-4):
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.max_iter = max_iter
        self.tol = tol
        self.w = None
        self.b = 0

    def _soft_threshold(self, rho, lam):
        """Shrink the scalar ``rho`` towards zero by ``lam``.

        Returns ``rho + lam`` below ``-lam``, ``rho - lam`` above ``lam``
        and ``0.0`` in between.
        """
        if rho < -lam:
            return rho + lam
        if rho > lam:
            return rho - lam
        # Float zero keeps the return type consistent with the other branches.
        return 0.0

    def fit(self, X, y):
        """Estimate ``w`` and ``b`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,)
            Training targets.

        Returns
        -------
        ElasticNet
            ``self``, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape

        self.w = np.zeros(n_features)
        self.b = np.mean(y)

        # Quantities that do not change between sweeps.
        l1_penalty = self.alpha * self.l1_ratio * n_samples
        l2_penalty = self.alpha * (1 - self.l1_ratio) * n_samples
        col_sq_norms = np.sum(X ** 2, axis=0)

        # Running residual y - (X w + b); w is zero at this point.
        residual = y - self.b

        for _ in range(self.max_iter):
            w_old = self.w.copy()

            for j in range(n_features):
                x_j = X[:, j]
                w_j = self.w[j]

                # Correlation of feature j with the residual that excludes
                # feature j's own contribution.
                rho = np.dot(x_j, residual) + w_j * col_sq_norms[j]
                den = col_sq_norms[j] + l2_penalty

                if den == 0:
                    # All-zero column with no L2 term: the feature carries no
                    # information, and 0 / 0 would otherwise produce NaN.
                    new_w = 0.0
                else:
                    new_w = self._soft_threshold(rho, l1_penalty) / den

                if new_w != w_j:
                    # Keep the residual in sync instead of recomputing X @ w
                    # for every coordinate.
                    residual -= (new_w - w_j) * x_j
                    self.w[j] = new_w

            # Refresh the intercept, then rebuild the residual from scratch so
            # rounding error from the incremental updates cannot accumulate.
            fitted_without_intercept = X @ self.w
            self.b = np.mean(y - fitted_without_intercept)
            residual = y - fitted_without_intercept - self.b

            if np.linalg.norm(self.w - w_old) < self.tol:
                break
        return self

    def predict(self, X):
        """Return ``X @ w + b`` for the given inputs."""
        return np.asarray(X) @ self.w + self.b

    def score(self, X, y):
        """Return the coefficient of determination (R^2) of the predictions.

        When ``y`` is constant the usual ratio is undefined; in that case
        1.0 is returned for a perfect fit and 0.0 otherwise.
        """
        y = np.asarray(y, dtype=float)
        y_pred = self.predict(X)
        ss_res = np.sum((y - y_pred) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        return 1 - ss_res / ss_tot

## 2. Comparison with Sklearn

In [None]:
# Synthetic regression problem: 10 features, 5 of them informative.
X, y = make_regression(n_samples=100, n_features=10, n_informative=5, noise=5, random_state=42)

# Split BEFORE scaling so the scaler's mean/variance come from the training
# rows only; fitting it on the full dataset would leak test-set statistics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the from-scratch model and the sklearn reference with identical
# hyper-parameters so their test scores are directly comparable.
en = ElasticNet(alpha=1.0, l1_ratio=0.5)
en.fit(X_train, y_train)

sk_en = SklearnElasticNet(alpha=1.0, l1_ratio=0.5)
sk_en.fit(X_train, y_train)

print(f"Our ElasticNet R2: {en.score(X_test, y_test):.4f}")
print(f"Sklearn ElasticNet R2: {sk_en.score(X_test, y_test):.4f}")