### **1. Prediction Function**

$$
\hat{y} = X \cdot \mathbf{w} + b
$$

### **2. Mean Squared Error (MSE) Loss**

$$
\mathcal{L} = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2
$$

### **3. Gradient w.r.t. Weights**

$$
\frac{\partial \mathcal{L}}{\partial \mathbf{w}} = -\frac{2}{n} X^T (y - \hat{y})
$$

### **4. Gradient w.r.t. Bias**

$$
\frac{\partial \mathcal{L}}{\partial b} = -\frac{2}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)
$$

### **5. Weight Update Rule**

$$
\mathbf{w} := \mathbf{w} - \alpha \cdot \frac{\partial \mathcal{L}}{\partial \mathbf{w}}
$$

### **6. Bias Update Rule**

$$
b := b - \alpha \cdot \frac{\partial \mathcal{L}}{\partial b}
$$

---


In [80]:
import numpy as np

In [81]:
class LinearRegressionBGD:
    """Linear regression fitted with batch gradient descent.

    The model is ``y_hat = X @ weights + bias``. Every iteration uses the
    whole training set to compute the gradient and then moves the
    parameters one step of size ``lr`` against it.

    Notes
    -----
    The gradients are scaled by ``1 / n_samples`` (not ``2 / n_samples``),
    i.e. they are the gradients of *half* the mean squared error. The
    minimiser is the same; the constant factor 2 is effectively folded
    into the learning rate.

    Parameters
    ----------
    lr
        Learning rate (step size) applied to each gradient update.
    n_iters
        Number of full-batch gradient steps performed by ``fit``.
    """

    def __init__(self, lr: float = 0.01, n_iters: int = 10000) -> None:
        self.lr = lr
        self.n_iters = n_iters
        # Both are populated by fit(); None means "not fitted yet".
        self.weights = None
        self.bias = None

    def fit(self, X, y) -> None:
        """Estimate ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X
            Array-like of shape ``(n_samples, n_features)``.
        y
            Array-like of shape ``(n_samples,)`` or ``(n_samples, 1)``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, ``y`` is not a single target vector, or
            the two disagree on the number of samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {X.shape}"
            )

        # A column vector would broadcast against the 1-D predictions into
        # an (n_samples, n_samples) matrix and silently corrupt the
        # gradients, so collapse it to 1-D up front.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()
        if y.ndim != 1:
            raise ValueError(
                f"y must be 1-D or a single column, got shape {y.shape}"
            )

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]}"
            )

        # Weights start at one and the bias at zero.
        self.weights = np.ones(n_features)
        self.bias = 0.0

        scale = 1 / n_samples  # loop-invariant gradient normalisation
        for _ in range(self.n_iters):
            y_hat = np.dot(X, self.weights) + self.bias
            residual = y_hat - y  # shared by both gradients

            dw = scale * np.dot(X.T, residual)
            db = scale * np.sum(residual)

            self.weights = self.weights - (self.lr * dw)
            self.bias = self.bias - (self.lr * db)

    def predict(self, X):
        """Return predictions ``X @ weights + bias`` for the given samples.

        Parameters
        ----------
        X
            Array-like whose last dimension has ``n_features`` entries.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("predict() called before fit()")

        X = np.asarray(X, dtype=float)
        return np.dot(X, self.weights) + self.bias