$$
\boxed{
\beta = (X^T X)^{-1} X^T Y
}
$$

In [22]:
from sklearn.datasets import load_diabetes
import pandas as pd
import numpy as np

# Load the diabetes dataset that ships with scikit-learn.
data = load_diabetes()

# Keep the feature names as column labels so later cells can refer to
# features by name; the target is held in a separate Series.
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

In [23]:
X.shape

(442, 10)

In [24]:
y.shape

(442,)

#### Using sklearn's LinearRegression

In [25]:
from sklearn.model_selection import train_test_split

In [26]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=41)

In [27]:
X_train.shape ,X_test.shape

((353, 10), (89, 10))

In [28]:
from sklearn.linear_model import LinearRegression

In [29]:
reg = LinearRegression()

In [30]:
reg.fit(X_train , y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [31]:
y_pred = reg.predict(X_test)

In [32]:
from sklearn.metrics import r2_score

r2_score(y_test,y_pred)

0.5052064192833894

In [33]:
reg.intercept_

151.21270606283562

In [34]:
reg.coef_

array([  -39.40833709,  -257.3089758 ,   541.10651779,   277.06406632,
       -1147.78185543,   718.00140161,   238.30974011,   266.64759223,
         828.9320125 ,   118.54249533])

### Using our own MLR from scratch

In [62]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [63]:
data = load_diabetes()

X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name="target")

In [64]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=41
)

In [65]:
class MLR_OLS:
    """Multiple linear regression solved in closed form (ordinary least squares).

    The intercept is modelled by prepending a column of ones to the feature
    matrix. The parameter vector is then computed with the Moore-Penrose
    pseudo-inverse, ``betas = pinv(X) @ y``, which coincides with
    ``(X^T X)^{-1} X^T y`` whenever ``X`` has full column rank.

    Attributes
    ----------
    coef_ : numpy.ndarray or None
        Per-feature slopes; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features: a pandas DataFrame, a NumPy array, or a
            nested list.
        y_train : array-like of shape (n_samples,)
            Training targets: a pandas Series, a NumPy array, or a list.

        Returns
        -------
        MLR_OLS
            The fitted instance, so calls can be chained.
        """
        # np.asarray accepts pandas objects as well as plain arrays and lists,
        # whereas .to_numpy() exists only on pandas objects.
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)

        # A leading column of ones lets the first parameter act as the intercept.
        design = np.c_[np.ones(features.shape[0]), features]

        betas = np.linalg.pinv(design) @ targets

        self.intercept_ = betas[0]
        self.coef_ = betas[1:]
        return self

    def predict(self, X_test):
        """Predict targets for new samples.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Samples with the same feature layout that was used in ``fit``.

        Returns
        -------
        numpy.ndarray
            Predicted values, ``X_test @ coef_ + intercept_``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None or self.intercept_ is None:
            # Fail with a clear message instead of an opaque error from `@ None`.
            raise RuntimeError(
                "MLR_OLS is not fitted yet; call fit() before predict()."
            )
        features = np.asarray(X_test, dtype=float)
        return features @ self.coef_ + self.intercept_

| Aspect         | MLR_OLS    | sklearn LinearRegression |
| -------------- | --------------- | ------------------------ |
| Solution       | Closed-form OLS | Closed-form OLS          |
| Inversion      | `pinv(X)`       | SVD / least-squares      |
| Stability      | High            | Very high                |
| Speed          | Slower          | Faster                   |
| Scaling        | Manual          | Manual                   |
| Regularization | ❌               | ❌ (separate Ridge/Lasso) |


In [66]:
mlr_custom = MLR_OLS()
mlr_custom.fit(X_train, y_train)

y_pred_custom = mlr_custom.predict(X_test)

In [67]:
mlr_sklearn = LinearRegression()
mlr_sklearn.fit(X_train, y_train)

y_pred_sklearn = mlr_sklearn.predict(X_test)

In [68]:
def evaluate_model(y_true, y_pred, model_name):
    """Print MAE, MSE, RMSE and R2 for one set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.
    model_name : str
        Heading printed above the metrics.
    """
    print(f"\n{model_name}")

    # RMSE is derived from the same MSE value, so compute it only once.
    squared_error = mean_squared_error(y_true, y_pred)
    report = (
        ("MAE :", mean_absolute_error(y_true, y_pred)),
        ("MSE :", squared_error),
        ("RMSE:", np.sqrt(squared_error)),
        ("R2  :", r2_score(y_true, y_pred)),
    )
    for label, value in report:
        print(label, value)

In [69]:
evaluate_model(y_test, y_pred_custom, "Custom OLS (From Scratch)")
evaluate_model(y_test, y_pred_sklearn, "Sklearn LinearRegression")


Custom OLS (From Scratch)
MAE : 44.00063007274891
MSE : 2924.194706250973
RMSE: 54.07582367612141
R2  : 0.5052064192833878

Sklearn LinearRegression
MAE : 44.00063007274882
MSE : 2924.1947062509644
RMSE: 54.07582367612133
R2  : 0.5052064192833894


In [70]:
# Show both fitted intercepts side by side; they are expected to agree up to
# floating-point round-off.
print("\nIntercept comparison")
print("Custom  :", mlr_custom.intercept_)
print("Sklearn :", mlr_sklearn.intercept_)

# Tabulate the per-feature coefficients of both models, one row per feature.
print("\nCoefficient comparison")
comparison_df = pd.DataFrame({
    "Feature": X.columns,
    "Custom_OLS": mlr_custom.coef_,
    "Sklearn_OLS": mlr_sklearn.coef_
})

# Bare last expression so the notebook renders the frame as the cell output.
comparison_df


Intercept comparison
Custom  : 151.2127060628348
Sklearn : 151.21270606283562

Coefficient comparison


Unnamed: 0,Feature,Custom_OLS,Sklearn_OLS
0,age,-39.408337,-39.408337
1,sex,-257.308976,-257.308976
2,bmi,541.106518,541.106518
3,bp,277.064066,277.064066
4,s1,-1147.781855,-1147.781855
5,s2,718.001402,718.001402
6,s3,238.30974,238.30974
7,s4,266.647592,266.647592
8,s5,828.932012,828.932012
9,s6,118.542495,118.542495


______

_____

In [47]:
### Gradient Descent 
class MLR_FromScratch:
    """Multiple linear regression trained with batch gradient descent.

    Minimises the mean squared error. The intercept is modelled by prepending
    a column of ones to the feature matrix, so ``beta[0]`` is the bias and
    ``beta[1:]`` are the per-feature slopes.

    Parameters
    ----------
    lr : float, default 0.01
        Step size applied to the gradient on every epoch.
    epochs : int, default 3000
        Number of full-batch gradient steps.

    Attributes
    ----------
    beta : numpy.ndarray or None
        Full parameter vector (intercept first); ``None`` until ``fit``.
    intercept_ : float or None
        ``beta[0]``, exposed under the same name ``MLR_OLS`` uses.
    coef_ : numpy.ndarray or None
        ``beta[1:]``, exposed under the same name ``MLR_OLS`` uses.
    """

    def __init__(self, lr=0.01, epochs=3000):
        self.lr = lr
        self.epochs = epochs
        self.beta = None
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        """Run ``epochs`` gradient-descent steps starting from all-zero parameters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features: a pandas DataFrame, a NumPy array, or a
            nested list.
        y : array-like of shape (n_samples,)
            Training targets.

        Returns
        -------
        MLR_FromScratch
            The fitted instance, so calls can be chained.
        """
        # np.asarray accepts pandas objects as well as plain arrays and lists,
        # whereas .to_numpy() exists only on pandas objects.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        n_samples, n_features = X.shape

        # Add bias column
        design = np.c_[np.ones(n_samples), X]

        # Every call to fit restarts from zeros.
        beta = np.zeros(n_features + 1)
        scale = 2 / n_samples  # constant factor of the MSE gradient

        for _ in range(self.epochs):
            error = design @ beta - y
            gradient = scale * (design.T @ error)
            beta -= self.lr * gradient

        self.beta = beta
        self.intercept_ = beta[0]
        self.coef_ = beta[1:]
        return self

    def predict(self, X):
        """Predict targets for new samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples with the same feature layout that was used in ``fit``.

        Returns
        -------
        numpy.ndarray
            Predicted values, ``[1, X] @ beta``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.beta is None:
            # Fail with a clear message instead of an opaque error from `@ None`.
            raise RuntimeError(
                "MLR_FromScratch is not fitted yet; call fit() before predict()."
            )
        X = np.asarray(X, dtype=float)
        design = np.c_[np.ones(X.shape[0]), X]
        return design @ self.beta