### Multiple Linear Regression from Scratch


In [None]:
import numpy as np


class MultipleLinearRegression:
    """
    Ordinary least-squares linear regression with an intercept term.

    Attributes:
    coef_ : ndarray, shape (n_features,), or None before fitting
        Estimated weight of each feature.
    intercept_ : float, or None before fitting
        Estimated constant term.
    """

    def __init__(self):
        # Both stay None until fit() has been called.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        """
        Fit the model by solving the least-squares problem min ||A b - y||,
        where A is X with a leading column of ones for the intercept.

        Parameters:
        X : array-like, shape (n_samples, n_features)
            Training data.
        y : array-like, shape (n_samples,)
            Target values.

        Raises:
        ValueError
            If X is not 2-D, has no samples, or its number of rows does not
            match the length of y.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D with shape (n_samples, n_features); got ndim={X.ndim}"
            )
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if y.ndim == 0 or y.shape[0] != X.shape[0]:
            raise ValueError(
                "X and y must have the same number of samples; "
                f"got X.shape={X.shape}, y.shape={y.shape}"
            )

        # Add the intercept term as a leading column of ones.
        design = np.hstack([np.ones((X.shape[0], 1)), X])

        # Solve with an SVD-based least-squares routine instead of forming
        # inv(A.T @ A): the explicit inverse fails (or returns garbage) when
        # features are collinear and squares the condition number otherwise.
        # For rank-deficient inputs lstsq returns the minimum-norm solution.
        betas, *_ = np.linalg.lstsq(design, y, rcond=None)

        self.intercept_ = betas[0]                  # First coefficient is the intercept
        self.coef_ = betas[1:]

    def predict(self, X):
        """
        Predict using the linear model.

        Parameters:
        X : array-like, shape (n_samples, n_features)
            Samples.

        Returns:
        y_pred : array-like, shape (n_samples,)
            Predicted values.

        Raises:
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError(
                "This MultipleLinearRegression instance is not fitted yet; "
                "call fit() before predict()."
            )

        return X @ self.coef_ + self.intercept_
        

In [18]:
from sklearn.datasets import load_diabetes
import numpy as np

In [2]:
# Unpack the diabetes dataset directly into features X and targets y.
X, y = load_diabetes(return_X_y=True)

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Fit the from-scratch model on the training split and predict on the held-out split.
model = MultipleLinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [13]:
# Display the fitted coefficients and intercept of the custom model.
model.coef_, model.intercept_

(array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
        -931.48884588,  518.06227698,  163.41998299,  275.31790158,
         736.1988589 ,   48.67065743]),
 151.34560453986)

In [14]:
from sklearn.metrics import mean_squared_error, r2_score
# Score the custom model's predictions against the held-out targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")     
print(f"R^2 Score: {r2}")

Mean Squared Error: 2900.1936284934795
R^2 Score: 0.4526027629719198


### Comparison between the custom `MultipleLinearRegression` class and scikit-learn’s built-in `LinearRegression` model.

In [15]:
from sklearn.linear_model import LinearRegression

# Reference model: fit scikit-learn's LinearRegression on the same training split.
sk_model = LinearRegression()
sk_model.fit(X_train, y_train)

# Predict on the same held-out split used for the custom model.
sk_y_pred = sk_model.predict(X_test)

In [16]:
# Display scikit-learn's coefficients and intercept for comparison with the custom model's.
sk_model.coef_, sk_model.intercept_

(array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
        -931.48884588,  518.06227698,  163.41998299,  275.31790158,
         736.1988589 ,   48.67065743]),
 151.34560453985995)

In [17]:
# Same metrics as for the custom model, computed on scikit-learn's predictions.
# mean_squared_error and r2_score are imported in an earlier cell.
sk_mse = mean_squared_error(y_test, sk_y_pred)
sk_r2 = r2_score(y_test, sk_y_pred)
print(f"Scikit-learn Mean Squared Error: {sk_mse}")
print(f"Scikit-learn R^2 Score: {sk_r2}")

Scikit-learn Mean Squared Error: 2900.1936284934823
Scikit-learn R^2 Score: 0.45260276297191926
