In [51]:
import pandas as pd
from sklearn.datasets import load_diabetes
import numpy as np


In [52]:
# Load the diabetes dataset as separate feature and target arrays.
x, y = load_diabetes(return_X_y=True)

In [53]:
# Show the dimensions of the feature matrix and the target vector.
(x.shape, y.shape)

((442, 10), (442,))

In [54]:
from sklearn.model_selection import train_test_split

In [55]:
# Hold out a test split; random_state pins the shuffle so the split is reproducible.
(
    x_train,
    x_test,
    y_train,
    y_test,
) = train_test_split(x, y, random_state=20)

$$ \hat{y} = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \dots + \beta_m x_m $$



The coefficient vector **B** in multiple linear regression is given by:

$$
\mathbf{B} = \begin{bmatrix}
\beta_0 \\
\beta_1 \\
\vdots \\
\beta_m
\end{bmatrix}
$$

where:
- $\beta_0$ represents the intercept coefficient,
- $\beta_1$ represents the coefficient for the first predictor variable,
- $\beta_2, \dots, \beta_{m-1}$ are the coefficients for the remaining predictor variables,
- $\beta_m$ represents the coefficient for the last predictor variable.

Each $\beta_i$ represents the effect of the corresponding predictor variable on the response variable in the regression model.


The coefficient vector $\mathbf{B}$ is calculated with the normal equation:

$$ \mathbf{B} = (X^T X)^{-1} X^T Y $$

where:
- $X$ is the matrix of all input columns with one extra column of ones prepended (for the intercept $\beta_0$),
- $Y$ is the vector of target values.



In [60]:
class MultipleLinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    Both attributes are ``None`` until :meth:`fit` has been called:

    - ``coeff_``: fitted slopes, one per input column.
    - ``intercept_``: fitted bias term.
    """

    def __init__(self):
        self.coeff_ = None
        self.intercept_ = None

    def fit(self, x, y):
        """Estimate the intercept and coefficients from training data.

        Solves the least-squares problem ``min ||X_ b - y||`` where ``X_`` is
        ``x`` with a column of ones prepended. When ``X_.T @ X_`` is
        invertible this is the normal-equation solution
        ``(X_.T X_)^-1 X_.T y``; unlike an explicit matrix inverse it also
        copes with collinear or rank-deficient inputs (returning the
        minimum-norm solution) and is numerically better conditioned.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like with n_samples entries along its first axis
            Training targets.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D or ``x`` and ``y`` disagree on the number of
            samples.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        if x.ndim != 2:
            raise ValueError(
                f"x must be 2-D (n_samples, n_features); got {x.ndim}-D input"
            )
        if y.ndim == 0 or y.shape[0] != x.shape[0]:
            raise ValueError("x and y must contain the same number of samples")

        # Prepend a column of ones so the first fitted beta is the intercept.
        x_ = np.insert(x, 0, 1, axis=1)

        # lstsq avoids forming and inverting X^T X, which fails on singular
        # designs and squares the condition number of the problem.
        betas, *_ = np.linalg.lstsq(x_, y, rcond=None)

        self.intercept_ = betas[0]
        self.coeff_ = betas[1:]
        # Printed for parity with the existing behaviour of this method.
        print(betas)

    def predict(self, x):
        """Return predictions ``x @ coeff_ + intercept_`` for the given inputs.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
            Inputs with the same number of columns used in :meth:`fit`.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if self.coeff_ is None or self.intercept_ is None:
            raise RuntimeError(
                "MultipleLinearRegression must be fitted before calling predict()"
            )
        return np.dot(np.asarray(x), self.coeff_) + self.intercept_


In [61]:
# Create an unfitted regression model.
mlr = MultipleLinearRegression()

In [63]:
# Fit on the training split; fit() prints the estimated betas.
mlr.fit(x=x_train, y=y_train)

[  154.7290065    -48.02846228  -224.65536334   554.61628748
   360.27084995 -1085.53343      772.19682316   149.1631924
    85.46051108   866.52891424    21.12842707]


In [65]:
# Predict targets for the held-out test split.
y_pred = mlr.predict(x=x_test)

In [66]:
from sklearn.metrics import r2_score

In [67]:
# R^2 of the predictions against the true test targets.
r2_score(y_true=y_test, y_pred=y_pred)

0.4427388186629707