In [38]:
import numpy as np
from sklearn.datasets import load_diabetes

In [39]:
# return_X_y=True yields the (features, target) pair directly instead of a Bunch object.
X, y = load_diabetes(return_X_y=True)

In [40]:
X.shape

(442, 10)

In [41]:
y.shape

(442,)

In [42]:
from sklearn.model_selection import train_test_split

In [43]:
# Hold out 10% of the rows for testing. The fixed random_state pins the shuffle so
# the split -- and every prediction and score derived from it -- is identical on
# each Restart & Run All instead of changing with every kernel session.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

In [44]:
X_train.shape

(397, 10)

In [45]:
X_test.shape

(45, 10)

# Train with scikit-learn's LinearRegression model

In [46]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [52]:
# Reference model: scikit-learn's LinearRegression with its default settings.
lr = LinearRegression()

In [53]:
# Fit the reference model on the training portion only.
lr.fit(X_train, y_train)

In [54]:
# Predict the targets of the held-out rows; the bare name on the last line
# echoes the resulting array as the cell output.
y_pred = lr.predict(X_test)
y_pred

array([283.24598324,  76.64424024, 117.89757893, 100.64830771,
        67.56211613, 199.63430315, 119.17133391, 108.60896582,
       128.1884908 , 212.27548244, 220.32103167, 153.73306876,
       104.36833892, 155.41826649, 115.89852041, 182.38156243,
        86.91049425, 178.96168595, 206.15906828, 204.15614099,
       190.47100304, 185.98172279, 163.7060152 , 155.66648529,
        52.91904691,  45.43812297, 214.43701363, 252.51495875,
       128.3171272 , 254.9643587 , 149.04540387, 113.2068051 ,
       101.26679929, 172.60073184,  67.18578799, 170.38683777,
       207.44199514, 131.69334441,  89.86912248,  86.88523061,
       173.79371406, 190.91099041, 164.84596513, 114.71465952,
       188.6273933 ])

In [55]:
y_test

array([281.,  59., 182.,  81.,  83., 233.,  66., 102., 178., 163., 268.,
       185., 125., 259.,  64.,  91.,  96.,  90., 151., 297., 241., 175.,
       109., 262.,  78.,  47., 220., 336., 150., 275.,  97.,  88., 170.,
       180.,  39., 121., 249.,  60., 114.,  96., 244.,  78., 151.,  96.,
       212.])

In [56]:
# R^2 of the scikit-learn model on the held-out rows (true values first, predictions second).
r2_score(y_test, y_pred)

0.5585992127646751

# Create our own multiple linear regression

In [57]:
class OwnMultipleLinearRegressor:
    """Ordinary least-squares multiple linear regression built on NumPy.

    Both attributes are None until fit() has been called:
      coef_      -- fitted weights, one per feature column.
      intercept_ -- fitted bias term added to every prediction.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients minimising squared error.

        Parameters
        ----------
        X_train : 2-D array-like, one row per sample and one column per feature.
        y_train : array-like of targets, one entry per row of X_train.

        Stores the result in ``self.intercept_`` and ``self.coef_``.
        """
        # Prepend a column of ones so the first fitted weight is the intercept.
        design = np.insert(X_train, 0, 1, axis=1)

        # Solve the least-squares problem directly instead of forming
        # inv(X^T X) X^T y: explicitly inverting X^T X amplifies rounding error
        # and fails (or returns garbage) when features are collinear. lstsq
        # yields the same solution for full-rank data and the minimum-norm
        # solution otherwise.
        betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)
        self.intercept_ = betas[0]
        self.coef_ = betas[1:]

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Parameters
        ----------
        X_test : 2-D array-like with the same feature columns used in fit().

        Raises
        ------
        RuntimeError
            If called before fit().
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError(
                "OwnMultipleLinearRegressor is not fitted yet; call fit() first."
            )
        y_pred = np.dot(X_test, self.coef_) + self.intercept_
        return y_pred

In [58]:
# Instantiate the hand-written regressor defined above.
mylr = OwnMultipleLinearRegressor()

In [59]:
# Fit on the same training rows given to the scikit-learn model so results are comparable.
mylr.fit(X_train, y_train)

In [61]:
# Predict the same held-out rows with the hand-written model; the bare name
# echoes the array so it can be compared with the scikit-learn predictions.
my_y_pred = mylr.predict(X_test)
my_y_pred

array([283.24598324,  76.64424024, 117.89757893, 100.64830771,
        67.56211613, 199.63430315, 119.17133391, 108.60896582,
       128.1884908 , 212.27548244, 220.32103167, 153.73306876,
       104.36833892, 155.41826649, 115.89852041, 182.38156243,
        86.91049425, 178.96168595, 206.15906828, 204.15614099,
       190.47100304, 185.98172279, 163.7060152 , 155.66648529,
        52.91904691,  45.43812297, 214.43701363, 252.51495875,
       128.3171272 , 254.9643587 , 149.04540387, 113.2068051 ,
       101.26679929, 172.60073184,  67.18578799, 170.38683777,
       207.44199514, 131.69334441,  89.86912248,  86.88523061,
       173.79371406, 190.91099041, 164.84596513, 114.71465952,
       188.6273933 ])

In [62]:
# R^2 of the hand-written model on the held-out rows, for comparison with the scikit-learn score.
r2_score(y_test, my_y_pred)

0.5585992127646745