In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

In [3]:
# return_X_y=True asks for the (features, target) pair directly; both are displayed in the cells below.
x,y = load_diabetes(return_X_y=True)

In [5]:
x

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [7]:
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [9]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split the same on every run,
# so the metrics of both models below are computed on exactly the same test rows.
x_train,x_test,y_train,y_test = train_test_split(x , y ,test_size=0.2,random_state=3)

In [13]:
from sklearn.linear_model import LinearRegression

In [17]:
lr = LinearRegression()

In [19]:
lr.fit(x_train,y_train)

In [23]:
y_pred = lr.predict(x_test)

In [25]:
from sklearn.metrics import r2_score , mean_absolute_error , mean_squared_error

In [27]:
r2_score(y_test,y_pred)

0.4161792211496943

In [29]:
mean_absolute_error(y_test,y_pred)

44.84820711311212

In [31]:
mean_squared_error(y_test,y_pred)

3161.213140359926

In [33]:
lr.coef_

array([  -1.13744712, -212.8867836 ,  540.45536994,  345.20621542,
       -938.23814645,  516.62060367,  172.85885498,  267.87535242,
        732.63230159,   70.07849485])

In [35]:
lr.intercept_

153.13441535285003

## Building our own Model
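Let's build our own model, which should give outputs comparable to sklearn's `LinearRegression`. It uses the closed-form ordinary least squares solution $\hat{\beta} = (X^\top X)^{-1} X^\top y$, where $X$ is the training matrix with a leading column of ones so that the first entry of $\hat{\beta}$ is the intercept and the remaining entries are the feature coefficients.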

In [42]:
class DerivedMultiRegression:
    """Multiple linear regression fitted by ordinary least squares.

    The model is ``y ≈ intercept + x @ coef``. Fitting augments the feature
    matrix with a leading column of ones and solves the resulting
    least-squares problem, so the first fitted weight is the intercept and
    the remaining weights are the per-feature coefficients.

    Attributes:
        coef: Fitted per-feature weights (one per column of the training
            matrix). ``None`` until ``fit`` has been called.
        intercept: Fitted bias term. ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        # Both are populated by fit(); predict() cannot be used before that.
        self.coef = None
        self.intercept = None

    def fit(self, x_train, y_train):
        """Estimate ``intercept`` and ``coef`` from training data.

        Args:
            x_train: 2-D array-like of shape (n_samples, n_features).
            y_train: Array-like of targets with n_samples entries.

        Raises:
            numpy.linalg.LinAlgError: If the least-squares solver does not
                converge or the shapes of the inputs are incompatible.
        """
        features = np.asarray(x_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)

        # Prepend a column of ones so the intercept is learned as weight 0.
        design = np.insert(features, 0, 1.0, axis=1)

        # Solve min ||design @ beta - targets||^2 directly instead of forming
        # inv(design.T @ design): the explicit inverse raises LinAlgError when
        # features are collinear and is less accurate when they are nearly so.
        # For full-rank data both approaches give the same beta up to
        # floating-point rounding; for rank-deficient data lstsq returns the
        # minimum-norm solution.
        beta, *_ = np.linalg.lstsq(design, targets, rcond=None)

        self.intercept = beta[0]
        self.coef = beta[1:]

    def predict(self, x_test):
        """Return predictions ``intercept + x_test @ coef``.

        Must be called after ``fit``.

        Args:
            x_test: Array-like whose last dimension equals the number of
                features seen in ``fit``.

        Returns:
            Array of predicted target values, one per row of ``x_test``.
        """
        y_pred = self.intercept + np.dot(x_test, self.coef)
        return y_pred

In [44]:
dlr = DerivedMultiRegression()

In [46]:
dlr.fit(x_train , y_train)

In [50]:
dlr_y_predict = dlr.predict(x_test)

In [52]:
r2_score(y_test , dlr_y_predict)

0.4161792211496943

In [56]:
mean_absolute_error(y_test,dlr_y_predict)

44.848207113112146

In [58]:
mean_squared_error(y_test,dlr_y_predict)

3161.213140359926

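We got the same results as sklearn's `LinearRegression`: the R² score and mean squared error are identical, and the mean absolute error differs only in the last digits, which is floating-point rounding.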