In [21]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [23]:
X,y = load_diabetes(return_X_y=True)

In [13]:
X.shape

(442, 10)

In [16]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y, test_size=0.1, random_state=1)
Xtrain.shape

(397, 10)

In [17]:
lr = LinearRegression()
lr.fit(Xtrain,ytrain)

LinearRegression()

In [18]:
lr.coef_

array([ -26.2759041 , -231.92697391,  566.49859849,  303.08631061,
       -709.86807128,  428.67719627,   82.12319677,  165.60628133,
        729.33691844,   42.58281733])

In [19]:
lr.intercept_

151.4520908664676

In [22]:
pred=lr.predict(Xtest)
rmse = np.sqrt(mean_squared_error(ytest, pred))
r_two = r2_score(ytest, pred)

print(rmse,'\n', r_two)

57.616773487757534 
 0.3174949176473588


In [56]:
class MultiFeatureGD:
    
    def __init__(self, learning_rate, epochs):
        self.lrr = learning_rate
        self.epochs = epochs
        self.intercept_ = None
        self.coef_ = None
        
    def fit(self, X,y):
        
        self.intercept_ = 0
        self.coef_ = np.ones(X.shape[1])
        
        for i in range(self.epochs):
            #first updating all the coefficients and an intercept
            
            #WE MUST KNOW THE CONCEPT OF VECTORIZATION. recommended video: https://www.youtube.com/watch?v=qsIrQi0fzbY
            #In simple sentence, vectorization is avoiding the for loops.
            
            #intercept
            yhat = np.dot(X,self.coef_) + self.intercept_
            slope_intercept = -2 * np.mean(y-yhat)
            step_size = self.lrr * slope_intercept
            self.intercept_ = self.intercept_ - step_size
            
            
            #coefficient
            slop_coef = -2 * np.dot((y - yhat), X) / X.shape[0] #We obtain this step after doing the derivative and understanding
            #the matrix and vectors.
            step_size = self.lrr * slop_coef
            self.coef_ = self.coef_ - step_size
            
        print(f'The y-intercept: {self.intercept_},\n The coefficient:{self.coef_}')
            
    def predict(self,Xtest):
        return np.dot(Xtest, self.coef_) + self.intercept_
            

In [66]:
a = MultiFeatureGD(0.1,10000)
a.fit(X,y)

The y-intercept: 152.13348416289637,
 The coefficient:[  -5.39407432 -233.77260766  524.52549429  319.75669124  -61.83765052
 -116.38548375 -206.63815552  119.51732693  455.28947752   84.87407198]


In [67]:
pred = a.predict(Xtest)
pred

array([117.61085739, 107.55989952, 184.509519  ,  68.97458556,
       172.08863319, 190.55722889, 219.05564124, 124.97490287,
       155.97700508, 127.87483182, 208.07661286, 197.70589445,
        92.47949295,  75.52149947, 231.94400307, 203.84975848,
       176.80653413, 100.76790908, 118.75465112, 168.12568832,
       187.70938662,  89.09538869, 142.78368775, 104.41473801,
        91.68927596, 210.3241617 ,  95.09103118, 172.0769282 ,
       123.76627928, 181.07260254, 158.23857229, 136.95721253,
       189.79612497,  96.66898386, 160.3128906 , 189.44763873,
       105.27801983, 245.27273189, 171.3772019 , 185.05336944,
       160.33762524, 182.27380073, 119.79426219, 123.19954979,
       140.68360779])

In [69]:
r2_score(ytest, pred) #The model can be improved by tuning the hyperparamaters

0.33604625883586936