In [1]:
from sklearn.datasets import load_diabetes

In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [5]:
# Load the diabetes dataset directly as a (features, target) pair of arrays.
X,y = load_diabetes(return_X_y=True)

In [10]:
X.shape
# X is the input (feature) matrix: one row per sample, one column per feature

(442, 10)

In [11]:
y.shape
# y is the output (target) vector: one value per sample

(442,)

In [8]:
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [12]:
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [13]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible.
X_train,X_test,y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=2)

In [14]:
# Reference model: scikit-learn's LinearRegression, used as the baseline for the hand-written GDRegressor below.
reg = LinearRegression()

In [15]:
# Fit the reference model on the training split only.
reg.fit(X_train, y_train)

In [16]:
reg.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [17]:
reg.intercept_

np.float64(151.88331005254167)

In [19]:
# Predictions of the scikit-learn model on the held-out test split.
# NOTE: the name y_pred is reassigned later by the GDRegressor prediction cell.
y_pred = reg.predict(X_test)

In [20]:
# R^2 of the scikit-learn model on the test split (the baseline score to compare against).
r2_score(y_test, y_pred)

0.4399338661568969

In [40]:
X_train.shape

(353, 10)

In [41]:
y_train.shape

(353,)

In [42]:
X.shape

(442, 10)

In [43]:
y.shape

(442,)

In [86]:
import time

### Implementing our own class to find the intercept and coefficients of multiple linear regression using gradient descent

In [101]:
class GDRegressor:
    """Multiple linear regression fitted with batch gradient descent.

    The model is ``y_hat = intercept_ + X @ coef_`` and the parameters are
    updated to reduce the mean squared error over the whole training set
    on every epoch (there is no mini-batching and no early stopping).

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to each gradient update.
    epochs : int, default=10000
        Number of full passes (gradient steps) over the training data.

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,), or None before ``fit``
        Learned weight of every input column.
    intercept_ : float, or None before ``fit``
        Learned bias term.
    """

    def __init__(self, learning_rate=0.01, epochs=10000):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs

    def fit(self,X_train, y_train):
        """Learn ``intercept_`` and ``coef_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training inputs; lists are accepted and converted to arrays.
        y_train : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets; a single-column 2-D target is flattened.

        Returns
        -------
        GDRegressor
            The fitted estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If ``y_train`` is not one target per row of ``X_train``.

        Notes
        -----
        The learned intercept and coefficients are printed once training
        has finished.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)

        # A column-vector target would broadcast (n, 1) - (n,) into an
        # (n, n) residual matrix and corrupt the updates, so flatten it.
        if y_train.ndim == 2 and y_train.shape[1] == 1:
            y_train = y_train.ravel()
        if y_train.ndim != 1 or y_train.shape[0] != X_train.shape[0]:
            raise ValueError(
                "y_train must hold exactly one target per row of X_train; "
                f"got X_train shape {X_train.shape} and y_train shape {y_train.shape}"
            )

        n_samples = X_train.shape[0]

        self.intercept_ = 0
        # One coefficient per input column of the dataset.
        self.coef_ = np.ones(X_train.shape[1])

        for _ in range(self.epochs):
            # Vectorised prediction and residual for the whole training set.
            y_hat = self.intercept_ + np.dot(X_train, self.coef_)
            residual = y_train - y_hat

            # Both gradients are taken from the same residual, i.e. the
            # intercept and the coefficients are updated simultaneously.
            intercept_der = -2 * np.mean(residual)
            coef_der = -2 * np.dot(residual, X_train) / n_samples

            self.intercept_ = self.intercept_ - (self.lr * intercept_der)
            self.coef_ = self.coef_ - (self.lr * coef_der)

        print(self.intercept_, self.coef_)
        return self

    def predict(self,X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        ``fit`` must be called first; ``coef_`` and ``intercept_`` are
        ``None`` until then.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Inputs to predict for.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted target for every row of ``X_test``.
        """
        return np.dot(X_test, self.coef_) + self.intercept_

In [102]:
# Instantiate the hand-written regressor with its defaults (learning_rate=0.01, epochs=10000).
gd = GDRegressor()

In [103]:
# Wall-clock timing of one full training run; fit itself also prints the
# learned intercept and coefficients.
start = time.time()
gd.fit(X_train, y_train)
print("Time taken is : ", time.time()-start)

151.93997581529314 [  62.25473737  -24.14781742  262.34511386  192.16177336   39.46857342
   10.2531568  -142.46684985  124.29304814  244.280155    119.30530374]
Time taken is :  0.6579329967498779


In [104]:
# Predictions of the gradient-descent model on the test split
# (this overwrites the earlier y_pred produced by the scikit-learn model).
y_pred = gd.predict(X_test)

In [105]:
# R^2 of the gradient-descent model on the same test split, for comparison with the scikit-learn score above.
r2_score(y_test, y_pred)

0.3971428800671305