### Implementing Linear Regression Using Gradient Descent from scratch using Python

In [None]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('diabetes.csv')

In [3]:
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,progression
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135


In [4]:
#separating target columns
x = df.loc[:, df.columns != 'progression'].to_numpy()
y = df['progression'].to_numpy()

In [5]:
x = x.T
y = y.reshape(1,x.shape[1]) #reshaping the target column for matrix calculations

In [6]:
x.shape

(10, 442)

In [7]:
y.shape

(1, 442)

Training the model

In [32]:
def fit(x, y, epochs, lr):
    m = x.shape[1] 
    n = x.shape[0]
    weights = np.zeros((n,1)) #initialising weights as zeros
    bias = 0

    for i in range(epochs):
        
        lin_pred = np.dot(weights.T, x) + bias #Linear equation  y_pred = Wx + B
        error = y-lin_pred #calculating the error term

        dw = (1/n)*np.dot(error ,x.T) #calculating the weights
        db = (1/n) * np.sum(error) #calculating the bias value

        weights = weights - lr*dw.T #adjusting the weights in each iteration of gradient descent
        bias = bias - lr*db    # adjusting the bias value in each iteration

    return weights, bias    
weights, biases = fit(x, y, epochs = 100, lr = 0.001) #returns weights and bias values which is used for prediction

Prediction

In [33]:
def predict(x,y,w,b):
    lin_pred = np.dot(w.T,x) + b
    return lin_pred
y_pred = predict(x,y,weights,biases) #pre-calculated weights and biases are used for predicting here. 

In [34]:
y_pred = y_pred[0].tolist()#converting the predictions into a list for calculation

In [35]:
y

array([[151,  75, 141, 206, 135,  97, 138,  63, 110, 310, 101,  69, 179,
        185, 118, 171, 166, 144,  97, 168,  68,  49,  68, 245, 184, 202,
        137,  85, 131, 283, 129,  59, 341,  87,  65, 102, 265, 276, 252,
         90, 100,  55,  61,  92, 259,  53, 190, 142,  75, 142, 155, 225,
         59, 104, 182, 128,  52,  37, 170, 170,  61, 144,  52, 128,  71,
        163, 150,  97, 160, 178,  48, 270, 202, 111,  85,  42, 170, 200,
        252, 113, 143,  51,  52, 210,  65, 141,  55, 134,  42, 111,  98,
        164,  48,  96,  90, 162, 150, 279,  92,  83, 128, 102, 302, 198,
         95,  53, 134, 144, 232,  81, 104,  59, 246, 297, 258, 229, 275,
        281, 179, 200, 200, 173, 180,  84, 121, 161,  99, 109, 115, 268,
        274, 158, 107,  83, 103, 272,  85, 280, 336, 281, 118, 317, 235,
         60, 174, 259, 178, 128,  96, 126, 288,  88, 292,  71, 197, 186,
         25,  84,  96, 195,  53, 217, 172, 131, 214,  59,  70, 220, 268,
        152,  47,  74, 295, 101, 151, 127, 237, 225

In [36]:
y_pred


[-11346.0983386695,
 -11342.313422413672,
 -11345.593733321071,
 -11345.167109689733,
 -11344.29651834948,
 -11341.973391675547,
 -11343.107044932116,
 -11346.113273160287,
 -11345.649569247307,
 -11345.533337538549,
 -11342.141622250634,
 -11344.931280181401,
 -11343.610278572982,
 -11345.416193105317,
 -11343.352997180218,
 -11347.118910254165,
 -11345.739129456308,
 -11346.909635991347,
 -11344.28785808566,
 -11343.525905182207,
 -11343.471810000756,
 -11343.300449267588,
 -11343.63150645377,
 -11349.022340181675,
 -11344.328811907304,
 -11345.069072402304,
 -11342.061231074724,
 -11344.75235451472,
 -11343.650569770094,
 -11345.326033873329,
 -11344.376587389133,
 -11341.940284338403,
 -11348.055502180952,
 -11342.786173634944,
 -11342.439176388008,
 -11343.880763796404,
 -11345.48398526642,
 -11344.170989651528,
 -11348.529685706057,
 -11345.035527589518,
 -11346.769308188474,
 -11341.940130809287,
 -11344.689196217176,
 -11343.191031001703,
 -11347.153629496555,
 -11344.679289672

Calculating Mean Squared Value

In [37]:
#calculating r-squared value
mean_y = np.mean(y)
r2 = 0
for i in range(0,len(y_pred)):
    r2_num =(y[0][i]-y_pred[i])**2
    r2_denom = (y[0][i]-mean_y)**2
    r2 += r2_num/r2_denom
print(1-r2,  "R-Squared error")

-15606449107.539557 R-Squared error


In [38]:
mse = np.square(np.subtract(y[0],y_pred)).mean()

In [39]:
print('Mean Squared Error : ',mse)

Mean Squared Error :  132192698.89833748
