# Importing Libraries

In [83]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Data Preprocessing  

In [84]:
# Load the dataset into a DataFrame and display its first rows as the cell output.
# NOTE(review): '/content/...' looks like a Colab-style absolute path — confirm the
# file exists at this location in the environment where the notebook is run.
df = pd.read_csv('/content/diabetes.csv')
df.head()

Unnamed: 0,Age,BMI,BloodPressure,Insulin,HbA1c,Glucose,Cholesterol,HDL,LDL,Triglycerides,PhysicalActivity,FamilyHistory,BloodSugar
0,68,25.3,91,170,7.9,118,236,63,109,222,1,0,189.67011
1,58,39.5,74,153,8.6,127,204,41,150,233,2,0,192.274924
2,44,28.2,73,169,9.1,146,241,48,153,108,2,1,209.916904
3,72,20.7,71,98,7.8,186,181,46,175,105,4,1,237.310364
4,37,26.9,89,118,9.4,190,199,39,177,172,2,1,228.661224


In [85]:
# Target: the 'BloodSugar' column. Features: every remaining column of df.
y = df['BloodSugar']
X = df.drop(columns=['BloodSugar'])

In [86]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# The scaler is fitted on the training split only and then applied to both
# splits, so the scaling parameters never depend on the held-out rows.
min_max_scaler = MinMaxScaler().fit(X_train)
X_train = min_max_scaler.transform(X_train)
X_test = min_max_scaler.transform(X_test)

# Multiple Linear Regression using the Ordinary Least Squares Method
## (Closed-form solution)

In [87]:
# Fit scikit-learn's LinearRegression on the scaled training features,
# then predict the target for the held-out rows.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_test)

In [88]:
# R^2 of the LinearRegression predictions against the held-out targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(r2)

0.8974452257885692


In [89]:
# Fitted LinearRegression weights, one per feature column (features were min-max scaled before fitting).
lr.coef_

array([12.13485877, 28.20372652, 13.55685266, -3.14833901,  3.31168376,
       81.60382416,  3.64052061,  1.32479037,  1.63141536, -1.55644462,
        4.39286516,  0.56731929])

In [90]:
# Fitted LinearRegression intercept (bias term).
lr.intercept_

np.float64(128.25050200110226)

# Multiple Linear Regression using Batch Gradient Descent

In [91]:
class GDRegressor:
  """Multiple linear regression trained with full-batch gradient descent.

  Every epoch computes predictions for all training rows and then moves the
  intercept and the coefficient vector one step against the gradient of the
  mean squared error.

  Parameters
  ----------
  learning_rate : float, default=0.01
    Step size applied to both the intercept and the coefficient gradients.
  epochs : int, default=100
    Number of passes over the full training set (one update per pass).

  Attributes
  ----------
  coef_ : ndarray of shape (n_features,), or None before ``fit``
    Learned feature weights.
  intercept_ : float, or None before ``fit``
    Learned bias term.
  """

  def __init__(self , learning_rate=0.01 , epochs=100):
    self.coef_ = None
    self.intercept_ = None
    self.learning_rate = learning_rate
    self.epochs = epochs

  def fit(self , X_train , y_train):
    """Estimate ``coef_`` and ``intercept_`` from the training data.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
      Training features (ndarray, DataFrame or nested list).
    y_train : array-like of shape (n_samples,) or (n_samples, 1)
      Training targets.

    Returns
    -------
    self : GDRegressor
      The fitted estimator, so calls can be chained.

    Raises
    ------
    ValueError
      If ``X_train`` and ``y_train`` have a different number of samples.
    """
    X = np.asarray(X_train , dtype=float)
    # Flatten the target: an (n, 1) column would otherwise broadcast against
    # the (n,) prediction vector into an (n, n) matrix and silently corrupt
    # both gradients.
    y = np.asarray(y_train , dtype=float).ravel()
    n = X.shape[0]
    if y.shape[0] != n:
      raise ValueError(
        "X_train and y_train have inconsistent sample counts: "
        f"{n} != {y.shape[0]}"
      )

    # Starting point: zero intercept, all-ones weights.
    self.intercept_ = 0.0
    self.coef_ = np.ones(X.shape[1])

    for _ in range(self.epochs):
      # Both gradients are taken from the same residual, i.e. from the
      # parameters as they were at the start of the epoch.
      residual = y - (np.dot(X , self.coef_) + self.intercept_)
      intercept_der = -(2.0 / n) * np.sum(residual)
      coef_der = -(2.0 / n) * np.dot(residual , X)

      self.intercept_ = self.intercept_ - (self.learning_rate * intercept_der)
      self.coef_ = self.coef_ - (self.learning_rate * coef_der)

    return self

  def predict(self , X_test):
    """Return predictions ``X_test @ coef_ + intercept_``.

    Must be called after ``fit``; ``X_test`` is array-like of shape
    (n_samples, n_features).
    """
    return np.dot(np.asarray(X_test , dtype=float) , self.coef_) + self.intercept_

In [153]:
# Train the gradient-descent regressor on the same scaled training split.
# NOTE: y_pred is reassigned here, replacing the LinearRegression predictions.
gdr = GDRegressor(epochs=1000, learning_rate=0.2)
gdr.fit(X_train, y_train)
y_pred = gdr.predict(X_test)

In [154]:
# Weights learned by gradient descent; compare with lr.coef_ from the LinearRegression fit.
gdr.coef_

array([12.13682531, 28.20607479, 13.55828436, -3.14639304,  3.31348203,
       81.60477105,  3.641711  ,  1.32695999,  1.63370109, -1.55408733,
        4.3939516 ,  0.56782746])

In [155]:
# Intercept learned by gradient descent; compare with lr.intercept_.
gdr.intercept_

np.float64(128.24018033654605)

In [156]:
# R^2 of the gradient-descent predictions against the held-out targets.
gdr_r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(gdr_r2)

0.8974452395051797
