# Importing Libraries

In [92]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor , LinearRegression
from sklearn.metrics import r2_score

# Data Preprocessing  

In [3]:
# Load the dataset into a DataFrame and preview its first rows.
# NOTE(review): '/content/diabetes.csv' is an absolute path (looks like a Colab
# runtime location — confirm); it must be adjusted when running elsewhere.
df = pd.read_csv('/content/diabetes.csv')
df.head()

Unnamed: 0,Age,BMI,BloodPressure,Insulin,HbA1c,Glucose,Cholesterol,HDL,LDL,Triglycerides,PhysicalActivity,FamilyHistory,BloodSugar
0,68,25.3,91,170,7.9,118,236,63,109,222,1,0,189.67011
1,58,39.5,74,153,8.6,127,204,41,150,233,2,0,192.274924
2,44,28.2,73,169,9.1,146,241,48,153,108,2,1,209.916904
3,72,20.7,71,98,7.8,186,181,46,175,105,4,1,237.310364
4,37,26.9,89,118,9.4,190,199,39,177,172,2,1,228.661224


In [4]:
# 'BloodSugar' is the regression target; every other column is a predictor.
y = df['BloodSugar']
X = df.drop(columns=['BloodSugar'])

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Learn the per-feature min/max from the training rows only, then apply that
# same scaling to both partitions so the test rows never influence the scaler.
min_max_scaler = MinMaxScaler().fit(X_train)
X_train = min_max_scaler.transform(X_train)
X_test = min_max_scaler.transform(X_test)

# Multiple Linear Regression using scikit-learn's SGDRegressor

In [86]:
# SGDRegressor shuffles the training rows between epochs, so without a seed the
# fitted coefficients and the R² score change on every run. Pin the seed so a
# Restart & Run All reproduces the numbers shown below.
lr = SGDRegressor(random_state=42)
lr.fit(X_train , y_train)

# Predictions on the held-out (already scaled) test features.
y_pred = lr.predict(X_test)

In [87]:
# R² (coefficient of determination) of the SGDRegressor predictions on the test split.
r2 = r2_score(y_test , y_pred)
print(r2)

0.8934789465526406


In [88]:
# Fitted weights of the SGDRegressor, one per (min-max scaled) feature column.
lr.coef_

array([13.65862472, 30.02514699, 14.65713705, -1.59978136,  4.69959309,
       82.1902766 ,  4.54995948,  3.04904478,  3.41919192,  0.27552514,
        5.23378103,  0.97524141])

In [89]:
# Fitted bias term of the SGDRegressor.
lr.intercept_

array([120.28398761])

# Multiple Linear Regression using Stochastic Gradient Descent (from scratch)

In [32]:
class MySGDRegressor:
  """Linear regression trained with single-sample stochastic gradient descent.

  The model is ``y_hat = X @ coef_ + intercept_`` and the loss for one row is
  the squared error ``(y - y_hat) ** 2``. Every update uses one training row
  drawn uniformly at random, with replacement.

  Parameters
  ----------
  learning_rate : float, default=0.01
      Step size applied to each gradient update.
  epochs : int, default=100
      Number of passes; each pass performs ``n_samples`` single-row updates.
  random_state : int or None, default=None
      Seed for the row sampling. With ``None`` the rows are drawn from NumPy's
      global random state, so ``np.random.seed`` still controls the result.

  Attributes
  ----------
  coef_ : ndarray of shape (n_features,), or None before ``fit``
      Learned feature weights.
  intercept_ : float, or None before ``fit``
      Learned bias term.
  """

  def __init__(self , learning_rate=0.01 , epochs=100 , random_state=None):
    self.coef_ = None
    self.intercept_ = None
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.random_state = random_state

  def fit(self , X_train , y_train):
    """Fit the weights on ``X_train`` (n_samples, n_features) and ``y_train``.

    ``y_train`` may be 1-D or a single-column 2-D array; it is flattened.

    Returns
    -------
    self : MySGDRegressor
        The fitted estimator, to allow call chaining.

    Raises
    ------
    ValueError
        If ``X_train`` is not 2-D, is empty, or its row count differs from
        the number of targets.
    """
    X_train = np.asarray(X_train , dtype=float)
    y_train = np.asarray(y_train , dtype=float).ravel()

    if X_train.ndim != 2:
      raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
    n , n_features = X_train.shape
    if n == 0:
      raise ValueError("X_train must contain at least one sample")
    if y_train.shape[0] != n:
      raise ValueError("X_train and y_train must contain the same number of samples")

    # A dedicated generator keeps seeded runs independent of global state;
    # without a seed fall back to the module-level RNG as before.
    if self.random_state is None:
      rng = np.random
    else:
      rng = np.random.RandomState(self.random_state)

    self.intercept_ = 0.0
    self.coef_ = np.ones(n_features)

    # epochs * n single-row updates in total.
    for _ in range(self.epochs * n):
      idx = rng.randint(0 , n)
      x_i = X_train[idx]

      # Both gradients use the residual computed BEFORE either parameter moves.
      residual = y_train[idx] - (np.dot(x_i , self.coef_) + self.intercept_)

      # d/d(intercept) of (y - y_hat)^2 = -2 * residual
      intercept_der = -2 * residual
      # d/d(coef) of (y - y_hat)^2 = -2 * residual * x
      coef_der = -2 * residual * x_i

      self.intercept_ = self.intercept_ - (self.learning_rate * intercept_der)
      self.coef_ = self.coef_ - (self.learning_rate * coef_der)

    return self

  def predict(self , X_test):
    """Return ``X_test @ coef_ + intercept_`` for the given feature rows.

    Raises
    ------
    RuntimeError
        If called before ``fit``.
    """
    if self.coef_ is None:
      raise RuntimeError("MySGDRegressor is not fitted yet; call fit() first")
    return np.dot(np.asarray(X_test , dtype=float) , self.coef_) + self.intercept_

In [149]:
# Train the from-scratch SGD model for 50 epochs and predict on the test split.
# Training rows are drawn at random, so coefficients and R² vary between runs
# unless NumPy's global random state is seeded beforehand.
sgdr = MySGDRegressor(learning_rate=0.01 , epochs=50)
sgdr.fit(X_train , y_train)
y_pred = sgdr.predict(X_test)

In [150]:
# Weights learned by the from-scratch SGD model, one per feature column.
sgdr.coef_

array([12.76517818, 28.50955918, 13.4244237 , -2.94363864,  3.20877206,
       82.14792761,  5.54263888,  2.8546731 ,  2.60322519, -2.2782387 ,
        3.52649856,  0.84351421])

In [151]:
# Bias term learned by the from-scratch SGD model.
sgdr.intercept_

np.float64(127.12335003786667)

In [152]:
# R² of the from-scratch SGD model's predictions on the test split.
sgdr_r2 = r2_score(y_test , y_pred)
print(sgdr_r2)

0.8861479045162028


# Using the Ordinary Least Squares Method

In [93]:
# Fit scikit-learn's LinearRegression on the same scaled training data and
# predict on the test split.
# NOTE: this rebinds `lr` and `y_pred`, which earlier cells used for the SGD models.
lr = LinearRegression()
lr.fit(X_train , y_train)
y_pred = lr.predict(X_test)

In [94]:
# R² of the LinearRegression predictions on the test split (rebinds `r2`).
r2 = r2_score(y_test , y_pred)
print(r2)

0.8974452257885692


In [95]:
# Fitted weights of the LinearRegression model, one per feature column.
lr.coef_

array([12.13485877, 28.20372652, 13.55685266, -3.14833901,  3.31168376,
       81.60382416,  3.64052061,  1.32479037,  1.63141536, -1.55644462,
        4.39286516,  0.56731929])

In [96]:
# Fitted bias term of the LinearRegression model.
lr.intercept_

np.float64(128.25050200110226)