In [35]:
from sklearn.datasets import load_diabetes
import numpy as np

In [36]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [37]:
data = load_diabetes()

In [38]:
print(data.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

:Number of Instances: 442

:Number of Attributes: First 10 columns are numeric predictive values

:Target: Column 11 is a quantitative measure of disease progression one year after baseline

:Attribute Information:
    - age     age in years
    - sex
    - bmi     body mass index
    - bp      average blood pressure
    - s1      tc, total serum cholesterol
    - s2      ldl, low-density lipoproteins
    - s3      hdl, high-density lipoproteins
    - s4      tch, total cholesterol / HDL
    - s5      ltg, possibly log of serum triglycerides level
    - s6      glu, blood sugar level

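Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1).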

In [39]:
data.feature_names

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [40]:
import pandas as pd

# Wrap the raw feature matrix in a DataFrame so each column carries its feature name.
df = pd.DataFrame(data=data.data, columns=data.feature_names)

In [41]:
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


In [42]:
df.shape

(442, 10)

In [43]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# shuffle (and therefore every score below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    data.target,
    test_size=0.2,
    random_state=22,
)

In [44]:
X_train.shape, X_test.shape

((353, 10), (89, 10))

In [45]:
y_train[:5]

array([175., 181.,  45., 116., 274.])

In [46]:
reg = LinearRegression()

In [47]:
reg.fit(X_train, y_train)

In [48]:
y_pred = reg.predict(X_test)

In [49]:
r2_score(y_test, y_pred)

0.48002331551396993

In [50]:
reg.coef_

array([  28.58297885, -260.7460662 ,  494.21420338,  294.55710928,
       -911.28612744,  601.93675888,   64.93690777,  140.09313478,
        813.06920641,   76.14511679])

In [51]:
reg.intercept_

151.34554771656752

# Building our own class for Batch Gradient Descent

In [84]:
class BatchGradientDescent:
    """Linear regression fitted with full-batch gradient descent on the MSE loss.

    Each epoch uses the entire training set to compute the gradient of
    ``mean((y - (X @ coef_ + intercept_)) ** 2)`` with respect to the
    coefficients and the intercept, then takes one step of size ``lr``
    against that gradient.

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to the gradient at every epoch.
    epochs : int, default=100
        Number of full passes (gradient steps) over the training data.
    verbose : bool, default=True
        When true, ``fit`` prints the final coefficients and intercept.

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,) or None
        Learned feature weights; ``None`` until ``fit`` is called.
    intercept_ : float or None
        Learned bias term; ``None`` until ``fit`` is called.
    """

    def __init__(self, lr=0.01, epochs=100, verbose=True):
        self.lr = lr
        self.epochs = epochs
        self.verbose = verbose
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features (ndarray, DataFrame, nested list, ...).
        y_train : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets; flattened to one dimension before use.

        Returns
        -------
        self : BatchGradientDescent
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` disagree on the number of samples.
        """
        X = np.asarray(X_train, dtype=float)
        # Flatten the target: an (n, 1) column would otherwise broadcast
        # against the (n,) predictions into an (n, n) residual matrix and
        # silently corrupt both gradients.
        y = np.asarray(y_train, dtype=float).ravel()

        n_samples = X.shape[0]
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X_train has {n_samples} samples but y_train has {y.shape[0]}"
            )

        # Start from all-ones weights and a zero bias.
        self.coef_ = np.ones(X.shape[1])
        self.intercept_ = 0.0

        for _ in range(self.epochs):
            # The residual is shared by both gradients, so compute it once.
            residual = y - (np.dot(X, self.coef_) + self.intercept_)

            # d(MSE)/d(coef) = -2/n * residual @ X ; d(MSE)/d(intercept) = -2 * mean(residual)
            slope_der = -2 * np.dot(residual, X) / n_samples
            intercept_der = -2 * np.mean(residual)

            self.coef_ = self.coef_ - self.lr * slope_der
            self.intercept_ = self.intercept_ - self.lr * intercept_der

        if self.verbose:
            print(self.coef_, self.intercept_)
        return self

    def predict(self, X_test):
        """Return ``X_test @ coef_ + intercept_`` for each row of ``X_test``.

        Intended to be called after ``fit`` has set ``coef_`` and ``intercept_``.
        """
        return np.dot(X_test, self.coef_) + self.intercept_



In [101]:
# Learning rate 0.15 with 10,000 full-batch epochs, spelled out by keyword.
cls = BatchGradientDescent(lr=0.15, epochs=10000)

In [102]:
cls.fit(X_train, y_train)

[  34.74344584 -259.26036498  510.14786303  291.87429311  -94.12825344
  -80.61564364 -266.58737502  106.15552982  478.23600584   79.39230619] 151.25376169472943


In [103]:
y_pred = cls.predict(X_test)

In [104]:
r2_score(y_test, y_pred)

0.4825849181576809