In [1]:
import random
import numpy as np
from sklearn.datasets import load_diabetes

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import r2_score

### **Linear Regression**

In [2]:
dataset = load_diabetes(as_frame = True)
df = dataset['data']
df['target'] = dataset['target'] 

In [3]:
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [5]:
X = df.iloc[:, :10]
y = df['target']

In [6]:
X_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 2)

In [9]:
lr = LinearRegression()

In [10]:
lr.fit(X_train, y_train)

In [12]:
lr_m, lr_b = lr.coef_, lr.intercept_
print(lr_m, lr_b)

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238] 151.88331005254167


In [17]:
y_pred = lr.predict(x_test)

In [18]:
r2_score(y_test, y_pred)

0.4399338661568968

### **SGD Regressor**

In [13]:
sgdr = SGDRegressor(max_iter = 100, learning_rate = 'constant', eta0=0.001)

In [15]:
sgdr.fit(X_train, y_train)



In [16]:
sgdr_m, sgdr_b = sgdr.coef_, sgdr.intercept_
print(lr_m, lr_b)

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238] 151.88331005254167


In [19]:
y_pred = sgdr.predict(x_test)

In [20]:
r2_score(y_test, y_pred)

0.16679036661742885

### **Mini-Batch Gradient Descent**

In [21]:
mbgd = SGDRegressor(learning_rate='constant', eta0=0.1)

In [37]:
batch_size = 50
epochs = 300

for i in range(epochs):
    idx = random.sample(range(X_train.shape[0]), batch_size)
    mbgd.partial_fit(X_train.iloc[idx], y_train.iloc[idx])

In [38]:
mbgd.coef_

array([ -19.77590839, -188.78460401,  522.92723118,  336.73608527,
        -98.20067605,  -66.99281649, -165.51807453,   79.70291101,
        545.70093704,   76.48184284])

In [39]:
mbgd.intercept_

array([126.64831735])

In [40]:
y_pred = mbgd.predict(x_test)

In [41]:
r2_score(y_test, y_pred)

0.3216389440127294