In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
# Load scikit-learn's bundled diabetes toy dataset; `data` is the object returned by the loader.
from sklearn.datasets import load_diabetes

data = load_diabetes()

In [8]:
# Start by reading the dataset's own description text.
print(data["DESCR"])

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

In [9]:
# Separate the feature matrix (X) from the regression target (y).
X, y = data.data, data.target

In [10]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=45,
)

In [11]:
# Baseline model: ordinary least-squares linear regression (fitted in the next cell).
from sklearn.linear_model import LinearRegression

L = LinearRegression()

In [12]:
# Fit the baseline linear regression on the training split.
L.fit(X=X_train, y=y_train)

In [13]:
# Show the fitted coefficients, then the intercept, one per line.
print(L.coef_, L.intercept_, sep="\n")

[  23.45465406 -247.42747406  492.1087518   329.35876431 -970.79723039
  573.54295519  182.42162368  255.92168168  794.21609282   89.32249214]
152.13623331746496


In [14]:
# Predict the target for the held-out test rows with the linear model.
y_pred = L.predict(X=X_test)


In [16]:
# Score the linear-regression predictions against the true test targets.
from sklearn.metrics import r2_score, mean_squared_error

linreg_r2 = r2_score(y_test, y_pred)
# RMSE is the square root of the mean squared error.
linreg_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("R2 score", linreg_r2)
print("RMSE", linreg_rmse)

R2 score 0.5188113124539249
RMSE 48.72713760953253


In [17]:
# Try ridge regression (very small alpha) to see whether the scores improve
# over the plain linear model.

from sklearn.linear_model import Ridge

R = Ridge(alpha=1e-4)

In [18]:
# Fit the alpha=0.0001 ridge model on the training split.
R.fit(X=X_train, y=y_train)

In [19]:
# Ridge predictions for the test split, kept separate from the linear model's `y_pred`.
y_pred1 = R.predict(X=X_test)

In [20]:
# Report the same two metrics as for the linear model, now for the ridge predictions.
ridge_scores = (
    ("R2 score", r2_score(y_test, y_pred1)),
    ("RMSE", np.sqrt(mean_squared_error(y_test, y_pred1))),
)
for metric_name, metric_value in ridge_scores:
    print(metric_name, metric_value)

R2 score 0.5189732635884949
RMSE 48.71893700181956


In [21]:
# After applying ridge regression (alpha=0.0001) the R2 score and RMSE are almost the same as for plain linear regression,
# only slightly better (R2 0.5190 vs 0.5188, RMSE 48.72 vs 48.73). Lasso regression might give a better result; in any case, applying regularization is not a bad option.

In [22]:
# now again apply Ridge regression with different alpha value

In [23]:
# Second ridge model: stronger penalty (alpha=0.1), solved with the 'cholesky' solver.
reg = Ridge(
    solver="cholesky",
    alpha=0.1,
)

In [24]:
# Fit the alpha=0.1 ridge model on the training split.
reg.fit(X=X_train, y=y_train)

In [25]:
# Score the `reg` ridge model on the held-out split.
# The predictions get their own name instead of reusing `y_pred`: `y_pred` holds the
# linear-regression predictions that the earlier metrics cell reports on, and
# overwriting it here would make that cell print ridge numbers if it were re-run.
y_pred_reg = reg.predict(X_test)
# Last expression of the cell: the R2 score is shown as the cell output.
r2_score(y_test, y_pred_reg)

0.5208421054129915

In [26]:
# Print the ridge coefficients followed by the intercept.
for fitted_param in (reg.coef_, reg.intercept_):
    print(fitted_param)

[  31.03389163 -204.8305909   464.8963454   304.96414918  -95.76037039
  -87.52969228 -183.81809293  147.49794012  425.85392451  110.21559915]
152.07141290172817


In [27]:
# Now apply gradient descent (SGDRegressor)
from sklearn.linear_model import SGDRegressor

In [28]:
# L2-penalised linear model trained by stochastic gradient descent with a constant
# learning rate (eta0) for at most 500 epochs.
# random_state is fixed (same seed as the train/test split) because the SGD fit is
# stochastic: without a seed the coefficients and the R2 score printed in the next
# cell change from run to run, so the saved output cannot be reproduced.
sgd = SGDRegressor(
    penalty='l2',
    alpha=0.001,
    learning_rate='constant',
    eta0=0.1,
    max_iter=500,
    random_state=45,
)

In [29]:
# Train the SGD regressor and evaluate it on the held-out split.
sgd.fit(X_train, y_train)

# Use a dedicated name for these predictions rather than reassigning `y_pred`, so an
# earlier cell that reads `y_pred` does not silently pick up SGD output when re-run.
y_pred_sgd = sgd.predict(X_test)
print("R2 score", r2_score(y_test, y_pred_sgd))
print(sgd.coef_)
print(sgd.intercept_)

R2 score 0.4930904543753082
[  39.96842218 -136.75320351  383.45813189  257.1862838   -28.13272504
  -75.02502767 -177.02344092  127.96109387  327.07927621  138.75747303]
[158.73704715]


In [None]:
# Now apply ridge regression again to compare its performance with the SGDRegressor model

In [30]:
from sklearn.linear_model import Ridge

# Ridge counterpart to the SGD model: same alpha value and iteration cap, solved with
# the iterative 'sparse_cg' solver.
# NOTE(review): Ridge's alpha and SGDRegressor's alpha may not be on the same scale
# (the two objectives are normalised differently) — confirm before reading the
# comparison as like-for-like.
reg1 = Ridge(
    alpha=0.001,
    solver="sparse_cg",
    max_iter=500,
)

In [31]:
# Train the sparse_cg ridge model, then report its test R2 and fitted parameters.
reg1.fit(X=X_train, y=y_train)

y_pred = reg1.predict(X=X_test)
print("R2 score", r2_score(y_true=y_test, y_pred=y_pred))
print(reg1.coef_, reg1.intercept_, sep="\n")

R2 score 0.5201448363733796
[  24.01614157 -246.40604595  493.59285633  329.08832668 -852.76774004
  479.42466511  131.86683001  243.03291514  748.9646394    90.15507892]
152.12463295186845
