# Use the diabetes dataset (`sklearn.datasets.load_diabetes`) and apply:

- Ridge regression
- Lasso regression
- Polynomial regression

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


In [49]:
from sklearn.datasets import load_diabetes

# Load the dataset a single time and reuse the returned Bunch for the
# features, the target and the description text, instead of calling
# load_diabetes() a second time only to print DESCR.
diabetes = load_diabetes(as_frame=True)
X, y = diabetes.data, diabetes.target  # pandas DataFrame of features, Series target
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

:Number of Instances: 442

:Number of Attributes: First 10 columns are numeric predictive values

:Target: Column 11 is a quantitative measure of disease progression one year after baseline

:Attribute Information:
    - age     age in years
    - sex
    - bmi     body mass index
    - bp      average blood pressure
    - s1      tc, total serum cholesterol
    - s2      ldl, low-density lipoproteins
    - s3      hdl, high-density lipoproteins
    - s4      tch, total cholesterol / HDL
    - s5      ltg, possibly log of serum triglycerides level
    - s6      glu, blood sugar level

Note: Each of these 10 feature variables have bee

In [50]:
# Preview the first five feature rows (rich DataFrame display).
X.head(n=5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


In [51]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the shape of every split in the same order as before.
for _name, _split in (
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_test', X_test),
    ('y_test', y_test),
):
    print(_name + '.shape=', _split.shape)

# NOTE(review): the label says "X_train[0]" but what is displayed is the
# first row by position (.iloc[0]), not the row whose index label is 0.
print('X_train[0]=')
X_train.iloc[0]

X_train.shape= (353, 10)
y_train.shape= (353,)
X_test.shape= (89, 10)
y_test.shape= (89,)
X_train[0]=


age    0.070769
sex    0.050680
bmi    0.012117
bp     0.056301
s1     0.034206
s2     0.049416
s3    -0.039719
s4     0.034309
s5     0.027364
s6    -0.001078
Name: 17, dtype: float64

In [64]:
# Baseline model: plain least-squares linear regression on the raw features.
lin_reg = LinearRegression().fit(X_train, y_train)
regress = lin_reg  # alias of the same fitted estimator

# score() is evaluated on both splits so the train/test gap is visible.
print('train score =', lin_reg.score(X_train, y_train))
print('test score =', lin_reg.score(X_test, y_test))

# Fitted intercept (b) and per-feature coefficients (w).
intercept, weights = lin_reg.intercept_, lin_reg.coef_
print('b: {}, \nw= {}'.format(intercept, weights))

train score = 0.5279193863361498
test score = 0.4526027629719195
b: 151.34560453985995, 
w= [  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]


## Ridge


In [53]:
from sklearn.linear_model import Ridge

# Ridge regression with the estimator's default hyper-parameters.
ridge_reg = Ridge().fit(X_train, y_train)
regressor = ridge_reg  # generic alias; each model cell rebinds it

# Same report as for the baseline: scores on both splits, then b and w.
print('train score =', ridge_reg.score(X_train, y_train))
print('test score =', ridge_reg.score(X_test, y_test))

intercept, weights = ridge_reg.intercept_, ridge_reg.coef_
print('b: {}, \nw= {}'.format(intercept, weights))

train score = 0.4424027835503953
test score = 0.41915292635986556
b: 152.241675211113, 
w= [  45.36737726  -76.66608563  291.33883165  198.99581745   -0.53030959
  -28.57704987 -144.51190505  119.26006559  230.22160832  112.14983004]


## Lasso
 

In [47]:
from sklearn.linear_model import Lasso

# Lasso regression with the estimator's default hyper-parameters.
lasso_reg = Lasso().fit(X_train, y_train)
regressor = lasso_reg  # generic alias; each model cell rebinds it

print('Lasso')
# Scores on the training and the held-out split.
print('R2 train score =', lasso_reg.score(X_train, y_train))
print('R2 test score =', lasso_reg.score(X_test, y_test))

# Fitted intercept (b) and per-feature coefficients (w).
intercept, weights = lasso_reg.intercept_, lasso_reg.coef_
print('b: {}, \nw= {}'.format(intercept, weights))

Lasso
R2 train score = 0.5225434389503393
R2 test score = 0.46687670944102466
b: 153.73654390934846, 
w= [ 0.68703212 -9.29751904 26.21922482 15.65731357 -8.2281718  -0.
 -9.02408714  3.42086059 22.63646533  2.09864712]


## Polynomial

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the features with PolynomialFeatures' default settings. The
# transformer is fitted on the training split only and then reused,
# unchanged, on the test split.
poly = PolynomialFeatures()
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Show how many columns the expansion produced compared to the input.
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

# Ordinary linear regression on top of the expanded features.
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)
regressor = poly_reg  # generic alias; each model cell rebinds it

print('Polynomial + Linear Regression')
print('R2 train score =', poly_reg.score(X_train_poly, y_train))
print('R2 test score =', poly_reg.score(X_test_poly, y_test))

# Fitted intercept (b) and one coefficient per expanded feature (w).
intercept, weights = poly_reg.intercept_, poly_reg.coef_
print('b: {}, \nw= {}'.format(intercept, weights))


X_train.shape=  (353, 10)
X_train_poly.shape=  (353, 66)
Polynomial + Linear Regression
R2 train score = 0.6061583502354682
R2 test score = 0.4156399336408001
b: 63.85672684344125, 
w= [ 1.02737951e-08  1.08828416e+02 -3.11898491e+02  4.21980794e+02
  3.91317715e+02 -1.69978817e+04  1.48192215e+04  6.05907491e+03
  4.43719629e+01  6.24332056e+03  9.85710597e+00  2.39028514e+03
  1.52465762e+03 -1.29048951e+03  6.61958511e+02 -9.38779898e+02
 -7.24306985e+03  5.90750460e+03  9.79750036e+03  1.22545646e+03
  8.57928127e+02 -1.88339350e+00  3.46891714e+02  1.25430373e+03
  1.60832972e+03  1.65480922e+03 -3.52932759e+03 -7.60218049e+03
  2.13145537e+03  6.88174670e+02  8.00898355e+02  3.34793540e+03
 -6.15585745e+03  7.29715549e+03 -2.11435053e+02 -3.58431763e+03
  3.69561236e+03  4.74291338e+02 -2.56573693e+02  1.99317477e+04
 -1.46380406e+04 -8.28673720e+03 -1.39611298e+03 -7.89399258e+03
 -3.30189040e+03  9.33891518e+04 -1.28756527e+05 -6.15540508e+04
 -2.52383912e+04 -4.02934360e+04 -7