In [1]:
%matplotlib notebook
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes


def get_X_y_diabetes(features=None, verbose=False):
    """Load the scikit-learn diabetes dataset and split it into train/test parts.

    Parameters
    ----------
    features : None, int, list or tuple, optional
        Column selection applied to the feature matrix before splitting:

        * ``None`` - keep every column;
        * a single integer (Python ``int`` or NumPy integer) or a one-element
          list/tuple - keep that one column, returned as a 2-D array with a
          single column;
        * a list/tuple of column indices - keep those columns.
    verbose : bool, optional
        When true, print the shapes of the four resulting arrays and the
        first five rows / targets of the training part.

    Returns
    -------
    tuple or None
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with ``random_state=2021``.  If ``features`` has
        an unsupported type a message is printed and ``None`` is returned, so
        a caller that unpacks the result directly will fail on that ``None``.
    """
    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target

    # bool is a subclass of int; it was never accepted as a column index and
    # stays rejected so that True/False are not silently read as columns 1/0.
    is_single_index = (
        isinstance(features, (int, np.integer))
        and not isinstance(features, bool)
    )
    is_index_sequence = isinstance(features, (list, tuple))

    if features is None:
        print('Selecting all features')
    elif is_single_index or (is_index_sequence and len(features) == 1):
        print('Selecting one feature: {}'.format(features))
        # Normalise a one-element tuple to a list so NumPy treats it as a
        # fancy index; reshape guarantees a 2-D (n_samples, 1) matrix.
        column = features if is_single_index else list(features)
        X = X[:, column].reshape(-1, 1)
    elif is_index_sequence:
        print('Selecting features list: {}'.format(features))
        X = X[:, list(features)]
    else:
        print('Wrong format of parameter "features"')
        return None

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2021)
    if verbose:
        print('X_train.shape =', X_train.shape)
        print('y_train.shape =', y_train.shape)
        print('X_test.shape =', X_test.shape)
        print('y_test.shape =', y_test.shape)
        print('X_train[:5] = \n{}'.format(X_train[:5]))
        print('y_train[:5] = \n{}'.format(y_train[:5]))

    return X_train, X_test, y_train, y_test


In [2]:
# Build the train/test split with the default feature selection (all columns)
# and echo the resulting shapes together with a short preview of the data.
X_train, X_test, y_train, y_test = get_X_y_diabetes(verbose=True)

Selecting all features
X_train.shape = (331, 10)
y_train.shape = (331,)
X_test.shape = (111, 10)
y_test.shape = (111,)
X_train[:5] = 
[[-0.06363517 -0.04464164 -0.03315126 -0.03321323  0.00118295  0.02405115
  -0.02499266 -0.00259226 -0.02251653 -0.05906719]
 [ 0.01264814 -0.04464164 -0.02560657 -0.04009893 -0.03046397 -0.04515466
   0.0780932  -0.0763945  -0.07213275  0.01134862]
 [ 0.03807591  0.05068012  0.00888341  0.04252949 -0.04284755 -0.02104223
  -0.03971921 -0.00259226 -0.01811369  0.00720652]
 [-0.07816532  0.05068012  0.07786339  0.05285804  0.07823631  0.0644473
   0.02655027 -0.00259226  0.04067283 -0.00936191]
 [-0.07453279 -0.04464164 -0.0105172  -0.00567042 -0.06623874 -0.0570543
  -0.00290283 -0.03949338 -0.04257085 -0.0010777 ]]
y_train[:5] = 
[214.  98. 127. 233. 168.]


In [3]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

# Standardise the features: the scaling statistics are learned from the
# training split only and then applied unchanged to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ridge regression with its default hyperparameters, fitted on the scaled data.
ridge_reg = Ridge().fit(X_train_scaled, y_train)

# `regressor` is the generic handle used by the reporting lines below.
regressor = ridge_reg

print('Ridge')
print('R2 train score =', regressor.score(X_train_scaled, y_train))
print('R2 test score =', regressor.score(X_test_scaled, y_test))
print(
    'b: {}, \nw= {}'.format(
        regressor.intercept_,
        regressor.coef_,
    )
)


Ridge
R2 train score = 0.5072634835482679
R2 test score = 0.5260667062916088
b: 149.98791540785498, 
w= [ -0.90572311 -11.39061084  26.93579377  11.85086621 -17.84354295
   8.53902963  -3.14662357   6.51696437  28.8868917    3.77727244]


In [4]:
from sklearn.linear_model import Lasso
# Lasso regression with its default hyperparameters, fitted on the same
# standardised training data that the Ridge cell produced.
lasso_reg = Lasso().fit(X_train_scaled, y_train)

# `regressor` is the generic handle used by the reporting lines below.
regressor = lasso_reg

print('Lasso')
print('R2 train score =', regressor.score(X_train_scaled, y_train))
print('R2 test score =', regressor.score(X_test_scaled, y_test))
print(
    'b: {}, \nw= {}'.format(
        regressor.intercept_,
        regressor.coef_,
    )
)

Lasso
R2 train score = 0.5045236099772008
R2 test score = 0.5149363101451062
b: 149.98791540785498, 
w= [ -0.          -9.52587293  26.88368498  10.55544422  -2.57364251
  -0.         -10.89617641   0.          23.96575213   2.90179342]


In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Expand the standardised features with degree-2 polynomial terms;
# include_bias=False omits the constant column from the expansion.
poly = PolynomialFeatures(degree=2, include_bias=False).fit(X_train_scaled)
X_train_poly = poly.transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

# Show how much the expansion widens the design matrix.
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

# Plain (unregularised) least squares on the expanded features.
# NOTE(review): the recorded output of this cell shows an intercept and two
# weights with magnitudes around 1e11-1e12 and a test R2 well below the train
# R2 - this looks like (near-)collinear columns in the expanded matrix;
# confirm before interpreting individual coefficients.
poly_lin_reg = LinearRegression()
poly_lin_reg.fit(X_train_poly, y_train)

# `regressor` is the generic handle used by the reporting lines below.
regressor = poly_lin_reg

print('R2 train score =', regressor.score(X_train_poly, y_train))
print('R2 test score =', regressor.score(X_test_poly, y_test))
print(
    'b: {}, \nw= {}'.format(
        regressor.intercept_,
        regressor.coef_,
    )
)

X_train.shape=  (331, 10)
X_train_poly.shape=  (331, 65)
R2 train score = 0.6207811652108601
R2 test score = 0.34719035173998225
b: -1681524261172.5713, 
w= [ 4.82904572e+00 -1.11824709e+11  2.51026869e+01  1.21883834e+01
 -8.65984862e+02  7.60133884e+02  3.02678399e+02  7.73389275e+00
  3.14776755e+02  4.57714081e+00  6.10873032e+00  8.59698772e+00
 -3.46611023e-01  2.06108475e+00  1.74999542e+01 -2.48349228e+01
 -2.37764549e+00  4.48545074e+00  3.02900314e+00 -2.49769592e+00
  1.68152426e+12  5.28911924e+00  5.72957516e-01 -1.50340748e+00
  4.13050222e+00  3.00952911e-01 -1.57152414e+01  3.84568334e+00
  3.68915653e+00  2.68922997e+00  7.15040970e+00 -1.89044571e+00
  1.39619446e+00  1.99351501e+00 -2.89397335e+00  9.06402588e-01
  1.83226967e+00 -8.28521729e-01  3.38213882e+01 -2.78085098e+01
 -8.48054218e+00  6.85974693e+00 -1.17716703e+01 -5.07866096e+00
  2.00360605e+02 -2.61299120e+02 -1.57314177e+02 -8.24574966e+01
 -6.02805595e+01 -1.12228355e+01  8.52747593e+01  9.79165955e+0