# Home Task
Use the diabetes dataset (`sklearn.datasets.load_diabetes`) and apply each of the following regression approaches:

- Ridge;
- Lasso;
- Normal Equation;
- Polynomial.

In [1]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures


def get_X_y(features=None, verbose=False, random_state=2021, test_size=None):
    """Load the diabetes dataset, optionally keep a subset of columns, and split it.

    Parameters
    ----------
    features : None, int, or list/tuple of int, optional
        Column selection applied to the feature matrix.
        ``None`` keeps every column, an integer keeps that single column
        (returned as a 2-D, one-column array), and a list/tuple keeps the
        listed columns in the given order.
    verbose : bool, optional
        When true, print the training shapes and the first five training rows.
    random_state : int, optional
        Seed forwarded to ``train_test_split``; defaults to 2021.
    test_size : float, int or None, optional
        Forwarded to ``train_test_split``; ``None`` keeps that function's
        default split proportion.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    TypeError
        If ``features`` is not ``None``, an integer, or a list/tuple.
    ValueError
        If ``features`` is an empty list/tuple.
    """
    X, y = load_diabetes(return_X_y=True)

    if features is None:
        print ('Selecting all features')
    # bool is a subclass of int, so exclude it explicitly: True/False is not a column index.
    elif isinstance(features, (int, np.integer)) and not isinstance(features, bool):
        print ('Selecting one feature: {}'.format(features))
        X = X[:, [features]]  # list index keeps the result 2-D with a single column
    elif isinstance(features, (list, tuple)):
        if len(features) == 0:
            raise ValueError('parameter "features" must select at least one column')
        if len(features) == 1:
            print ('Selecting one feature: {}'.format(features))
        else:
            print ('Selecting features list: {}'.format(features))
        X = X[:, list(features)]
    else:
        # Fail loudly: returning None here would only surface later as an
        # unrelated "cannot unpack" error at the call site.
        raise TypeError('wrong format of parameter "features"')

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    if verbose:
        print ('X_train.shape= ',X_train.shape)
        print ('y_train.shape= ',y_train.shape)
        print ('X_train [:5] = \n{}'.format(X_train[:5]))
        print ('y_train [:5] = \n{}'.format(y_train[:5]))
    return X_train, X_test, y_train, y_test

In [2]:
# Split the full feature set; verbose=True echoes the shapes and first five training rows.
X_train, X_test, y_train, y_test = get_X_y(verbose=True)

Selecting all features
X_train.shape=  (331, 10)
y_train.shape=  (331,)
X_train [:5] = 
[[-0.06363517 -0.04464164 -0.03315126 -0.03321358  0.00118295  0.02405115
  -0.02499266 -0.00259226 -0.02251217 -0.05906719]
 [ 0.01264814 -0.04464164 -0.02560657 -0.04009932 -0.03046397 -0.04515466
   0.0780932  -0.0763945  -0.07212845  0.01134862]
 [ 0.03807591  0.05068012  0.00888341  0.04252958 -0.04284755 -0.02104223
  -0.03971921 -0.00259226 -0.01811827  0.00720652]
 [-0.07816532  0.05068012  0.07786339  0.05285819  0.07823631  0.0644473
   0.02655027 -0.00259226  0.04067226 -0.00936191]
 [-0.07453279 -0.04464164 -0.0105172  -0.00567061 -0.06623874 -0.0570543
  -0.00290283 -0.03949338 -0.0425721  -0.0010777 ]]
y_train [:5] = 
[214.  98. 127. 233. 168.]


In [3]:
# Ridge regression with the estimator's default hyper-parameters.

ridge_reg = Ridge().fit(X_train, y_train)  # fit() returns the fitted estimator itself
regressor = ridge_reg  # generic alias so the reporting lines read the same in every cell
print('Ridge')
# Report R2 on both splits, then the fitted intercept (b) and weights (w).
for label, X_part, y_part in (
    ('R2 train score =', X_train, y_train),
    ('R2 test score =', X_test, y_test),
):
    print(label, regressor.score(X_part, y_part))
print('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_))

Ridge
R2 train score = 0.4227500042714355
R2 test score = 0.4342970082842499
b: 148.99988868218784, 
w= [  31.07135389  -67.81258571  284.12046397  158.3081174    25.34302859
  -14.6316645  -130.28687824  116.41280432  239.50350188  108.52433481]


In [4]:
# Lasso regression with the estimator's default hyper-parameters.

lasso_reg = Lasso().fit(X_train, y_train)  # fit() returns the fitted estimator itself
regressor = lasso_reg  # generic alias so the reporting lines read the same in every cell
print('Lasso')
# Report R2 on both splits, then the fitted intercept (b) and weights (w).
for label, X_part, y_part in (
    ('R2 train score =', X_train, y_train),
    ('R2 test score =', X_test, y_test),
):
    print(label, regressor.score(X_part, y_part))
print('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_))

Lasso
R2 train score = 0.36602010243711314
R2 test score = 0.3392074106660582
b: 149.4852586610367, 
w= [  0.          -0.         379.30470419   0.           0.
   0.          -0.           0.         317.42763802   0.        ]


In [5]:
# Normal Equation

m, n = X_train.shape
# Prepend a column of ones so the first fitted parameter acts as the intercept.
X_train_ext = np.c_[np.ones((m, 1)), X_train]
assert X_train_ext.shape == (m, n + 1)

print ('Solving linear regression using normal equation...')

# params = pinv(X^T X) X^T y, solved once (the pseudo-inverse is used in place of a plain inverse).
params = np.linalg.pinv(X_train_ext.T @ X_train_ext) @ X_train_ext.T @ y_train
b = params[0]                  # intercept
w = params[1:].reshape(1, -1)  # weights as a (1, n) row vector
print ('b: {}, \nw= {}'.format(b,w))

print ('Predicting using normal equation...')

# X @ w.T is an (m, 1) column; flatten it so predictions share the 1-D shape
# of the targets and a later `y - z` cannot silently broadcast to (m, m).
z_train = (b + X_train @ w.T).ravel()
z_test = (b + X_test @ w.T).ravel()

# r2_score is imported in the notebook's import cell.
print ('R2 train score =',  r2_score(y_train,z_train))
print ('R2 test score =', r2_score(y_test,z_test))

Solving linear regression using normal equation...
b: 148.99287782145046, 
w= [[ -19.68524788 -240.18043072  557.91466379  251.50090048 -500.39623377
   275.58122739  -11.60794591  154.01479032  651.17035619   77.51258452]]
Predicting using normal equation...
R2 train score = 0.5073702774872662
R2 test score = 0.528173969826651


In [6]:
# Polynomial

# include_bias=False leaves out the constant all-ones (degree-0) column
# that PolynomialFeatures would otherwise emit as its first feature.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)  # reuse the expansion fitted on the training split
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

poly_lin_reg = LinearRegression()
poly_lin_reg.fit(X_train_poly, y_train)
regressor = poly_lin_reg  # generic alias so the reporting lines read the same in every cell
print('Polynomial + Linear Regression')
# Report R2 on both splits, then the fitted intercept (b) and weights (w).
for label, X_part, y_part in (
    ('R2 train score =', X_train_poly, y_train),
    ('R2 test score =', X_test_poly, y_test),
):
    print(label, regressor.score(X_part, y_part))
print('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_))

X_train.shape=  (331, 10)
X_train_poly.shape=  (331, 65)
Polynomial + Linear Regression
R2 train score = 0.6207797301635396
R2 test score = 0.34719901380574214
b: 56.79525186375287, 
w= [ 1.06168923e+02 -2.77265262e+02  5.11299899e+02  2.51497433e+02
 -1.80092300e+04  1.57192074e+04  6.57373097e+03  1.74001071e+02
  6.49554697e+03  9.66785599e+01  2.78256324e+03  3.85252924e+03
 -1.54319287e+02  9.33782637e+02  7.84052092e+03 -1.10754021e+04
 -1.11039929e+03  2.01320959e+03  1.35182836e+03 -1.10168958e+03
 -1.67426138e+00  2.29713538e+03  2.56866007e+02 -6.55374965e+02
  1.80613963e+03  1.34943461e+02 -6.93340553e+03  1.68125028e+03
  1.60066363e+03  1.15273617e+03  3.13892761e+03 -8.12973306e+02
  5.98607631e+02  9.01138185e+02 -1.26003138e+03  3.87472697e+02
  7.84628596e+02 -3.71765740e+02  1.50825980e+04 -1.23431929e+04
 -3.95041730e+03  3.06050706e+03 -5.21923360e+03 -2.22836579e+03
  8.86560173e+04 -1.15193867e+05 -7.26859339e+04 -3.64067602e+04
 -2.75141402e+04 -4.86607913e+03  