<font color = green>

# Linear Regression: Ridge, Lasso, Normal Equation, Polynomial
</font>

# Home Task 

Use diabetes dataset (`sklearn.datasets.load_diabetes`) and apply
 - Ridge 
 - Lasso
 - Polynomial


<h3><b>Load data</b></h3>

In [2]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

X, y_str = fetch_openml('diabetes', version=1, return_X_y=True, as_frame=False, parser='auto')
label_encoder = LabelEncoder()

y = label_encoder.fit_transform(y_str)
X_train, X_test, y_train, y_test=train_test_split(X,y)

print(y_str[:2], y[:2])

['tested_positive' 'tested_negative'] [1 0]


<h3><b>Ridge</b></h3>

In [3]:
from sklearn.linear_model import Ridge
ridge_reg=Ridge()
ridge_reg.fit(X_train,y_train)
regressor = ridge_reg
print ('Ridge')
print ('R2 train score =', regressor.score(X_train, y_train))
print ('R2 test score =', regressor.score(X_test, y_test))
print ('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_)) 

Ridge
R2 train score = 0.32366933200954917
R2 test score = 0.22133362334439466
b: -0.8786789371572334, 
w= [ 0.0175309   0.00630987 -0.00295121  0.00051063 -0.00020035  0.01282302
  0.1188085   0.0042727 ]


<h3><b>Lasso</b></h3>

In [4]:
from sklearn.linear_model import Lasso
lasso_reg=Lasso()
lasso_reg.fit(X_train,y_train)
regressor = lasso_reg
print ('Lasso')
print ('R2 train score =', regressor.score(X_train, y_train))
print ('R2 test score =', regressor.score(X_test, y_test))
print ('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_)) 

Lasso
R2 train score = 0.2357530459469137
R2 test score = 0.1476374567391715
b: -0.42599820263902693, 
w= [ 0.          0.00643085 -0.          0.         -0.          0.
  0.          0.        ]


<h3><b>Polynomial</b></h3>

<h4><b>Polynomial+Ridge</b></h4>

In [5]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Ridge regression
poly_ridge = Ridge(alpha=1e6, max_iter=100000).fit(X_train_poly, y_train)

regressor = poly_ridge

print('Polynomial + Ridge')

print('R2 train score =', regressor.score(X_train_poly, y_train))
print('R2 test score =', regressor.score(X_test_poly, y_test))

w = regressor.coef_
print('b: {}, \nw= {}'.format(regressor.intercept_, w))

Polynomial + Ridge
R2 train score = 0.37008499745042767
R2 test score = 0.22928736115757842
b: -0.31378430569042076, 
w= [ 1.14091036e-05  4.40100852e-05 -5.04831541e-05 -3.39959964e-05
 -1.07752672e-04 -2.27944171e-05  3.57661962e-06  3.02437384e-05
  1.93648863e-04  3.81905839e-05 -5.67820824e-06  1.19127146e-04
 -5.98913598e-05  1.09489343e-04  2.19656001e-05  3.99974507e-06
  4.38651988e-05 -5.62808148e-05 -2.66804805e-05 -2.07268857e-06
  2.01467615e-05  2.95313028e-04 -3.21997283e-05  6.38686238e-06
 -2.98645464e-05  3.00942189e-06 -9.81640161e-05  2.14500299e-04
  2.18176921e-04  6.64906981e-06  1.31927878e-05  6.70639089e-05
  1.05643456e-04  5.27028955e-05 -8.49943182e-07 -2.19791323e-05
 -8.95282752e-05  3.13949088e-05  1.87022781e-04  9.72476237e-05
  1.46290867e-04  9.35227076e-07  1.40887521e-04 -1.83539453e-04]


<h4><b>Polynomial+Lasso</b></h4>

In [8]:
import numpy as np

In [9]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso

poly= PolynomialFeatures(degree=3,include_bias=False) # default is True means to return the first feature of all 1 as for degree 0 
X_train_poly= poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
print ('X_train.shape= ',X_train.shape)
print ('X_train_poly.shape= ',X_train_poly.shape)

poly_lasso = Lasso(max_iter=100000).fit (X_train_poly,y_train)
regressor = poly_lasso
print ('Polynomial + Lasso')
print ('R2 train score =', regressor.score(X_train_poly, y_train))
print ('R2 test score =', regressor.score(X_test_poly, y_test))

w= regressor.coef_
print ('b: {}, \nw= {}'.format(regressor.intercept_, w)) 
w_relevant= w[np.abs(w) > 1e-2]
print ('\nRelevant w= {}'.format(w_relevant)) 

X_train.shape=  (576, 8)
X_train_poly.shape=  (576, 164)
Polynomial + Lasso
R2 train score = 0.4855703395685975
R2 test score = -0.023304833476420317
b: -0.4079685097459041, 
w= [ 0.00000000e+00 -0.00000000e+00  0.00000000e+00  0.00000000e+00
 -0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  9.88189532e-05  0.00000000e+00 -0.00000000e+00
 -0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  3.74179513e-05 -4.41883973e-05 -0.00000000e+00 -4.36018201e-05
  1.04958676e-04  0.00000000e+00  1.19891142e-04  0.00000000e+00
  0.00000000e+00 -3.14258618e-05  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.62623844e-05  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.47452796e-06  0.00000000e+00
  0.00000000e+00 -0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00 -0.00000000e+00  0.00000000e+00  0.00000000e+00
 -0.00000000e+00  1.96205993e-06  9.53990866e-06  0.00000000e+00
  2.48906477e-05 -0.00000000e+00  0.000000

  model = cd_fast.enet_coordinate_descent(


<h3><b>Normal Equation</b></h3>

In [10]:
m,n = X_train.shape
# adding 1-column
X_train_ext =  np.c_[(np.ones((m,1)),X_train)]
assert (X_train_ext.shape== (m,n+1))

print ('Solving linear regression using normal equation...')

params = np.linalg.pinv (X_train_ext.T @ X_train_ext ) @ X_train_ext.T @ y_train


params = np.linalg.pinv (X_train_ext.T @ X_train_ext ) @ X_train_ext.T @ y_train
b = params[0]
w=params[1:].reshape (1,-1) 
print ('b: {}, \nw= {}'.format(b,w)) 

print ('Predicting using normal equation...')

z_train= b+ X_train @ w.T
z_test= b+ X_test @ w.T

from sklearn.metrics import r2_score
print ('R2 train score =',  r2_score(y_train,z_train))
print ('R2 test score =', r2_score(y_test,z_test))

Solving linear regression using normal equation...
b: -0.8789309461372556, 
w= [[ 0.01754806  0.00630829 -0.00295024  0.00050552 -0.00020089  0.01281271
   0.12073798  0.0042686 ]]
Predicting using normal equation...
R2 train score = 0.3236710580729645
R2 test score = 0.22152856783340935
