<font color = green>

# Linear Regression: Ridge, Lasso, Normal Equation, Polynomial
</font>

<font color = green >

### Set interactive backend 
</font>

In [1]:
%matplotlib notebook

In [4]:
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split

<font color = green >

### Load diabetes data set
</font>

In [5]:
from sklearn.datasets import load_diabetes

In [7]:
def get_X_y(features=None, verbose=False):
    """Load the diabetes data set and split it into train and test parts.

    Parameters
    ----------
    features : None, int, or list/tuple of int, optional
        Column selector applied to the feature matrix.
        ``None`` keeps every column. An integer, or a one-element sequence,
        keeps a single column (the result stays 2-D with one column).
        A longer sequence keeps the listed columns in the given order.
    verbose : bool, optional
        When true, print the training shapes and the first five training
        rows and targets.

    Returns
    -------
    X_train, X_test, y_train, y_test
        The result of ``train_test_split`` called with ``random_state=2021``.

    Raises
    ------
    ValueError
        If ``features`` is neither ``None``, an integer, nor a list/tuple.
    """
    # Validate the selector first so that bad input fails immediately with a
    # clear error instead of returning None and breaking tuple unpacking at
    # the call site.
    if features is None:
        print('Selecting all features')
        columns = None
    elif isinstance(features, (int, np.integer)) and not isinstance(features, bool):
        print('Selecting one feature: {}'.format(features))
        columns = [features]
    elif isinstance(features, (list, tuple)):
        if len(features) == 1:
            print('Selecting one feature: {}'.format(features))
        else:
            print('Selecting features list: {}'.format(features))
        columns = list(features)
    else:
        raise ValueError('wrong format of parameter "features"')

    X, y = load_diabetes(return_X_y=True)
    if columns is not None:
        # Indexing with a list always yields a 2-D array, even for one column.
        X = X[:, columns]

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2021)
    if verbose:
        print('X_train.shape= ', X_train.shape)
        print('y_train.shape= ', y_train.shape)
        print('X_train [:5] = \n{}'.format(X_train[:5]))
        print('y_train [:5] = \n{}'.format(y_train[:5]))
    return X_train, X_test, y_train, y_test

In [8]:
# Use every feature and echo the split shapes plus the first training rows.
X_train, X_test, y_train, y_test = get_X_y(verbose=True)
# Single-feature variant, kept for quick experiments:
# X_train, X_test, y_train, y_test = get_X_y([5], verbose=False)

Selecting all features
X_train.shape=  (331, 10)
y_train.shape=  (331,)
X_train [:5] = 
[[-0.06363517 -0.04464164 -0.03315126 -0.03321323  0.00118295  0.02405115
  -0.02499266 -0.00259226 -0.02251653 -0.05906719]
 [ 0.01264814 -0.04464164 -0.02560657 -0.04009893 -0.03046397 -0.04515466
   0.0780932  -0.0763945  -0.07213275  0.01134862]
 [ 0.03807591  0.05068012  0.00888341  0.04252949 -0.04284755 -0.02104223
  -0.03971921 -0.00259226 -0.01811369  0.00720652]
 [-0.07816532  0.05068012  0.07786339  0.05285804  0.07823631  0.0644473
   0.02655027 -0.00259226  0.04067283 -0.00936191]
 [-0.07453279 -0.04464164 -0.0105172  -0.00567042 -0.06623874 -0.0570543
  -0.00290283 -0.03949338 -0.04257085 -0.0010777 ]]
y_train [:5] = 
[214.  98. 127. 233. 168.]


<font color = green >

### Linear Regression

</font>

In [9]:
from sklearn.linear_model import LinearRegression

# Plain least-squares baseline on the current train/test split.
lin_reg = LinearRegression().fit(X_train, y_train)
regressor = lin_reg

train_r2 = regressor.score(X_train, y_train)
test_r2 = regressor.score(X_test, y_test)

print('Linear Regression')
print('R2 train score =', train_r2)
print('R2 test score =', test_r2)
print('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_))

Linear Regression
R2 train score = 0.5073693366380002
R2 test score = 0.5281729599217633
b: 148.99290898243794, 
w= [ -19.6849459  -240.17712443  557.92071086  251.49875073 -500.35528341
  275.55002947  -11.62872458  154.0055582   651.15320811   77.51418657]


<font color = green >

### Ridge

</font>

In [10]:
from sklearn.linear_model import Ridge

# Ridge regression with the estimator's default settings.
ridge_reg = Ridge().fit(X_train, y_train)
regressor = ridge_reg

train_r2 = regressor.score(X_train, y_train)
test_r2 = regressor.score(X_test, y_test)

print('Ridge')
print('R2 train score =', train_r2)
print('R2 test score =', test_r2)
print('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_))

Ridge
R2 train score = 0.4227491733930173
R2 test score = 0.4342973225973642
b: 148.99989270370446, 
w= [  31.07148535  -67.8120157   284.12144626  158.3077359    25.34329106
  -14.63150099 -130.28719404  116.41304414  239.50188481  108.52469397]


<font color = green >

### Lasso

</font>

In [11]:
from sklearn.linear_model import Lasso

# Lasso regression with the estimator's default settings.
lasso_reg = Lasso().fit(X_train, y_train)
regressor = lasso_reg

train_r2 = regressor.score(X_train, y_train)
test_r2 = regressor.score(X_test, y_test)

print('Lasso')
print('R2 train score =', train_r2)
print('R2 test score =', test_r2)
print('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_))

Lasso
R2 train score = 0.36601908968194896
R2 test score = 0.33920924807921515
b: 149.48529539341314, 
w= [  0.          -0.         379.30812187   0.           0.
   0.          -0.           0.         317.42349078   0.        ]


<font color = green >

### Comparing to KNN

</font>

In [12]:
from sklearn.neighbors import KNeighborsRegressor

X_train, X_test, y_train, y_test = get_X_y()

print('KNN')
# Sweep k = 1..9 and report the train/test R2 for each neighbourhood size.
for n_neighbors in range(1, 10):
    regressor = KNeighborsRegressor(n_neighbors=n_neighbors)
    regressor.fit(X_train, y_train)
    train_r2 = regressor.score(X_train, y_train)
    test_r2 = regressor.score(X_test, y_test)
    print('n_neighbors={}'.format(n_neighbors))
    print('R2 train score =', train_r2)
    print('R2 test score =', test_r2)

Selecting all features
KNN
n_neighbors=1
R2 train score = 1.0
R2 test score = -0.09095569737227827
n_neighbors=2
R2 train score = 0.75205211713433
R2 test score = 0.17016447278513447
n_neighbors=3
R2 train score = 0.6758033181412969
R2 test score = 0.2381809619257087
n_neighbors=4
R2 train score = 0.6155525801130263
R2 test score = 0.27733360958680453
n_neighbors=5
R2 train score = 0.5971693804267382
R2 test score = 0.3284875313602935
n_neighbors=6
R2 train score = 0.5798068060304129
R2 test score = 0.34198004354416
n_neighbors=7
R2 train score = 0.5774888738145572
R2 test score = 0.34425865483160567
n_neighbors=8
R2 train score = 0.5672443132619438
R2 test score = 0.3729549962651513
n_neighbors=9
R2 train score = 0.5641246455859588
R2 test score = 0.36825966395622667


<font color = green >

## Normal Equation

</font>

In [13]:
from sklearn.metrics import r2_score

X_train, X_test, y_train, y_test = get_X_y()
m, n = X_train.shape

# Prepend a column of ones so the intercept is learned as params[0].
X_train_ext = np.c_[np.ones((m, 1)), X_train]
assert X_train_ext.shape == (m, n + 1)

print('Solving linear regression using normal equation...')

# Normal equation: params = pinv(X^T X) @ X^T @ y, computed once.
params = np.linalg.pinv(X_train_ext.T @ X_train_ext) @ X_train_ext.T @ y_train
b = params[0]                    # intercept
w = params[1:].reshape(1, -1)    # weights as a single row, shape (1, n)
print('b: {}, \nw= {}'.format(b, w))

print('Predicting using normal equation...')

# X @ w.T yields one prediction per row, as a column of shape (samples, 1).
z_train = b + X_train @ w.T
z_test = b + X_test @ w.T

print('R2 train score =', r2_score(y_train, z_train))
print('R2 test score =', r2_score(y_test, z_test))

Selecting all features
Solving linear regression using normal equation...
b: 148.99290898243888, 
w= [[ -19.6849459  -240.17712443  557.92071086  251.49875073 -500.35528341
   275.55002947  -11.62872458  154.0055582   651.15320811   77.51418657]]
Predicting using normal equation...
R2 train score = 0.5073693366380001
R2 test score = 0.5281729599217677


<font color = green >

## Polynomial

</font>

In [14]:
from sklearn.preprocessing import PolynomialFeatures

<font color = green >

### Polynomial + Linear Regression

</font>

In [15]:
X_train, X_test, y_train, y_test = get_X_y()

# Expand the features to every term of degree <= 2, without the constant column.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)  # reuse the expansion fitted on the training data
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

poly_lin_reg = LinearRegression()
poly_lin_reg.fit(X_train_poly, y_train)
regressor = poly_lin_reg

train_r2 = regressor.score(X_train_poly, y_train)
test_r2 = regressor.score(X_test_poly, y_test)

print('Polynomial + Linear Regression')
print('R2 train score =', train_r2)
print('R2 test score =', test_r2)
print('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_))

Selecting all features
X_train.shape=  (331, 10)
X_train_poly.shape=  (331, 65)
Polynomial + Linear Regression
R2 train score = 0.6207810962295993
R2 test score = 0.34722439867190236
b: 55.745642090171586, 
w= [ 1.06137498e+02 -2.77244219e+02  5.11354358e+02  2.51478306e+02
 -1.82518302e+04  1.59323845e+04  6.66445690e+03  1.74014774e+02
  6.57536398e+03  9.66610282e+01  2.78325334e+03  3.85281468e+03
 -1.53395915e+02  9.33380694e+02  7.84255464e+03 -1.10762461e+04
 -1.11174456e+03  2.01277652e+03  1.35040875e+03 -1.10327017e+03
 -1.67413429e+00  2.29828166e+03  2.55277891e+02 -6.62033960e+02
  1.81130613e+03  1.37538779e+02 -6.93403727e+03  1.68439720e+03
  1.60179356e+03  1.15224299e+03  3.13930733e+03 -8.23706391e+02
  6.06446052e+02  9.05587243e+02 -1.25957240e+03  3.92326702e+02
  7.84474860e+02 -3.72762355e+02  1.50641940e+04 -1.23251806e+04
 -3.94541792e+03  3.05725415e+03 -5.21151753e+03 -2.22762962e+03
  8.83280542e+04 -1.14624080e+05 -7.24321258e+04 -3.63921143e+04
 -2.640891

#### Note: the gap between the training and test R2 scores shows the model is overfitting
The training score is better than without polynomial features, but the test score is worse.

In [16]:
# Same polynomial model, but with the expanded features rescaled before fitting.

from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = get_X_y(verbose=False)

poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

# Fit the scaler on the training data only, then apply it to both splits.
scaler = StandardScaler()
X_train_poly_scaled = scaler.fit_transform(X_train_poly)
X_test_poly_scaled = scaler.transform(X_test_poly)

poly_lin_reg = LinearRegression()
poly_lin_reg.fit(X_train_poly_scaled, y_train)
regressor = poly_lin_reg

train_r2 = regressor.score(X_train_poly_scaled, y_train)
test_r2 = regressor.score(X_test_poly_scaled, y_test)

print('Polynomial + Normalization + Linear Regression')
print('R2 train score =', train_r2)
print('R2 test score =', test_r2)
print('b: {}, \nw= {}'.format(regressor.intercept_, regressor.coef_))

Selecting all features
X_train.shape=  (331, 10)
X_train_poly.shape=  (331, 65)
Polynomial + Normalization + Linear Regression
R2 train score = 0.6186800817374429
R2 test score = 0.3567788640015631
b: 133.4063452967691, 
w= [ 4.97209636e+00  3.88562348e+14  2.49072149e+01  1.16213097e+01
 -8.73121834e+02  7.64096681e+02  3.07384169e+02  1.22193351e+01
  3.15778767e+02  3.94531250e+00  6.37500000e+00  7.76562500e+00
 -3.82812500e-01  1.81250000e+00  1.31875000e+01 -2.00937500e+01
 -7.85156250e-01  5.37500000e+00  4.32812500e+00 -2.74218750e+00
 -3.88562348e+14  4.54882812e+00  1.10937500e+00  3.06054688e+00
  9.45312500e-01 -8.98437500e-01 -1.55390625e+01  3.72265625e+00
  2.80566406e+00  4.73437500e+00  6.14062500e+00 -2.04687500e+00
  3.45312500e+00  1.02343750e+00 -5.94140625e+00  2.01562500e+00
  2.03906250e+00  4.06250000e-01  2.55781250e+01 -2.06093750e+01
 -7.20703125e+00  6.06054688e+00 -1.00996094e+01 -4.67187500e+00
  2.98390625e+02 -4.00804688e+02 -1.51332031e+02 -9.88281250e

<font color = green >

### Polynomial + Ridge

</font>

In [21]:
X_train, X_test, y_train, y_test = get_X_y(verbose=False)

# Degree-2 expansion; include_bias=False leaves out the constant all-ones column.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

poly_ridge = Ridge(alpha=1e6, max_iter=100000)
poly_ridge.fit(X_train_poly, y_train)
regressor = poly_ridge

train_r2 = regressor.score(X_train_poly, y_train)
test_r2 = regressor.score(X_test_poly, y_test)

print('Polynomial + Ridge')

print('R2 train score =', train_r2)
print('R2 test score =', test_r2)
w = regressor.coef_
# Coefficient dump, disabled to keep the output short:
# print ('b: {}, \nw= {}'.format(regressor.intercept_, w))

Selecting all features
X_train.shape=  (331, 10)
X_train_poly.shape=  (331, 65)
Polynomial + Ridge
R2 train score = 2.1609200691052877e-06
R2 test score = -0.013170793147653903


<font color = green >

#### Try polynomial degree = 3 for Ridge 

</font>

In [22]:
# include_bias defaults to True, which would add a constant all-ones (degree-0)
# column; it is switched off here.
poly = PolynomialFeatures(degree=3, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

poly_ridge = Ridge(alpha=5e10, max_iter=100000)
poly_ridge.fit(X_train_poly, y_train)
regressor = poly_ridge

train_r2 = regressor.score(X_train_poly, y_train)
test_r2 = regressor.score(X_test_poly, y_test)

print('Polynomial + Ridge')

print('R2 train score =', train_r2)
print('R2 test score =', test_r2)
w = regressor.coef_


X_train.shape=  (331, 10)
X_train_poly.shape=  (331, 285)
Polynomial + Ridge
R2 train score = 4.322475710694107e-11
R2 test score = -0.01317309394302435


<font color = green >

### Polynomial + Lasso

</font>

In [23]:
X_train, X_test, y_train, y_test = get_X_y(verbose=False)

# include_bias defaults to True, which would add a constant all-ones (degree-0)
# column; it is switched off here.
poly = PolynomialFeatures(degree=3, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

poly_lasso = Lasso(max_iter=100000)
poly_lasso.fit(X_train_poly, y_train)
regressor = poly_lasso

train_r2 = regressor.score(X_train_poly, y_train)
test_r2 = regressor.score(X_test_poly, y_test)

print('Polynomial + Lasso')
print('R2 train score =', train_r2)
print('R2 test score =', test_r2)

w = regressor.coef_
print('b: {}, \nw= {}'.format(regressor.intercept_, w))
# Keep only the coefficients whose magnitude exceeds 1e-2.
relevant_mask = np.abs(w) > 1e-2
w_relevant = w[relevant_mask]
print('\nRelevant w= {}'.format(w_relevant))



Selecting all features
X_train.shape=  (331, 10)
X_train_poly.shape=  (331, 285)
Polynomial + Lasso
R2 train score = 0.36601908968194896
R2 test score = 0.33920924807921515
b: 149.48529539341314, 
w= [  0.          -0.         379.30812187   0.           0.
   0.          -0.           0.         317.42349078   0.
   0.           0.           0.           0.          -0.
  -0.          -0.           0.           0.           0.
  -0.           0.           0.           0.          -0.
   0.          -0.           0.           0.           0.
   0.          -0.          -0.          -0.           0.
   0.           0.           0.           0.          -0.
   0.           0.           0.           0.           0.
  -0.          -0.          -0.           0.           0.
  -0.           0.          -0.          -0.           0.
  -0.          -0.           0.          -0.           0.
   0.           0.           0.           0.           0.
   0.          -0.           0.           0.  

<font color = green >

#### Try a stronger penalty (alpha = 10) for Lasso, still with polynomial degree = 3

</font>

In [24]:
# Degree-3 expansion without the constant all-ones column.
poly = PolynomialFeatures(degree=3, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
print('X_train.shape= ', X_train.shape)
print('X_train_poly.shape= ', X_train_poly.shape)

poly_lasso = Lasso(alpha=10, max_iter=1000000)
poly_lasso.fit(X_train_poly, y_train)
regressor = poly_lasso

train_r2 = regressor.score(X_train_poly, y_train)
test_r2 = regressor.score(X_test_poly, y_test)

print('Polynomial + Lasso')
print('R2 train score =', train_r2)
print('R2 test score =', test_r2)

w = regressor.coef_
# Keep only the coefficients whose magnitude exceeds 1e-2.
relevant_mask = np.abs(w) > 1e-2
w_relevant = w[relevant_mask]
print('\nRelevant w= {}'.format(w_relevant))

X_train.shape=  (331, 10)
X_train_poly.shape=  (331, 285)
Polynomial + Lasso
R2 train score = 0.0
R2 test score = -0.01317309398904798

Relevant w= []
