# Regularized regressions:

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import r2_score

## Read in data:

The dataset comes from the 1974 *Motor Trend* US magazine.

In [None]:
# Load the mtcars data; the first row of the CSV is used as the header (pandas default).
df = pd.read_csv("data_mtcars.csv")

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(32, 12)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(5)

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [None]:
# Features: every column except the car name and the response.
X = df.drop(columns=["model", "mpg"])
# Target: fuel efficiency (the `mpg` column).
Y = df["mpg"]

Split the dataset into training and test sets, with a test size of 30%.


In [None]:
# Hold out 30% of the rows for testing. random_state is fixed so the split is
# reproducible. train_test_split comes from the imports cell at the top of the
# notebook, so it is not re-imported here.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.30, random_state=0
)

#### 1.2. Ridge regression:

In [None]:
# Candidate regularization strengths for Ridge: the integers 1 through 99.
lambdaRange = range(1, 100)

In [None]:
# Baseline Ridge fit with a very small penalty (alpha=0.01).
# The split cell defines lowercase x_train / x_test, so those names are used
# here; the previous X_train / X_test raised NameError on a fresh kernel.
rr = Ridge(alpha=0.01)
rr.fit(x_train, y_train)

# Training-set RMSE and R^2.
pred_train_rr = rr.predict(x_train)
print(np.sqrt(mean_squared_error(y_train, pred_train_rr)))
print(r2_score(y_train, pred_train_rr))

# Test-set RMSE and R^2.
pred_test_rr = rr.predict(x_test)
print(np.sqrt(mean_squared_error(y_test, pred_test_rr)))
print(r2_score(y_test, pred_test_rr))

Complete the code below so that it records the best $R^2$ and the lambda that achieves it.

In [None]:
# Sweep lambdaRange, fit one Ridge model per lambda, and keep the one with the
# highest training R^2.
# NOTE: the score is computed on the training split, where a weaker penalty
# tends to score at least as well as a stronger one, so this favours the
# smallest lambda. Use cross-validation for real model selection.
bestRsq = -np.inf                                            # To contain the best R^2 (R^2 can be negative, so 0.0 is not a safe floor).
bestLambda = None                                            # To contain the best lambda.
bestRidge = None                                             # To contain the model fitted with the best lambda.

for aLambda in lambdaRange:
    candidate = Ridge(alpha=aLambda)
    candidate.fit(x_train, y_train)
    pred_train_r = candidate.predict(x_train)
    score = r2_score(y_train, pred_train_r)

    print("Lambda = ", aLambda, " and  R^2 = ", np.round(score,3))

    # Strict '>' keeps the first (smallest) lambda on ties.
    if score > bestRsq:
        bestRsq, bestLambda, bestRidge = score, aLambda, candidate

# The cells below inspect `ridge`; bind it to the best-lambda model rather than
# whichever model happened to be fitted last.
ridge = bestRidge
print("Best lambda = ", bestLambda, " with R^2 = ", np.round(bestRsq, 3))


Lambda =  1  and  R^2 =  0.918
Lambda =  2  and  R^2 =  0.906
Lambda =  3  and  R^2 =  0.896
Lambda =  4  and  R^2 =  0.888
Lambda =  5  and  R^2 =  0.882
Lambda =  6  and  R^2 =  0.877
Lambda =  7  and  R^2 =  0.872
Lambda =  8  and  R^2 =  0.868
Lambda =  9  and  R^2 =  0.864
Lambda =  10  and  R^2 =  0.86
Lambda =  11  and  R^2 =  0.857
Lambda =  12  and  R^2 =  0.854
Lambda =  13  and  R^2 =  0.851
Lambda =  14  and  R^2 =  0.849
Lambda =  15  and  R^2 =  0.846
Lambda =  16  and  R^2 =  0.844
Lambda =  17  and  R^2 =  0.842
Lambda =  18  and  R^2 =  0.84
Lambda =  19  and  R^2 =  0.838
Lambda =  20  and  R^2 =  0.836
Lambda =  21  and  R^2 =  0.835
Lambda =  22  and  R^2 =  0.833
Lambda =  23  and  R^2 =  0.831
Lambda =  24  and  R^2 =  0.83
Lambda =  25  and  R^2 =  0.828
Lambda =  26  and  R^2 =  0.827
Lambda =  27  and  R^2 =  0.826
Lambda =  28  and  R^2 =  0.824
Lambda =  29  and  R^2 =  0.823
Lambda =  30  and  R^2 =  0.822
Lambda =  31  and  R^2 =  0.821
Lambda =  32  and  R

Train the ridge model with the best lambda value.

In [None]:
# Show the intercept of the Ridge model currently bound to `ridge`.
ridge.intercept_

33.332677039242

In [None]:
# Show the fitted coefficients of the Ridge model currently bound to `ridge`.
ridge.coef_

array([-0.07486782, -0.03147511, -0.02170372,  0.05043239, -0.17001932,
       -0.05287025,  0.02233716,  0.06294993,  0.03454282, -0.24829522])

NOTE: In Ridge, even when $\lambda$ is large no parameter is exactly 0.

#### 1.3. Lasso regression:

In [None]:
# Candidate regularization strengths for Lasso: the integers 1 through 99.
lambdaRange = range(1, 100)

In [None]:
# Sweep lambdaRange, fit one Lasso model per lambda, and keep the one with the
# highest training R^2.
# NOTE: the score is computed on the training split, where a weaker penalty
# tends to score at least as well as a stronger one, so this favours the
# smallest lambda. Use cross-validation for real model selection.
bestRsq = -np.inf                                            # To contain the best R^2 (R^2 can be negative, so 0.0 is not a safe floor).
bestLambda = None                                            # To contain the best lambda.
bestLasso = None                                             # To contain the model fitted with the best lambda.
for aLambda in lambdaRange:
    candidate = Lasso(alpha=aLambda)
    candidate.fit(x_train, y_train)
    pred_train_l = candidate.predict(x_train)
    # Score the Lasso predictions; the earlier code scored the stale Ridge
    # predictions (pred_train_r), which printed the same R^2 for every lambda.
    score = r2_score(y_train, pred_train_l)
    print("Lambda = ", aLambda, " and  R^2 = ", np.round(score ,3))

    # Strict '>' keeps the first (smallest) lambda on ties.
    if score > bestRsq:
        bestRsq, bestLambda, bestLasso = score, aLambda, candidate

# The cells below inspect `lasso`; bind it to the best-lambda model rather than
# whichever model happened to be fitted last.
lasso = bestLasso
print("Best lambda = ", bestLambda, " with R^2 = ", np.round(bestRsq, 3))


Lambda =  1  and  R^2 =  0.787
Lambda =  2  and  R^2 =  0.787
Lambda =  3  and  R^2 =  0.787
Lambda =  4  and  R^2 =  0.787
Lambda =  5  and  R^2 =  0.787
Lambda =  6  and  R^2 =  0.787
Lambda =  7  and  R^2 =  0.787
Lambda =  8  and  R^2 =  0.787
Lambda =  9  and  R^2 =  0.787
Lambda =  10  and  R^2 =  0.787
Lambda =  11  and  R^2 =  0.787
Lambda =  12  and  R^2 =  0.787
Lambda =  13  and  R^2 =  0.787
Lambda =  14  and  R^2 =  0.787
Lambda =  15  and  R^2 =  0.787
Lambda =  16  and  R^2 =  0.787
Lambda =  17  and  R^2 =  0.787
Lambda =  18  and  R^2 =  0.787
Lambda =  19  and  R^2 =  0.787
Lambda =  20  and  R^2 =  0.787
Lambda =  21  and  R^2 =  0.787
Lambda =  22  and  R^2 =  0.787
Lambda =  23  and  R^2 =  0.787
Lambda =  24  and  R^2 =  0.787
Lambda =  25  and  R^2 =  0.787
Lambda =  26  and  R^2 =  0.787
Lambda =  27  and  R^2 =  0.787
Lambda =  28  and  R^2 =  0.787
Lambda =  29  and  R^2 =  0.787
Lambda =  30  and  R^2 =  0.787
Lambda =  31  and  R^2 =  0.787
Lambda =  32  and

Train the lasso model with the best lambda value.

In [None]:
# Show the intercept of the Lasso model currently bound to `lasso`.
lasso.intercept_

29.44925613142623

In [None]:
# Show the fitted coefficients of the Lasso model currently bound to `lasso`.
lasso.coef_

array([-0.        , -0.03646978, -0.00246897,  0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        , -0.        ])

NOTE: Lasso can make some parameters exactly 0 when $\lambda$ is large enough.