# Ridge / Tikhonov Regression

# Lasso Regression

# Key Differences between Ridge and Lasso Regression

In summary, the main differences between Ridge and Lasso regression are:

Ridge regression uses L2 regularization, while Lasso regression uses L1 regularization.
Ridge regression does not enforce sparsity and only shrinks the coefficients towards zero, whereas Lasso regression can lead to sparse models by setting some coefficients exactly to zero.
Ridge regression is effective in handling multicollinearity, while Lasso regression performs well in feature selection and handling high-dimensional data.


The choice between Ridge and Lasso regression depends on the specific problem and the desired properties of the model. In some cases, a combination of both techniques, known as Elastic Net regression, can be employed to leverage the advantages of both L1 and L2 regularization.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Model building
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

from sklearn.metrics import r2_score

In [2]:
# Import the data from csv file:
mpg_df = pd.read_csv("Data/mpg_vehicle_data.csv")
mpg_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


# Objective is to predict the Mileage (mpg) based on the features

In [3]:
# Name is irrelevant for mileage prediction
mpg_df = mpg_df.drop('name', axis=1)
mpg_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,18.0,8,307.0,130,3504,12.0,70,1
1,15.0,8,350.0,165,3693,11.5,70,1
2,18.0,8,318.0,150,3436,11.0,70,1
3,16.0,8,304.0,150,3433,12.0,70,1
4,17.0,8,302.0,140,3449,10.5,70,1


In [4]:
# Replace the categorical variable (origin) with String values
mpg_df['origin'] = mpg_df['origin'].replace({1:'america', 2:'europe',3:'asia'})
mpg_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,18.0,8,307.0,130,3504,12.0,70,america
1,15.0,8,350.0,165,3693,11.5,70,america
2,18.0,8,318.0,150,3436,11.0,70,america
3,16.0,8,304.0,150,3433,12.0,70,america
4,17.0,8,302.0,140,3449,10.5,70,america


In [5]:
# Perform One Hot Encoding so that weights are not affected
# Note: If we leave it as 1, 2, 3 etc, it will affect the weights. Hence we need to convert them to binary 
# by doing One Hot Encoding - pd.get_dummies()
mpg_df = pd.get_dummies(mpg_df, columns=['origin'])
mpg_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin_america,origin_asia,origin_europe
0,18.0,8,307.0,130,3504,12.0,70,True,False,False
1,15.0,8,350.0,165,3693,11.5,70,True,False,False
2,18.0,8,318.0,150,3436,11.0,70,True,False,False
3,16.0,8,304.0,150,3433,12.0,70,True,False,False
4,17.0,8,302.0,140,3449,10.5,70,True,False,False


In [6]:
# 'horsepower' column contains some '?'. Replace them with np.nan
mpg_df = mpg_df.replace('?', np.nan)

In [7]:
mpg_df['horsepower'].value_counts()
# No more '?' left

horsepower
150    22
90     20
88     19
110    18
100    17
       ..
61      1
93      1
148     1
152     1
82      1
Name: count, Length: 93, dtype: int64

In [8]:
# Check for Null Values
mpg_df.isnull().sum()
# 'horsepower' column has 6 nan values

mpg               0
cylinders         0
displacement      0
horsepower        6
weight            0
acceleration      0
model_year        0
origin_america    0
origin_asia       0
origin_europe     0
dtype: int64

In [9]:
mpg_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   mpg             398 non-null    float64
 1   cylinders       398 non-null    int64  
 2   displacement    398 non-null    float64
 3   horsepower      392 non-null    object 
 4   weight          398 non-null    int64  
 5   acceleration    398 non-null    float64
 6   model_year      398 non-null    int64  
 7   origin_america  398 non-null    bool   
 8   origin_asia     398 non-null    bool   
 9   origin_europe   398 non-null    bool   
dtypes: bool(3), float64(3), int64(3), object(1)
memory usage: 23.1+ KB


In [10]:
# Replace the nan values in 'horsepower' column with median
mpg_df = mpg_df.apply(lambda x: x.fillna(x.median()), axis=0)

In [11]:
# All nan values are now replaced with median
mpg_df.isnull().sum()

mpg               0
cylinders         0
displacement      0
horsepower        0
weight            0
acceleration      0
model_year        0
origin_america    0
origin_asia       0
origin_europe     0
dtype: int64

#  Split into Dependent and Independent variables

In [12]:
# splitting the data into x and y basis independent variable and dependent variable

x = mpg_df.drop('mpg', axis=1)
y = mpg_df[['mpg']]

In [13]:
x.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,origin_america,origin_asia,origin_europe
0,8,307.0,130,3504,12.0,70,True,False,False
1,8,350.0,165,3693,11.5,70,True,False,False
2,8,318.0,150,3436,11.0,70,True,False,False
3,8,304.0,150,3433,12.0,70,True,False,False
4,8,302.0,140,3449,10.5,70,True,False,False


In [14]:
y.head()

Unnamed: 0,mpg
0,18.0
1,15.0
2,18.0
3,16.0
4,17.0


# Feature Scaling / Standardization

In [15]:
from sklearn import preprocessing
x_scaled = preprocessing.scale(x)       # Z-score normalization
x_scaled = pd.DataFrame(x_scaled, columns=x.columns)
x_scaled.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,origin_america,origin_asia,origin_europe
0,1.498191,1.090604,0.673118,0.63087,-1.295498,-1.627426,0.773559,-0.497643,-0.461968
1,1.498191,1.503514,1.589958,0.854333,-1.477038,-1.627426,0.773559,-0.497643,-0.461968
2,1.498191,1.196232,1.197027,0.55047,-1.658577,-1.627426,0.773559,-0.497643,-0.461968
3,1.498191,1.061796,1.197027,0.546923,-1.295498,-1.627426,0.773559,-0.497643,-0.461968
4,1.498191,1.042591,0.935072,0.565841,-1.840117,-1.627426,0.773559,-0.497643,-0.461968


In [16]:
y_scaled = preprocessing.scale(y)
y_scaled = pd.DataFrame(y_scaled, columns=y.columns)
y_scaled.head()

Unnamed: 0,mpg
0,-0.706439
1,-1.090751
2,-0.706439
3,-0.962647
4,-0.834543


# Train Test Split

In [17]:
# splitting the data into training and test
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_scaled, test_size=0.30, random_state=1)

# Fit a simple Linear model

In [18]:
regression_model = LinearRegression()
regression_model.fit(x_train, y_train)

for idx, col_name in enumerate(x_train.columns):
    print("The Co-efficient for {} is {}".format(col_name, regression_model.coef_[0][idx]))

The Co-efficient for cylinders is -0.08592264254448734
The Co-efficient for displacement is 0.3861501766895437
The Co-efficient for horsepower is -0.10637514644618916
The Co-efficient for weight is -0.7965737428612097
The Co-efficient for acceleration is 0.02184681331891979
The Co-efficient for model_year is 0.3959410531014954
The Co-efficient for origin_america is -0.09399896644893509
The Co-efficient for origin_asia is 0.044917890138051704
The Co-efficient for origin_europe is 0.07243059852959383


In [19]:
regression_model.coef_

array([[-0.08592264,  0.38615018, -0.10637515, -0.79657374,  0.02184681,
         0.39594105, -0.09399897,  0.04491789,  0.0724306 ]])

In [20]:
intercept = regression_model.intercept_[0]
print("The intercept for our model is {}".format(intercept))

The intercept for our model is 0.015510225561902383


# Create a RIDGE model and note the different coefficients values

In [21]:
ridge = Ridge(alpha=.3)
ridge.fit(x_train, y_train)
print("Ridge Model:", (ridge.coef_))

Ridge Model: [[-0.0800581   0.36661042 -0.10890119 -0.78324655  0.01917898  0.39442138
  -0.0930884   0.04466769  0.07153523]]


# Create a LASSO model and note the different coefficients values for different alpha values

In [22]:
lasso1 = Lasso(alpha=0.3)
lasso1.fit(x_train, y_train)
print("Lasso Model :", (lasso1.coef_))
# The coefficients with '0' are not required and dropped automatically by the algorithm

Lasso Model : [-0.         -0.         -0.         -0.48827809  0.          0.13850254
 -0.          0.          0.        ]


In [23]:
lasso2 = Lasso(alpha=0.1)
lasso2.fit(x_train, y_train)
print("Lasso Model :", (lasso2.coef_))
# The coefficients with '0' are not required and dropped automatically by the algorithm

Lasso Model : [-0.         -0.         -0.01464723 -0.60711757  0.          0.29460087
 -0.04017427  0.          0.        ]


# Let us compare their scores

In [24]:
# Linear Regression
print("Linear Regression")
print(regression_model.score(x_train, y_train))
print(regression_model.score(x_test, y_test))

Linear Regression
0.8141025501610559
0.8433135132808832


In [25]:
# Ridge Regression
print("Ridge Regression")
print(ridge.score(x_train, y_train))
print(ridge.score(x_test, y_test))

Ridge Regression
0.8140828080856514
0.8437999817350272


In [26]:
# Lasso Regression
print("Lasso Regression")
print(lasso2.score(x_train, y_train))
print(lasso2.score(x_test, y_test))

# Insight: Even though the accuracy of Lasso is slightly less than other methods, but still it is better 
# because it has used only 4 variables in test data while other methods have used all 9 variables
# Vary the alpha value to get better accuracy

Lasso Regression
0.7878910251573478
0.8315130533007058


# Let us generate polynomial models reflecting the non-linear interaction between some dimensions

In [27]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2, interaction_only = True)
# PolynomialFeatures will increase the number of dimensions by creating duplicate columns for unique values in each column

In [28]:
x.shape

(398, 9)

In [29]:
x_poly = poly.fit_transform(x_scaled)
x_train, x_test, y_train, y_test = train_test_split(x_poly, y, test_size=0.30,random_state=1)
x_train.shape

(278, 46)

# Apply Linear Regression on the polynomial features

In [30]:
regression_model.fit(x_train, y_train)
print(regression_model.coef_[0])

[ 6.80378666e-14  7.76532695e-02 -5.76620093e-01 -1.90640538e+00
 -5.29355791e+00 -6.12398171e-01  3.06569911e+00 -7.48647628e+10
  1.61115729e+12 -1.55750368e+12 -1.81900389e+00  1.94158758e-01
  1.86208505e+00  1.50413610e+00 -1.48461224e+00 -2.10197908e+12
 -1.73238522e+12 -1.65356596e+12  4.04633275e-01  1.65410460e+00
 -8.90737384e-01  2.46149134e+00 -4.47609825e+11 -3.68905976e+11
 -3.52121664e+11 -6.94999353e-01 -3.09415674e-01 -1.48338310e+00
 -1.15271960e+11 -9.50035332e+10 -9.06811067e+10 -1.55318722e-01
  2.51965010e-01  7.48932052e+11  6.17246303e+11  5.89163118e+11
  4.75131713e-01  2.54584007e+11  2.09820152e+11  2.00273853e+11
  6.39662437e+10  5.27189713e+10  5.03203881e+10  9.67662504e+10
 -2.36721691e+12  1.48529836e+12]


In [31]:
print(regression_model.score(x_train, y_train))
print(regression_model.score(x_test, y_test))

0.902600154784986
0.8674985468017328


# Apply Ridge Regression on the polynomial features

In [32]:
ridge = Ridge(alpha=0.3)
ridge.fit(x_train, y_train)
print("Ridge Model: ", (ridge.coef_))

Ridge Model:  [[ 0.          0.06740642 -0.61900803 -1.97236759 -5.15141317 -0.62282102
   3.04381568  0.1723188   0.15891088 -0.38553368 -1.4895438   0.02925116
   1.72762625  1.4201127  -1.38679985 -0.05712906  1.13569653 -1.11720961
   0.30089657  1.53987731 -0.84218996  2.38658282  0.21457492  0.50684735
  -0.80377008 -0.47592772 -0.30069342 -1.50318104 -0.61710306  0.43440955
   0.32933252 -0.14480549  0.25597746  0.47585604 -0.93880283  0.37865359
   0.4784103  -0.67039722  0.22915648  0.61211567 -0.49887694  0.4033774
   0.21155723 -0.29118156  0.41878367  0.1184465 ]]


In [33]:
print(ridge.score(x_train, y_train))
print(ridge.score(x_test, y_test))

0.9025975935207239
0.8673792928418451


# Apply Lasso Regression on the polynomial features

In [34]:
lasso = Lasso(alpha=0.1)
lasso.fit(x_train, y_train)
print("Lasso Model :", (lasso.coef_))

Lasso Model : [ 0.         -0.         -0.         -1.7183747  -5.16992739 -0.
  2.84786548 -0.13998004  0.          0.         -0.          0.
  0.1356132   0.3263917  -0.         -0.          0.13868519 -0.11476459
  0.          1.41934547  0.          0.          0.          0.
 -0.          0.08806845  0.         -0.70114511  0.          0.
 -0.         -0.         -0.          0.         -0.         -0.00588205
  0.1439408  -0.62572831  0.          0.35593854 -0.36198583  0.
  0.         -0.         -0.         -0.        ]


In [35]:
print(lasso.score(x_train, y_train))
print(lasso.score(x_test, y_test))

0.8889225050634915
0.8806921908206775


In [36]:
# Note the increased accuracy after applying Ridge or Lasso Regression