# Demonstration of Lasso and Ridge regularization.

In [1]:
# import libraries.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score

In [4]:
# read the data set.
dt = pd.read_csv(r"C:\Users\DELL\OneDrive\Desktop\Naresh IT\11th- l1, l2\TASK-22_LASSO,RIDGE\car-mpg.csv")

In [5]:
dt

Unnamed: 0,mpg,cyl,disp,hp,wt,acc,yr,origin,car_type,car_name
0,18.0,8,307.0,130,3504,12.0,70,1,0,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,0,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,0,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,0,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,0,ford torino
...,...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86,2790,15.6,82,1,1,ford mustang gl
394,44.0,4,97.0,52,2130,24.6,82,2,1,vw pickup
395,32.0,4,135.0,84,2295,11.6,82,1,1,dodge rampage
396,28.0,4,120.0,79,2625,18.6,82,1,1,ford ranger


In [6]:
dt.head()

Unnamed: 0,mpg,cyl,disp,hp,wt,acc,yr,origin,car_type,car_name
0,18.0,8,307.0,130,3504,12.0,70,1,0,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,0,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,0,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,0,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,0,ford torino


In [7]:
dt = dt.drop(['car_name'], axis = 1)
dt['origin'] = dt['origin'].replace({1: 'america', 2: 'europe', 3: 'asia'})
dt = pd.get_dummies(dt,columns = ['origin'], dtype = int)
dt = dt.replace('?' , np.nan)

In [8]:
dt

Unnamed: 0,mpg,cyl,disp,hp,wt,acc,yr,car_type,origin_america,origin_asia,origin_europe
0,18.0,8,307.0,130,3504,12.0,70,0,1,0,0
1,15.0,8,350.0,165,3693,11.5,70,0,1,0,0
2,18.0,8,318.0,150,3436,11.0,70,0,1,0,0
3,16.0,8,304.0,150,3433,12.0,70,0,1,0,0
4,17.0,8,302.0,140,3449,10.5,70,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86,2790,15.6,82,1,1,0,0
394,44.0,4,97.0,52,2130,24.6,82,1,0,0,1
395,32.0,4,135.0,84,2295,11.6,82,1,1,0,0
396,28.0,4,120.0,79,2625,18.6,82,1,1,0,0


In [10]:
dt = dt.apply(pd.to_numeric, errors = 'ignore')

numeric_cols = dt.select_dtypes(include = [np.number]).columns
dt[numeric_cols] = dt[numeric_cols].apply(lambda x: x.fillna(x.median()))

  dt = dt.apply(pd.to_numeric, errors = 'ignore')


In [11]:
dt.head()

Unnamed: 0,mpg,cyl,disp,hp,wt,acc,yr,car_type,origin_america,origin_asia,origin_europe
0,18.0,8,307.0,130.0,3504,12.0,70,0,1,0,0
1,15.0,8,350.0,165.0,3693,11.5,70,0,1,0,0
2,18.0,8,318.0,150.0,3436,11.0,70,0,1,0,0
3,16.0,8,304.0,150.0,3433,12.0,70,0,1,0,0
4,17.0,8,302.0,140.0,3449,10.5,70,0,1,0,0


In [12]:
x = dt.drop(['mpg'], axis = 1) # independent variable
y = dt[['mpg']] # dependent variable

In [14]:
# scaling the data
x_s = preprocessing.scale(x)
x_s = pd.DataFrame(x_s, columns = x.columns) # converting scaled data into data frames

In [15]:
y_s = preprocessing.scale(y)
y_s = pd.DataFrame(y_s, columns = y.columns) # ideally train, test data should be in columns

In [16]:
x_s

Unnamed: 0,cyl,disp,hp,wt,acc,yr,car_type,origin_america,origin_asia,origin_europe
0,1.498191,1.090604,0.673118,0.630870,-1.295498,-1.627426,-1.062235,0.773559,-0.497643,-0.461968
1,1.498191,1.503514,1.589958,0.854333,-1.477038,-1.627426,-1.062235,0.773559,-0.497643,-0.461968
2,1.498191,1.196232,1.197027,0.550470,-1.658577,-1.627426,-1.062235,0.773559,-0.497643,-0.461968
3,1.498191,1.061796,1.197027,0.546923,-1.295498,-1.627426,-1.062235,0.773559,-0.497643,-0.461968
4,1.498191,1.042591,0.935072,0.565841,-1.840117,-1.627426,-1.062235,0.773559,-0.497643,-0.461968
...,...,...,...,...,...,...,...,...,...,...
393,-0.856321,-0.513026,-0.479482,-0.213324,0.011586,1.621983,0.941412,0.773559,-0.497643,-0.461968
394,-0.856321,-0.925936,-1.370127,-0.993671,3.279296,1.621983,0.941412,-1.292726,-0.497643,2.164651
395,-0.856321,-0.561039,-0.531873,-0.798585,-1.440730,1.621983,0.941412,0.773559,-0.497643,-0.461968
396,-0.856321,-0.705077,-0.662850,-0.408411,1.100822,1.621983,0.941412,0.773559,-0.497643,-0.461968


In [17]:
x_train, x_test, y_train, y_test = train_test_split(x_s, y_s, test_size = 0.20, random_state = 0)
x_train.shape

(318, 10)

In [18]:
# simple linear regression

In [19]:
regression_model = LinearRegression()
regression_model.fit(x_train, y_train)

for idx, col_name in enumerate(x_train.columns):
    print('The coefficient for {} is {}' .format(col_name, regression_model.coef_[0][idx]))

intercept = regression_model.intercept_[0]
print('The intercept is {}' .format(intercept))

The coefficient for cyl is 0.24638776053571634
The coefficient for disp is 0.2917709209866447
The coefficient for hp is -0.18081621820393684
The coefficient for wt is -0.667553060986813
The coefficient for acc is 0.06537309205777046
The coefficient for yr is 0.3481770259426718
The coefficient for car_type is 0.3339231253960359
The coefficient for origin_america is -0.08117984631927032
The coefficient for origin_asia is 0.0698609820966492
The coefficient for origin_europe is 0.030003161242288048
The intercept is -0.01800683137092324


In [20]:
# ridge regression

ridge_model = Ridge(alpha = 0.4)
ridge_model.fit(x_train, y_train)

print('Ridge model coef: {}' .format(ridge_model.coef_))

Ridge model coef: [ 0.24242411  0.28008024 -0.18071842 -0.65711583  0.06353256  0.34721777
  0.32998816 -0.08077573  0.06989674  0.02945199]


In [22]:
# lasso regression

lasso_model = Lasso(alpha = 0.1)
lasso_model.fit(x_train, y_train)

print('Lasso model coef: {}' .format(lasso_model.coef_))

Lasso model coef: [-0.         -0.         -0.07247557 -0.45867691  0.          0.2698134
  0.11341188 -0.04988145  0.          0.        ]


In [23]:
# score comparision.

# simple linear regression
print(regression_model.score(x_train, y_train))
print(regression_model.score(x_test, y_test))

print('*****************************')

# ridge regression
print(regression_model.score(x_train, y_train))
print(regression_model.score(x_test, y_test))

print('******************************')

# Lasso regression
print(lasso_model.score(x_train, y_train))
print(lasso_model.score(x_test, y_test))



0.8373422857977738
0.8474768646673948
*****************************
0.8373422857977738
0.8474768646673948
******************************
0.8007202116330951
0.8283046020148332
