In [19]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso, LassoCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

car_price = pd.read_csv('CarPrice_Assignment.csv')
car_price.head()

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


In [20]:
X = car_price[['wheelbase', 'enginesize', 'compressionratio', 'horsepower', 'peakrpm', 'citympg', 'highwaympg']].copy()
Y = car_price['price']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

In [21]:
#Running lasso cv
lasso_cv = LassoCV(alphas = [0.001, 0.01, 0.1, 1, 10, 100], cv = 5).fit(X_train, Y_train)
lasso_cv.alpha_

100.0

In [22]:
#Building lasso
lasso_md = Lasso(alpha = 0.001).fit(X_train, Y_train)
lasso_md.coef_

array([ 160.97205229,  116.97385438,  305.97687759,   45.22970934,
          2.67621876, -101.34707746,   -0.36939983])

In [23]:
#Building linear regression model
lm_md = make_pipeline(StandardScaler(), LinearRegression()).fit(X_train, Y_train)

#Predicting on the test
lm_pred = lm_md.predict(X_test)
lm_pred

array([12333.42718127, 33530.54533076, 12486.71081088, 11825.13724274,
        8639.25502458, -1153.24249426,  6057.59930848,  8233.08417696,
        5360.10541582, 11955.7164958 ,  8836.90022663, 11825.13724274,
        9607.2002059 ,  6663.73093172, 13347.66207669, 13172.30748647,
       10161.8525518 , 17369.71393437, 11355.77492731, 11645.79576514,
       32864.07451581, 17073.50745965, 13172.30748647, 22112.44498063,
       12902.51227765,  6854.05887781, 14271.32661368, 14950.29527575,
        6663.73093172,  6054.53953853, 16837.52639045,  9564.58565515,
       18986.561144  , 11420.1558526 , 14170.24223028, 16418.50996135,
        6897.67863768, 23664.16575085, 18315.44058442, 16946.45267373,
       10342.34418561])

In [28]:
lm_mse = mean_squared_error(Y_test, lm_pred)
print('MSE of linear model is ', lm_mse)

MSE of linear model is  8904869.047604205


In [29]:
#ridge regression
ridge_cv = RidgeCV(alphas = [0.001, 0.01, 0.1, 1, 10, 100], cv = 5).fit(X_train, Y_train)
ridge_cv.alpha_

## Extracting the best lambda
cv_lambda = ridge_cv.alpha_

#Building Ridge
ridge_md = Ridge(alpha = cv_lambda).fit(X_train, Y_train)

#Predicting on the test
ridge_pred = ridge_md.predict(X_test)
ridge_pred

ridge_mse = mean_squared_error(Y_test, ridge_pred)
print('MSE of linear model is ', ridge_mse)

MSE of linear model is  8966429.105122097
