In [289]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_boston
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import ticker

import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

sns.set(style="whitegrid", font_scale=1.3)
matplotlib.rcParams["legend.framealpha"] = 1
matplotlib.rcParams["legend.frameon"] = True

In [290]:
data1=pd.read_csv('/Users/ameyadalvi/Downloads/auto-mpg.csv')
data1

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
395,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120.0,79,2625,18.6,82,1,ford ranger


In [291]:
data = data1.drop(['car name'], axis =1)
data

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,18.0,8,307.0,130,3504,12.0,70,1
1,15.0,8,350.0,165,3693,11.5,70,1
2,18.0,8,318.0,150,3436,11.0,70,1
3,16.0,8,304.0,150,3433,12.0,70,1
4,17.0,8,302.0,140,3449,10.5,70,1
...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86,2790,15.6,82,1
394,44.0,4,97.0,52,2130,24.6,82,2
395,32.0,4,135.0,84,2295,11.6,82,1
396,28.0,4,120.0,79,2625,18.6,82,1


In [292]:
data = data[data['horsepower'] != '?']

In [293]:
columns = data.columns

## Feature Scaling 

In [294]:
scaled_data = preprocessing.normalize(data, norm='l2')

In [296]:
scaled_data = pd.DataFrame(scaled_data, columns=columns)
scaled_data

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,0.005113,0.002272,0.087201,0.036926,0.995286,0.003409,0.019883,0.000284
1,0.004039,0.002154,0.094240,0.044427,0.994364,0.003096,0.018848,0.000269
2,0.005210,0.002316,0.092048,0.043419,0.994580,0.003184,0.020262,0.000289
3,0.004637,0.002319,0.088104,0.043472,0.994936,0.003478,0.020287,0.000290
4,0.004905,0.002308,0.087137,0.040395,0.995153,0.003030,0.020197,0.000289
...,...,...,...,...,...,...,...,...
387,0.009656,0.001431,0.050068,0.030756,0.997778,0.005579,0.029325,0.000358
388,0.020609,0.001874,0.045433,0.024356,0.997650,0.011522,0.038407,0.000937
389,0.013900,0.001737,0.058639,0.036487,0.996865,0.005039,0.035618,0.000434
390,0.010645,0.001521,0.045620,0.030033,0.997938,0.007071,0.031174,0.000380


In [297]:
X =  scaled_data.loc[:, scaled_data.columns != 'mpg']
y =  scaled_data['mpg']

In [298]:
X

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,0.002272,0.087201,0.036926,0.995286,0.003409,0.019883,0.000284
1,0.002154,0.094240,0.044427,0.994364,0.003096,0.018848,0.000269
2,0.002316,0.092048,0.043419,0.994580,0.003184,0.020262,0.000289
3,0.002319,0.088104,0.043472,0.994936,0.003478,0.020287,0.000290
4,0.002308,0.087137,0.040395,0.995153,0.003030,0.020197,0.000289
...,...,...,...,...,...,...,...
387,0.001431,0.050068,0.030756,0.997778,0.005579,0.029325,0.000358
388,0.001874,0.045433,0.024356,0.997650,0.011522,0.038407,0.000937
389,0.001737,0.058639,0.036487,0.996865,0.005039,0.035618,0.000434
390,0.001521,0.045620,0.030033,0.997938,0.007071,0.031174,0.000380


In [299]:
y = pd.DataFrame(y)
y

Unnamed: 0,mpg
0,0.005113
1,0.004039
2,0.005210
3,0.004637
4,0.004905
...,...
387,0.009656
388,0.020609
389,0.013900
390,0.010645


## Train-Test split

In [302]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.33, random_state=42)

In [303]:
lin_reg = LinearRegression().fit(X_train, y_train)

In [304]:
lin_reg.score(X_train, y_train)

0.9404605356967636

In [305]:
lin_reg.coef_

array([[ 0.84020765, -0.11601289, -0.1712066 , -1.61009463, -0.27982384,
         0.56987164,  0.84684289]])

In [306]:
pd.DataFrame(lin_reg.coef_.T, X.columns, columns=['coef'])

Unnamed: 0,coef
cylinders,0.840208
displacement,-0.116013
horsepower,-0.171207
weight,-1.610095
acceleration,-0.279824
model year,0.569872
origin,0.846843


In [307]:
y_pred = lin_reg.predict(X_test)
y_pred

array([[0.01267267],
       [0.00866908],
       [0.01969105],
       [0.0134625 ],
       [0.01020144],
       [0.01432713],
       [0.00147298],
       [0.0135562 ],
       [0.00654619],
       [0.0174952 ],
       [0.00301125],
       [0.00839905],
       [0.0032203 ],
       [0.01397359],
       [0.00604188],
       [0.01104012],
       [0.00692792],
       [0.01404067],
       [0.01069111],
       [0.01181983],
       [0.00719722],
       [0.01641581],
       [0.01805211],
       [0.00472091],
       [0.01592244],
       [0.0116356 ],
       [0.00753488],
       [0.0049994 ],
       [0.0159144 ],
       [0.00996987],
       [0.00284691],
       [0.00737859],
       [0.00572101],
       [0.01196291],
       [0.00273408],
       [0.01835147],
       [0.00244072],
       [0.01221107],
       [0.00264903],
       [0.0002851 ],
       [0.00340301],
       [0.01394022],
       [0.01751044],
       [0.01241422],
       [0.00244616],
       [0.00101387],
       [0.00564676],
       [0.015

In [308]:
print("The mean squared error is:",mean_squared_error(y_test,y_pred))

The mean squared error is: 1.874651041530226e-06


## LassoCV

In [309]:
lasso_reg = LassoCV(cv=5, random_state=0).fit(X_train, y_train)

In [310]:
lasso_reg.score(X_train, y_train)

0.9324314168665049

In [311]:
lasso_reg.coef_

array([ 0.        ,  0.00883293, -0.04832203, -0.        ,  0.        ,
        0.64119412,  0.        ])

In [312]:
pd.DataFrame(lasso_reg.coef_.T, X.columns, columns=['coef'])

Unnamed: 0,coef
cylinders,0.0
displacement,0.008833
horsepower,-0.048322
weight,-0.0
acceleration,0.0
model year,0.641194
origin,0.0


In [313]:
lasso_reg.alpha_

1.312731356606791e-07

In [314]:
y_pred = lasso_reg.predict(X_test)
y_pred

array([0.01260688, 0.00894652, 0.01923548, 0.01460651, 0.0104539 ,
       0.01397322, 0.00126352, 0.01322659, 0.00658744, 0.01677195,
       0.00279698, 0.00797766, 0.00326121, 0.0135227 , 0.00586991,
       0.01158622, 0.00703407, 0.01361138, 0.01058325, 0.01189611,
       0.00748508, 0.01654543, 0.0175429 , 0.00442937, 0.01552139,
       0.01177757, 0.00711359, 0.00499998, 0.0159428 , 0.01000692,
       0.00260622, 0.00750925, 0.00567762, 0.01141269, 0.00230255,
       0.01776273, 0.00201619, 0.01294701, 0.00246081, 0.00082352,
       0.00305293, 0.01381361, 0.01783222, 0.01201817, 0.00211815,
       0.00116038, 0.00571896, 0.01523608, 0.0112058 , 0.01645383,
       0.00257695, 0.01133367, 0.01136655, 0.01548413, 0.00997521,
       0.00488342, 0.0066345 , 0.00817867, 0.01000746, 0.01146293,
       0.00072141, 0.00769124, 0.00990396, 0.00785316, 0.01117661,
       0.01328805, 0.00993311, 0.01612416, 0.00660716, 0.00164878,
       0.01062155, 0.00275505, 0.00898809, 0.01293317, 0.00703

In [315]:
print("The mean squared error is:",mean_squared_error(y_test,y_pred))

The mean squared error is: 1.8765639641889238e-06


## RidgeCV

In [316]:
ridge_reg = RidgeCV(cv=5).fit(X_train, y_train)

In [317]:
ridge_reg.score(X_train, y_train)

0.5023618383898814

In [318]:
ridge_reg.coef_

array([[ 0.00077346, -0.08655993, -0.00562391,  0.00290521,  0.0157326 ,
         0.06747499,  0.0030243 ]])

In [319]:
pd.DataFrame(ridge_reg.coef_.T, X.columns, columns=['coef'])

Unnamed: 0,coef
cylinders,0.000773
displacement,-0.08656
horsepower,-0.005624
weight,0.002905
acceleration,0.015733
model year,0.067475
origin,0.003024


In [320]:
ridge_reg.alpha_

0.1

In [321]:
y_pred = ridge_reg.predict(X_test)
y_pred

array([[0.01113881],
       [0.01075597],
       [0.01126822],
       [0.01114685],
       [0.01031742],
       [0.01117199],
       [0.00633148],
       [0.01129137],
       [0.00826623],
       [0.01226452],
       [0.00501152],
       [0.00978435],
       [0.00690382],
       [0.01114981],
       [0.01002412],
       [0.01109258],
       [0.00876357],
       [0.01104512],
       [0.01075609],
       [0.010596  ],
       [0.00828497],
       [0.01146262],
       [0.01148313],
       [0.00639037],
       [0.01175479],
       [0.01107689],
       [0.01084627],
       [0.00839736],
       [0.01102089],
       [0.01047054],
       [0.00695737],
       [0.00808046],
       [0.00771044],
       [0.01084814],
       [0.00598279],
       [0.0115276 ],
       [0.00809357],
       [0.01099173],
       [0.00768982],
       [0.00683377],
       [0.0073492 ],
       [0.0113516 ],
       [0.0108789 ],
       [0.01119205],
       [0.00720271],
       [0.00556526],
       [0.00753945],
       [0.011

In [322]:
print("The mean squared error is:",mean_squared_error(y_test,y_pred))

The mean squared error is: 1.1796890606887199e-05


In [323]:
lin_reg.coef_

array([[ 0.84020765, -0.11601289, -0.1712066 , -1.61009463, -0.27982384,
         0.56987164,  0.84684289]])

In [324]:
lasso_reg.coef_

array([ 0.        ,  0.00883293, -0.04832203, -0.        ,  0.        ,
        0.64119412,  0.        ])

In [325]:
ridge_reg.coef_

array([[ 0.00077346, -0.08655993, -0.00562391,  0.00290521,  0.0157326 ,
         0.06747499,  0.0030243 ]])