In [1]:
#Load Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the mtcars CSV from the current working directory and preview it
data = pd.read_csv(filepath_or_buffer='./mtcars.csv')
data.head()

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [3]:
# Build a copy of the frame without the `model` column and preview it
data2 = data.drop(columns=['model'])
data2.head()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [4]:
#Create Standard Model

# Target is `mpg`; the features are every other column of data2 (as NumPy arrays)
x = data2.drop(columns=['mpg']).to_numpy()
y = data2['mpg'].to_numpy()

# Hold out 20% of the rows as a test set; the seed keeps the split repeatable
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=100
)

# Estimator class wrapped by the pipelines built in the next cell
from sklearn.linear_model import LassoLarsIC

In [5]:
#Construct some pipelines
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# One "standardize, then LassoLarsIC" pipeline per information criterion
pipe_aic = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('clf', LassoLarsIC(criterion='aic')),
])
pipe_bic = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('clf', LassoLarsIC(criterion='bic')),
])

# Pipelines in evaluation order: AIC first, BIC second
pipeline = [pipe_aic, pipe_bic]

# Grid-search parameter grids, positionally matched to `pipeline`
# (both are empty, so the search evaluates each pipeline as configured)
param_gridaic = {}
param_gridbic = {}
modelpara = [param_gridaic, param_gridbic]

In [6]:
#Define Gridsearch Function

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn import metrics

def Gridsearch_cv(model, params):
    """Grid-search ``model`` with repeated k-fold CV and print a test-set report.

    The search is fitted on the module-level ``x_train`` / ``y_train`` arrays and
    the refitted best estimator is evaluated on ``x_test`` / ``y_test``. The
    report (model name, best parameters, intercept, coefficient table, R2,
    adjusted R2, MAE, MSE, RMSE) is written to stdout.

    Parameters
    ----------
    model : sklearn.pipeline.Pipeline
        Pipeline whose final step is named ``'clf'`` and exposes ``intercept_``
        and ``coef_`` once fitted.
    params : dict
        Parameter grid handed to ``GridSearchCV`` (may be empty).

    Returns
    -------
    None
    """
    # Cross-validation scheme: 10 folds repeated 5 times with a fixed seed
    cv2 = RepeatedKFold(n_splits=10, n_repeats=5, random_state=100)

    # Grid search scored by (negated) RMSE
    gs_clf = GridSearchCV(model, params, cv=cv2, scoring='neg_root_mean_squared_error')
    gs_clf = gs_clf.fit(x_train, y_train)
    best_model = gs_clf.best_estimator_
    fitted_clf = best_model.named_steps['clf']

    # Use best model and test data for final evaluation
    y_pred = best_model.predict(x_test)

    # Identify best parameters found by the search
    bestpara = str(gs_clf.best_params_)

    # Output heading. The name is taken from the `model` argument rather than
    # from a global called `pipeline`, so the function does not depend on the
    # caller's loop-variable naming.
    print('\nOptimized Model')
    print('\nModel Name:', str(model.named_steps['clf']))

    # Output validation statistics
    print('\nBest Parameters:', bestpara)

    # Fitted intercept and per-feature coefficients of the final estimator
    print('\nIntercept: {:.2f}'.format(float(fitted_clf.intercept_)))
    coeff_table = pd.DataFrame(np.transpose(fitted_clf.coef_),
                               data2.drop('mpg', axis=1).columns,
                               columns=['Coefficients'])
    print('\n')
    print(coeff_table)

    # R2 on the held-out data
    r2 = metrics.r2_score(y_test, y_pred)
    print('\nR2: {:0.2f}'.format(r2))
    # NOTE(review): R2 is computed on the test split, but the sample size used
    # for the adjustment is len(y) (the full dataset) and the predictor count is
    # x.shape[1]. Kept as in the original; confirm this is the intended n.
    adjusted_r_squared2 = 1 - (1 - r2) * (len(y) - 1) / (len(y) - x.shape[1] - 1)
    print('Adj_R2: {:0.2f}'.format(adjusted_r_squared2))

    # Error metrics on the held-out data (MSE computed once, RMSE derived from it)
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Absolute Error: {:.2f}'.format(metrics.mean_absolute_error(y_test, y_pred)))
    print('Mean Squared Error: {:.2f}'.format(mse))
    print('Root Mean Squared Error: {:.2f}'.format(np.sqrt(mse)))

In [7]:
#Run Models
# Fit and report each pipeline together with its positionally matched parameter grid.
# NOTE(review): the loop targets reuse the names of the lists being iterated. During
# the loop the global `pipeline` is the current Pipeline object (any code that reads
# the global `pipeline` sees that item), and after the loop `pipeline` / `modelpara`
# hold the last Pipeline / grid instead of the lists. Re-run the cell that builds
# the lists before running this cell again.
for pipeline, modelpara in zip(pipeline,modelpara):
    Gridsearch_cv(pipeline,modelpara)


Optimized Model

Model Name: LassoLarsIC()

Best Parameters: {}

Intercept: 19.87


      Coefficients
cyl       0.000000
disp      0.000000
hp        0.000000
drat      0.512997
wt       -1.719240
qsec      0.566978
vs        0.000000
am        0.738035
gear      1.846619
carb     -2.591402

R2: 0.62
Adj_R2: 0.44
Mean Absolute Error: 2.21
Mean Squared Error: 9.94
Root Mean Squared Error: 3.15

Optimized Model

Model Name: LassoLarsIC(criterion='bic')

Best Parameters: {}

Intercept: 19.87


      Coefficients
cyl       0.000000
disp      0.000000
hp        0.000000
drat      0.512997
wt       -1.719240
qsec      0.566978
vs        0.000000
am        0.738035
gear      1.846619
carb     -2.591402

R2: 0.62
Adj_R2: 0.44
Mean Absolute Error: 2.21
Mean Squared Error: 9.94
Root Mean Squared Error: 3.15
