This follows the work in 05-HyperparameterTuningWithGridSearch.ipynb

In [2]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

from sklearn.model_selection import GridSearchCV

import warnings
warnings.filterwarnings("ignore")

In [3]:
automobile_df = pd.read_csv('datasets/auto-mpg-processed.csv')

automobile_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,15.0,8,304.0,150,3892,12.5,48
1,19.2,6,231.0,105,3535,19.2,42
2,26.0,4,96.0,69,2189,18.0,48
3,26.0,4,79.0,67,1963,15.5,46
4,29.0,4,97.0,75,2171,16.0,45


In [4]:
X = automobile_df.drop(['mpg', 'age'], axis=1)

Y = automobile_df['mpg']

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

In [6]:
parameters = {'alpha': [0.2, 0.4, 0.6, 0.7, 0.8, 0.9, 1.0]}

grid_search = GridSearchCV(Lasso(), parameters, cv=3, return_train_score=True) #cv = 3, is 3-fold cross validation
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'alpha': 0.6}

In [7]:
for i in range(len(parameters['alpha'])):
    print('Parameters: ', grid_search.cv_results_['params'][i])

    print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
    
    print('Rank: ', grid_search.cv_results_['rank_test_score'][i])

Parameters:  {'alpha': 0.2}
Mean Test Score:  0.6962915264227195
Rank:  7
Parameters:  {'alpha': 0.4}
Mean Test Score:  0.697738072061564
Rank:  6
Parameters:  {'alpha': 0.6}
Mean Test Score:  0.6982680926216928
Rank:  1
Parameters:  {'alpha': 0.7}
Mean Test Score:  0.6982635621852792
Rank:  2
Parameters:  {'alpha': 0.8}
Mean Test Score:  0.6982567696074969
Rank:  3
Parameters:  {'alpha': 0.9}
Mean Test Score:  0.6982499713932607
Rank:  4
Parameters:  {'alpha': 1.0}
Mean Test Score:  0.6982434187118418
Rank:  5


In [8]:
lasso_model = Lasso(alpha=grid_search.best_params_['alpha']).fit(x_train, y_train)

In [9]:
y_pred = lasso_model.predict(x_test)

print('Training score: ', lasso_model.score(x_train, y_train))
print('Test score: ', r2_score(y_test, y_pred))

Training score:  0.7061431439826814
Test score:  0.7085342250821367


### Kneighbours Regression

In [10]:
parameters = {'n_neighbors': [10, 12, 14, 18, 20, 25, 30, 35, 50]}

grid_search = GridSearchCV(KNeighborsRegressor(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'n_neighbors': 25}

In [11]:
for i in range(len(parameters['n_neighbors'])):
    print('Parameters: ', grid_search.cv_results_['params'][i])
    print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
    print('Rank: ', grid_search.cv_results_['rank_test_score'][i])

Parameters:  {'n_neighbors': 10}
Mean Test Score:  0.6996778488824925
Rank:  6
Parameters:  {'n_neighbors': 12}
Mean Test Score:  0.6970312773239771
Rank:  8
Parameters:  {'n_neighbors': 14}
Mean Test Score:  0.6984292513456142
Rank:  7
Parameters:  {'n_neighbors': 18}
Mean Test Score:  0.6967472468129724
Rank:  9
Parameters:  {'n_neighbors': 20}
Mean Test Score:  0.7023981995231297
Rank:  5
Parameters:  {'n_neighbors': 25}
Mean Test Score:  0.7122019635357604
Rank:  1
Parameters:  {'n_neighbors': 30}
Mean Test Score:  0.7111933829976657
Rank:  2
Parameters:  {'n_neighbors': 35}
Mean Test Score:  0.7066001182402387
Rank:  3
Parameters:  {'n_neighbors': 50}
Mean Test Score:  0.7050644581375969
Rank:  4


In [12]:
kneighbors_model = KNeighborsRegressor(n_neighbors=grid_search.best_params_['n_neighbors']).fit(x_train, y_train)

In [13]:
y_pred = kneighbors_model.predict(x_test)

print('Training score: ', kneighbors_model.score(x_train, y_train))
print('Testing score: ', r2_score(y_test, y_pred))

Training score:  0.7370424455785011
Testing score:  0.6872738971145862


In [14]:
parameters = {'max_depth':[1, 2, 3, 4, 5, 7, 8]}

grid_search = GridSearchCV(DecisionTreeRegressor(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'max_depth': 3}

In [15]:
decision_tree_model = DecisionTreeRegressor(max_depth=grid_search.best_params_['max_depth']).fit(x_train, y_train)

In [16]:
y_pred = kneighbors_model.predict(x_test)

print('Training score: ', kneighbors_model.score(x_train, y_train))
print('Testing score: ', r2_score(y_test, y_pred))

Training score:  0.7370424455785011
Testing score:  0.6872738971145862


In [17]:
parameters = {'epsilon': [0.05, 0.1, 0.2, 0.3],
              'C': [0.2, 0.3]}

grid_search = GridSearchCV(SVR(kernel='linear'), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'C': 0.3, 'epsilon': 0.05}

In [18]:
svr_model = SVR(kernel='linear',
                epsilon=grid_search.best_params_['epsilon'], 
                C=grid_search.best_params_['C']).fit(x_train, y_train)

In [19]:
y_pred = svr_model.predict(x_test)

print('Training score: ', svr_model.score(x_train, y_train))
print('Testing score: ', r2_score(y_test, y_pred))

Training score:  0.693719423743661
Testing score:  0.6880070421715073
