<a href="https://colab.research.google.com/github/22Ifeoma22/22Ifeoma22/blob/main/HyperparameterTuningForRegressionModels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")


In [17]:
automobile_df = pd.read_csv("/content/sample_data/auto-mpg-processed.csv")

automobile_df.head()


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,18.0,8,307.0,130,3504,12.0,49
1,15.0,8,350.0,165,3693,11.5,49
2,18.0,8,318.0,150,3436,11.0,49
3,16.0,8,304.0,150,3433,12.0,49
4,17.0,8,302.0,140,3449,10.5,49


In [18]:
X = automobile_df.drop(['mpg', 'age'], axis=1)


In [19]:
Y = automobile_df['mpg']


In [20]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)


In [21]:
parameters = {'alpha': [0.2, 0.4, 0.6, 0.7, 0.8, 0.9, 1.0]}
grid_search = GridSearchCV(Lasso(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_


{'alpha': 1.0}

In [22]:
for i in range(len(parameters['alpha'])):
       print('Parameters: ', grid_search.cv_results_['params'][i])
       print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
       print('Rank: ', grid_search.cv_results_['rank_test_score'][i])



Parameters:  {'alpha': 0.2}
Mean Test Score:  0.6992858000026176
Rank:  7
Parameters:  {'alpha': 0.4}
Mean Test Score:  0.7004465806914716
Rank:  6
Parameters:  {'alpha': 0.6}
Mean Test Score:  0.7006330844901631
Rank:  5
Parameters:  {'alpha': 0.7}
Mean Test Score:  0.7006529555417872
Rank:  4
Parameters:  {'alpha': 0.8}
Mean Test Score:  0.7006704372524958
Rank:  3
Parameters:  {'alpha': 0.9}
Mean Test Score:  0.7006847239666558
Rank:  2
Parameters:  {'alpha': 1.0}
Mean Test Score:  0.7006998923021363
Rank:  1


In [23]:
lasso_model = Lasso(alpha=grid_search.best_params_['alpha']).fit(x_train, y_train)

In [24]:
y_pred = lasso_model.predict(x_test)
print('Training score: ', lasso_model.score(x_train, y_train))
print('Test score: ', r2_score(y_test, y_pred))


Training score:  0.7072179817930488
Test score:  0.7000351956785935


In [25]:
parameters = {'n_neighbors': [10, 12, 14, 18, 20, 25, 30, 35, 50]}
grid_search = GridSearchCV(KNeighborsRegressor(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_


{'n_neighbors': 25}

In [26]:
for i in range(len(parameters['n_neighbors'])):
     print('Parameters: ', grid_search.cv_results_['params'][i])
     print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
     print('Rank: ', grid_search.cv_results_['rank_test_score'][i])


Parameters:  {'n_neighbors': 10}
Mean Test Score:  0.717265086771795
Rank:  3
Parameters:  {'n_neighbors': 12}
Mean Test Score:  0.7142756658737239
Rank:  7
Parameters:  {'n_neighbors': 14}
Mean Test Score:  0.7142244541693393
Rank:  8
Parameters:  {'n_neighbors': 18}
Mean Test Score:  0.7109877720388628
Rank:  9
Parameters:  {'n_neighbors': 20}
Mean Test Score:  0.7148328680810386
Rank:  5
Parameters:  {'n_neighbors': 25}
Mean Test Score:  0.7218236821627407
Rank:  1
Parameters:  {'n_neighbors': 30}
Mean Test Score:  0.7167496177405526
Rank:  4
Parameters:  {'n_neighbors': 35}
Mean Test Score:  0.7177353289540646
Rank:  2
Parameters:  {'n_neighbors': 50}
Mean Test Score:  0.7144478638211321
Rank:  6


In [27]:
 kneighbors_model = KNeighborsRegressor(n_neighbors=grid_search.best_params_['n_neighbors']).fit(x_train, y_train)

In [28]:
y_pred = kneighbors_model.predict(x_test)
print('Training score: ', kneighbors_model.score(x_train, y_train))
print('Testing score: ', r2_score(y_test, y_pred))


Training score:  0.735040014493121
Testing score:  0.6944821543667241


In [29]:
parameters = {'max_depth':[1, 2, 3, 4, 5, 7, 8]}
grid_search = GridSearchCV(DecisionTreeRegressor(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)
grid_search.best_params_


{'max_depth': 3}

In [30]:
decision_tree_model = DecisionTreeRegressor(max_depth=grid_search.best_params_['max_depth']).fit(x_train, y_train)



In [31]:
y_pred = kneighbors_model.predict(x_test)
print('Training score: ', kneighbors_model.score(x_train, y_train))
print('Testing score: ', r2_score(y_test, y_pred))


Training score:  0.735040014493121
Testing score:  0.6944821543667241


In [32]:
parameters = {'epsilon': [0.05, 0.1, 0.2, 0.3],
              'C': [0.2, 0.3]}
grid_search = GridSearchCV(SVR(kernel='linear'), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_


{'C': 0.3, 'epsilon': 0.05}

In [33]:
svr_model = SVR(kernel='linear',
    epsilon=grid_search.best_params_['epsilon'],
     C=grid_search.best_params_['C']).fit(x_train, y_train)




In [34]:
y_pred = svr_model.predict(x_test)
print('Training score: ', svr_model.score(x_train, y_train))
print('Testing score: ', r2_score(y_test, y_pred))


Training score:  0.7009947486418164
Testing score:  0.6656168130406962
