# Hyperparameter tuning
This notebook explores hyperparameter tuning. It uses the boston house price dataset built into Sklearn.

## Import

In [10]:
# Core libraries
import pandas as pd

# Sklearn processing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

# Sklearn regression algorithms
from sklearn.neighbors import KNeighborsRegressor

# Sklearn regression model evaluation functions
from sklearn.metrics import r2_score

## Load data, split into X and y and scale data

In [11]:
# Load Boston housing data set
boston = pd.read_csv("boston.csv")

# Define the X (input) and y (target) features
X = boston.drop("MEDV", axis=1)
y = boston["MEDV"]

# Rescale the input features
scaler = MinMaxScaler(feature_range=(0,1))
X = scaler.fit_transform(X)

  return self.partial_fit(X, y)


## Build a model with default hyperparameters

In [12]:
# Create an empty model
model = KNeighborsRegressor()

In [13]:
# Inspect the model's default hyperparameters:
model

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=5, p=2,
          weights='uniform')

In [14]:
# What hyperparameters can we tune?
model.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

## Tune hyperparameters with grid search 

In [15]:
# Select an algorithm
algorithm = KNeighborsRegressor()

# Create 3 folds
seed = 13
kfold = KFold(n_splits=3, shuffle=True, random_state=seed)

# Define our candidate hyperparameters
hp_candidates = [{'n_neighbors': [2,3,4,5,6], 'weights': ['uniform','distance']}]

# Search for best hyperparameters
grid = GridSearchCV(estimator=algorithm, param_grid=hp_candidates, cv=kfold, scoring='r2')
grid.fit(X, y)

# Get the results
print(grid.best_score_)
print(grid.best_estimator_)
print(grid.best_params_)

0.7664050253839596
KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=3, p=2,
          weights='distance')
{'n_neighbors': 3, 'weights': 'distance'}


### Get a full breakdown of the grid search

In [16]:
grid.cv_results_



{'mean_fit_time': array([0.00066471, 0.00033243, 0.00067369, 0.00033251, 0.0006636 ,
        0.00064691, 0.00066368, 0.00066535, 0.00066559, 0.00066686]),
 'std_fit_time': array([0.00047003, 0.00047013, 0.00047652, 0.00047025, 0.00046924,
        0.00045744, 0.0004693 , 0.00047047, 0.00047064, 0.00047154]),
 'mean_score_time': array([0.00166202, 0.0016609 , 0.00132004, 0.00199382, 0.00166273,
        0.001683  , 0.00132918, 0.0013295 , 0.00166162, 0.00165995]),
 'std_score_time': array([4.70190333e-04, 4.69714426e-04, 4.77465269e-04, 4.89903609e-07,
        4.71035753e-04, 4.62561415e-04, 4.69516710e-04, 4.70302644e-04,
        4.70415035e-04, 4.71932352e-04]),
 'param_n_neighbors': masked_array(data=[2, 2, 3, 3, 4, 4, 5, 5, 6, 6],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_weights': masked_array(data=['uniform', 'distance', 'uniform', 'distance',
            

### Prove that best_score_ is the mean of all the k-fold scores
Here's a little check to see how best_score_ is derived from cv_results_

In [17]:
# Get the index of the best hyperparameter combination chosen by GridSearchCv()
grid.best_index_

3

In [18]:
# Get the mean and std of the k-fold scores for the best hyperparameter combination
print(grid.cv_results_['mean_test_score'][grid.best_index_])
print(grid.cv_results_['std_test_score'][grid.best_index_])

0.7664050253839596
0.018060189661311097
