# Investigating Ray Tune hyperparameter tuning runs

This notebook uses scikit-optimize's plotting tools on the search state saved by Ray Tune to show you how well tuning is going, which hyperparameters are important or unimportant, and so on. I suggest pointing it at your running Ray Tune search and re-running it regularly to make sure that the search is making progress. You may need to periodically interrupt your search and restart it with tighter hyperparameter search ranges if you find that some hyperparameter values are consistently terrible.

In [None]:
%matplotlib inline

import glob
import os

import cloudpickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ray
import seaborn as sns

from ray import tune
from skopt.plots import plot_evaluations, plot_objective
from skopt.utils import expected_minimum_random_sampling

# Apply seaborn's notebook-sized, dark-grid theme to every figure drawn below.
sns.set(context='notebook', style='darkgrid')

## Variables that you can configure

In [None]:
# Directory used by the running Ray Tune instance. Should contain a file named
# experiment_state-<date>.json, as well as the search-alg-*.pkl file that the
# "Loading internal ... experiment state" cell below globs for.
# *THIS SHOULD BE THE ONLY THING YOU NEED TO CHANGE*
RUNNING_RAY_TUNE_DIR = 'runs/chain_runs/16/grid_search/'

## Loading internal scikit-optimize experiment state

In [None]:
# Locate the pickled search-algorithm state inside the Ray Tune directory.
search_alg_pattern = os.path.join(RUNNING_RAY_TUNE_DIR, 'search-alg-*.pkl')
pickle_paths = glob.glob(search_alg_pattern)
if not pickle_paths:
    raise IOError(
        "Could not find any matches for skopt state pattern, "
        f"{search_alg_pattern!r}. Check whether skopt's .pkl file actually "
        f"exists in RUNNING_RAY_TUNE_DIR={RUNNING_RAY_TUNE_DIR!r}.")

# The glob may match several files (e.g. if the search was restarted), so load
# the most recently modified one rather than an arbitrary match.
pickle_path = max(pickle_paths, key=os.path.getmtime)
print(f"Loading skopt state from {pickle_path!r}")

# NOTE: unpickling executes arbitrary code, so only point
# RUNNING_RAY_TUNE_DIR at directories written by searches you trust.
with open(pickle_path, 'rb') as fp:
    # The pickle holds a 2-tuple; only the second element (the object that
    # exposes get_result()) is needed here.
    _, skopt_alg = cloudpickle.load(fp)
skopt_res = skopt_alg.get_result()

## Generating hyperparameter sensitivity plots

In [None]:
# Plot which hyperparameter values the search has evaluated so far.
# Assigning to `_` keeps the returned object from being echoed as cell output.
_ = plot_evaluations(skopt_res)

In [None]:
# Plot skopt's model of the objective as a function of the hyperparameters.
# Optional extra arguments that were tried here and can be re-enabled:
#     minimum='expected_minimum_random', n_minimum_search=1000
# Assigning to `_` keeps the returned object from being echoed as cell output.
_ = plot_objective(skopt_res, n_samples=40)

In [None]:
# Histogram (with a rug of the individual runs) of every final loss recorded
# by the search so far.
final_losses = skopt_res.func_vals
n_results = len(final_losses)

# Use ten fixed bins once there are at least 20 runs; with fewer runs, leave
# the bin choice to seaborn.
if n_results >= 20:
    hist_bins = 10
else:
    hist_bins = None

loss_ax = sns.distplot(
    final_losses,
    bins=hist_bins,
    kde=False,
    rug=True,
    norm_hist=False,
)
loss_ax.set_title(
    f"Final loss distribution from {n_results} runs (lower = better)")
loss_ax.set_xlabel("Final loss")
loss_ax.set_ylabel("Frequency")
plt.show()

## Listing the best encountered hyperparameter settings, ordered by loss

In [None]:
# We list every config whose loss is at or below 'thresh', best (lowest loss)
# first. By default 'thresh' is the 10th-lowest loss seen so far, which shows
# the ten best configs (more if several runs are tied at the threshold); you
# can change 'thresh' to anything you want.
func_vals = np.asarray(skopt_res.func_vals)
if func_vals.size == 0:
    raise ValueError(
        "skopt_res contains no completed evaluations yet, so there are no "
        "configs to list.")
thresh = np.sort(func_vals)[:10].max()
good_inds, = np.nonzero(func_vals <= thresh)
# Order the selected runs by loss so the listing matches the section title; a
# stable sort keeps tied runs in the order in which they were evaluated.
good_inds = good_inds[np.argsort(func_vals[good_inds], kind='stable')]

all_dims = skopt_res.space.dimensions
for conf_num, good_ind in enumerate(good_inds, start=1):
    print(
        f"Good config at index {good_ind} ({conf_num}/"
        f"{len(good_inds)}), loss {func_vals[good_ind]:.6g}, "
        f"thresh {thresh}:")
    for dim, value in zip(all_dims, skopt_res.x_iters[good_ind]):
        print(f'    {dim.name} = {value}')

# The same configs regrouped per hyperparameter (one line per dimension, values
# listed best-first), which makes it easier to spot promising ranges.
print(f'Amalgamated "good" configs at thresh {thresh}:')
for dim_idx, dimension in enumerate(all_dims):
    values = [skopt_res.x_iters[i][dim_idx] for i in good_inds]
    # 'values' is empty if a hand-picked 'thresh' excludes every run.
    if values and isinstance(values[0], float):
        values_str = f"[{', '.join('%.3g' % v for v in values)}]"
    else:
        values_str = str(values)
    print(f'    {dimension.name} = {values_str}')

## Getting skopt to guess which configurations are going to perform best

In [None]:
# Ask skopt for its own guess at the best configurations. Per the skopt docs,
# each call scores `n_random_starts` randomly sampled points and returns the
# best one as an (x, predicted_value) pair; only x (the hyperparameter values)
# is kept. The call is repeated ten times to get ten independent proposals.
# NOTE: with one million samples per call this cell can take a while to run.
skopt_minima = [
    expected_minimum_random_sampling(skopt_res, n_random_starts=1000000)[0]
    for _ in range(10)
]

print("skopt's guess at best configs (randomly sampled proposals):")
for idx, dim in enumerate(skopt_res.space.dimensions):
    name = dim.name
    # Gather this hyperparameter's value from each of the proposals.
    values = [m[idx] for m in skopt_minima]
    if isinstance(values[0], float):
        stringified = [f'{v:.3g}' for v in values]
    else:
        stringified = [str(v) for v in values]
    min_str = f'  {name} = [{", ".join(stringified)}]'
    print(min_str)