# Hyperparameter Tuning using Ray Tune API

In [None]:
from sklearn.datasets import load_breast_cancer

from ray import tune
from ray.data import from_pandas
from ray.train import RunConfig, ScalingConfig
from ray.train.xgboost import XGBoostTrainer
from ray.tune.tuner import Tuner

def get_dataset():
    """Return the scikit-learn breast-cancer data as a Ray Dataset.

    The feature frame gets the labels attached as a ``"target"`` column
    before it is converted with ``from_pandas``.
    """
    bunch = load_breast_cancer(as_frame=True)
    frame = bunch["data"]
    frame["target"] = bunch["target"]
    return from_pandas(frame)

# The trainer starts with empty ``params``; the search space handed to the
# Tuner below carries its own "params" entry.
trainer = XGBoostTrainer(
    datasets={"train": get_dataset()},
    label_column="target",
    params={},
)

param_space = {
    # Grid over worker count and CPUs per worker.
    "scaling_config": ScalingConfig(
        num_workers=tune.grid_search([2, 4]),
        resources_per_worker={"CPU": tune.grid_search([1, 2])},
    ),
    # You can even grid search various datasets in Tune.
    # "datasets": {
    #     "train": tune.grid_search(
    #         [ds1, ds2]
    #     ),
    # },
    # XGBoost settings: three fixed entries followed by three sampled ones.
    "params": {
        "objective": "binary:logistic",
        "tree_method": "approx",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9),
    },
}

tuner = Tuner(
    trainable=trainer,
    run_config=RunConfig(name="my_tune_run"),
    param_space=param_space,
)
results = tuner.fit()


import random
from ray import train, tune
def random_error_trainable(config):
    """Fail roughly half of the time, otherwise report a zero loss.

    One draw from ``random.random()`` decides the outcome: values below 0.5
    return ``{"loss": 0.0}``, anything else raises ``ValueError``.
    ``config`` is accepted but not read.
    """
    draw = random.random()
    if draw >= 0.5:
        raise ValueError("This is an error")
    return {"loss": 0.0}
tuner = tune.Tuner(
    random_error_trainable,
    run_config=train.RunConfig(name="example-experiment"),
    tune_config=tune.TuneConfig(num_samples=10),
)
# Exceptions raised inside a trial are reported per trial through
# ``result.error``. fit() is deliberately not wrapped in a silent
# try/except: swallowing an exception here would leave ``result_grid``
# unbound and turn the real failure into a NameError further down.
result_grid = tuner.fit()

# Report the outcome of every trial.
for result in result_grid:
    if result.error:
        print(f"Trial failed with error {result.error}.")
    else:
        print(f"Trial finishes successfully with metrics {result.metrics}.")

# Get the best result based on a particular metric.
best_result = result_grid.get_best_result(metric="loss", mode="min")
# Get the best checkpoint corresponding to the best result.
best_checkpoint = best_result.checkpoint
# Get a dataframe for the last reported results of all of the trials
df = result_grid.get_dataframe()
# Get a dataframe for the minimum loss seen for each trial
df = result_grid.get_dataframe(metric="loss", mode="min")

# Define an objective function to optimise

In [1]:
def objective(x, a, b):
    """Return ``a * sqrt(x) + b``, with the root taken as ``x ** 0.5``."""
    root = x ** 0.5
    return a * root + b

- With the Function API, you can report intermediate metrics by simply calling ```train.report()``` within the function.
- The ```config``` argument in the function is a dictionary populated automatically by Ray Tune and corresponding to the hyperparameters selected for the trial from the ```search space```.
- Do not use ```train.report()``` within a ```Trainable``` class.


In [2]:
from ray import train, tune


def trainable(config: dict):
    """Evaluate ``objective`` for x = 0..19 and report each value as ``"score"``.

    ``config`` must provide the coefficients under the keys ``"a"`` and ``"b"``.
    """
    a, b = config["a"], config["b"]
    for step in range(20):
        score = objective(step, a, b)
        train.report({"score": score})  # This sends the score to Tune.


# Single trial with fixed coefficients a=2, b=4.
tuner = tune.Tuner(
    trainable,
    param_space={"a": 2, "b": 4},
)
results = tuner.fit()

0,1
Current time:,2024-10-20 11:56:13
Running for:,00:00:03.16
Memory:,17.6/503.4 GiB

Trial name,status,loc,iter,total time (s),score
trainable_863e1_00000,TERMINATED,10.56.7.46:350241,20,0.00499034,12.7178


2024-10-20 11:56:13,463	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/sur06423/ray_results/trainable_2024-10-20_11-56-01' in 0.0238s.
2024-10-20 11:56:13,470	INFO tune.py:1041 -- Total run time: 7.12 seconds (3.14 seconds for the tuning loop).


# Class API Tutorials

# Class API Checkpointing

In [None]:
import os
import torch
from torch import nn

from ray import train, tune


class MyTrainableClass(tune.Trainable):
    """Class-API trainable holding a small MLP whose weights go to ``model.pth``."""

    def setup(self, config):
        """Build the network; ``config["input_size"]`` (default 32) is the input width."""
        in_features = config.get("input_size", 32)
        layers = [nn.Linear(in_features, 32), nn.ReLU(), nn.Linear(32, 10)]
        self.model = nn.Sequential(*layers)

    def step(self):
        """Return an empty result dict; no training is performed here."""
        return dict()

    def save_checkpoint(self, tmp_checkpoint_dir):
        """Write the model's state dict into ``tmp_checkpoint_dir`` and return that directory."""
        weights_file = os.path.join(tmp_checkpoint_dir, "model.pth")
        state = self.model.state_dict()
        torch.save(state, weights_file)
        return tmp_checkpoint_dir

    def load_checkpoint(self, tmp_checkpoint_dir):
        """Restore the model's state dict from ``model.pth`` inside ``tmp_checkpoint_dir``."""
        weights_file = os.path.join(tmp_checkpoint_dir, "model.pth")
        state = torch.load(weights_file)
        self.model.load_state_dict(state)


# Run the class-based trainable with input_size=64, stopping at
# training_iteration == 2 and using checkpoint_frequency=2.
tuner = tune.Tuner(
    MyTrainableClass,
    run_config=train.RunConfig(
        checkpoint_config=train.CheckpointConfig(checkpoint_frequency=2),
        stop={"training_iteration": 2},
    ),
    param_space={"input_size": 64},
)
tuner.fit()

- Ray Tune supports callbacks that are called during various times of the training process. Callbacks can be passed as a parameter to RunConfig, taken in by Tuner, and the sub-method you provide will be invoked automatically.

- This simple callback just prints a metric each time a result is received:

In [None]:
from ray import train, tune
from ray.train import RunConfig
from ray.tune import Callback


class MyCallback(Callback):
    """Tune callback that echoes the ``"metric"`` entry of every trial result."""

    def on_trial_result(self, iteration, trials, trial, result, **info):
        """Print the value stored under ``result["metric"]``."""
        value = result["metric"]
        print(f"Got result: {value}")


def train_fn(config):
    """Report the integers 0 through 9 under the key ``"metric"``; ``config`` is not read."""
    for step in range(10):
        payload = {"metric": step}
        train.report(payload)


# Register the callback through RunConfig so it is invoked during the run.
tuner = tune.Tuner(
    train_fn,
    run_config=RunConfig(
        callbacks=[MyCallback()],
    ),
)
tuner.fit()

# How to report Metrics in both APIs
- You can log arbitrary values and metrics in both Function and Class training APIs:
- Note that train.report() is not meant to transfer large amounts of data, like models or datasets. Doing so can incur large overheads and slow down your Tune run significantly.

In [None]:
def trainable(config):
    # Function API sketch: ``num_epochs``, ``accuracy``, ``random_metric_1`` and
    # ``metric_2`` are placeholders that are not defined in this snippet.
    for i in range(num_epochs):
        ...
        # Several metrics are sent in a single report call.
        train.report({"acc": accuracy, "metric_foo": random_metric_1, "bar": metric_2})

class Trainable(tune.Trainable):
    # Class API sketch: ``accuracy``, ``random_metric_1`` and ``metric_2`` are
    # placeholders that are not defined in this snippet.
    def step(self):
        ...
        # don't call report here!
        # The metrics are handed back as the return value of step() instead.
        return dict(acc=accuracy, metric_foo=random_metric_1, bar=metric_2)

# Reusing Actors in Tune
- Your Trainable can often take a long time to start. To avoid this, you can do tune.TuneConfig(reuse_actors=True) (which is taken in by Tuner) to reuse the same Trainable Python process and object for multiple hyperparameters.

- This requires you to implement Trainable.reset_config, which provides a new set of hyperparameters. It is up to the user to correctly update the hyperparameters of your trainable.

In [None]:
class PytorchTrainable(tune.Trainable):
    """Train a Pytorch ConvNet.

    ``reset_config`` is implemented so the same instance can be given a new
    set of hyperparameters (``tune.TuneConfig(reuse_actors=True)``) without
    reloading the data loaders.
    """

    def setup(self, config):
        """Load the data loaders and build the model and optimizer from ``config``.

        ``config`` may provide ``"lr"`` (default 0.01) and ``"momentum"``
        (default 0.9).
        """
        self.train_loader, self.test_loader = get_data_loaders()
        self._build_model_and_optimizer(config)

    def reset_config(self, new_config):
        """Re-initialise the model and optimizer for ``new_config``.

        The optimizer is rebuilt together with the model. Merely patching
        ``param_groups`` on the existing optimizer would leave it bound to
        the parameters (and momentum state) of the discarded model, so the
        fresh model would never be updated.

        Returns:
            True, the value ``Trainable.reset_config`` uses to report a
            successful reset.
        """
        self._build_model_and_optimizer(new_config)
        self.config = new_config
        return True

    def _build_model_and_optimizer(self, config):
        """Create a fresh ConvNet and an SGD optimizer over its parameters."""
        self.model = ConvNet()
        self.optimizer = optim.SGD(
            self.model.parameters(),
            lr=config.get("lr", 0.01),
            momentum=config.get("momentum", 0.9),
        )

# Tune Search API

In [None]:
# Example search space showing one entry per Tune sampling primitive.
config = {
    # Sample a float uniformly between -5.0 and -1.0
    "uniform": tune.uniform(-5, -1),

    # Sample a float uniformly between 3.2 and 5.4,
    # rounding to multiples of 0.2
    "quniform": tune.quniform(3.2, 5.4, 0.2),

    # Sample a float uniformly between 0.0001 and 0.01, while
    # sampling in log space
    "loguniform": tune.loguniform(1e-4, 1e-2),

    # Sample a float uniformly between 0.0001 and 0.1, while
    # sampling in log space and rounding to multiples of 0.00005
    "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5),

    # Sample a random float from a normal distribution with
    # mean=10 and sd=2
    "randn": tune.randn(10, 2),

    # Sample a random float from a normal distribution with
    # mean=10 and sd=2, rounding to multiples of 0.2
    "qrandn": tune.qrandn(10, 2, 0.2),

    # Sample an integer uniformly between -9 (inclusive) and 15 (exclusive)
    "randint": tune.randint(-9, 15),

    # Sample a random integer uniformly between -21 (inclusive) and 12 (inclusive (!))
    # rounding to multiples of 3 (includes 12)
    # if q is 1, then randint is called instead with the upper bound exclusive
    "qrandint": tune.qrandint(-21, 12, 3),

    # Sample an integer uniformly between 1 (inclusive) and 10 (exclusive),
    # while sampling in log space
    "lograndint": tune.lograndint(1, 10),

    # Sample an integer uniformly between 1 (inclusive) and 10 (inclusive (!)),
    # while sampling in log space and rounding to multiples of 2
    # if q is 1, then lograndint is called instead with the upper bound exclusive
    "qlograndint": tune.qlograndint(1, 10, 2),

    # Sample an option uniformly from the specified choices
    "choice": tune.choice(["a", "b", "c"]),

    # Sample from a random function, in this case one that
    # depends on another value from the search space
    # (here the "uniform" entry, read via ``spec.config.uniform``)
    "func": tune.sample_from(lambda spec: spec.config.uniform * 0.01),

    # Do a grid search over these values. Every value will be sampled
    # ``num_samples`` times (``num_samples`` is the parameter you pass to ``tune.TuneConfig``,
    # which is taken in by ``Tuner``)
    "grid": tune.grid_search([32, 64, 128])
}

# Tune Search Algorithms (tune.search)

- Tune’s Search Algorithms are wrappers around open-source optimization libraries for efficient hyperparameter selection. Each library has a specific way of defining the search space - please refer to their documentation for more details. Tune will automatically convert search spaces passed to Tuner to the library format in most cases.

- You can utilize these search algorithms as follows:

In [None]:
from ray import train, tune
from ray.train import RunConfig
from ray.tune.search.optuna import OptunaSearch

def train_fn(config):
    """Report ``config["param"]`` back to Tune as the ``"loss"``."""
    # This objective function is just for demonstration purposes
    loss = config["param"]
    train.report({"loss": loss})

# Search "param" with OptunaSearch: 100 samples, minimising the reported "loss".
tuner = tune.Tuner(
    train_fn,
    param_space={"param": tune.uniform(0, 1)},
    tune_config=tune.TuneConfig(
        metric="loss",
        mode="min",
        search_alg=OptunaSearch(),
        num_samples=100,
    ),
)
results = tuner.fit()