<a href="https://colab.research.google.com/github/DonRoboto/SBT_v3erSem/blob/main/ray_example1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install ray

Collecting ray
  Downloading ray-2.8.1-cp310-cp310-manylinux2014_x86_64.whl (62.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.6/62.6 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ray
Successfully installed ray-2.8.1


In [6]:
!pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1


In [10]:
import random

import ray
from ray import train, tune
from ray.tune.schedulers import PopulationBasedTraining


In [11]:

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing"
    )
    args, _ = parser.parse_known_args()

    # Postprocess the perturbed config to ensure it's still valid
    def explore(config):
        # ensure we collect enough timesteps to do sgd
        if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
            config["train_batch_size"] = config["sgd_minibatch_size"] * 2
        # ensure we run at least one sgd iter
        if config["num_sgd_iter"] < 1:
            config["num_sgd_iter"] = 1
        return config

    hyperparam_mutations = {
        "lambda": lambda: random.uniform(0.9, 1.0),
        "clip_param": lambda: random.uniform(0.01, 0.5),
        "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
        "num_sgd_iter": lambda: random.randint(1, 30),
        "sgd_minibatch_size": lambda: random.randint(128, 16384),
        "train_batch_size": lambda: random.randint(2000, 160000),
    }

    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        perturbation_interval=120,
        resample_probability=0.25,
        # Specifies the mutations of these hyperparams
        hyperparam_mutations=hyperparam_mutations,
        custom_explore_fn=explore,
    )

    # Stop when we've either reached 100 training iterations or reward=300
    stopping_criteria = {"training_iteration": 100, "episode_reward_mean": 300}

    tuner = tune.Tuner(
        "PPO",
        tune_config=tune.TuneConfig(
            metric="episode_reward_mean",
            mode="max",
            scheduler=pbt,
            num_samples=1 if args.smoke_test else 2,
        ),
        param_space={
            #"env": "Humanoid-v2",
            "env": "CartPole-v2",
            "kl_coeff": 1.0,
            "num_workers": 4,
            "num_cpus": 1,  # number of CPUs to use per trial
            "num_gpus": 0,  # number of GPUs to use per trial
            "model": {"free_log_std": True},
            # These params are tuned from a fixed starting value.
            "lambda": 0.95,
            "clip_param": 0.2,
            "lr": 1e-4,
            # These params start off randomly drawn from a set.
            "num_sgd_iter": tune.choice([10, 20, 30]),
            "sgd_minibatch_size": tune.choice([128, 512, 2048]),
            "train_batch_size": tune.choice([10000, 20000, 40000]),
        },
        run_config=train.RunConfig(stop=stopping_criteria),
    )
    results = tuner.fit()

2023-12-07 02:13:45,092	INFO tune.py:595 -- [output] This will use the new output engine with verbosity 1. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+------------------------------------------------------------+
| Configuration for experiment     PPO_2023-12-07_02-13-45   |
+------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator     |
| Scheduler                        PopulationBasedTraining   |
| Number of trials                 2                         |
+------------------------------------------------------------+

View detailed results here: /root/ray_results/PPO_2023-12-07_02-13-45

Trial status: 2 PENDING
Current time: 2023-12-07 02:13:45. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING           



Trial status: 2 PENDING
Current time: 2023-12-07 02:14:45. Total running time: 1min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:15:15. Total running time: 1min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:15:45. Total running time: 2min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:16:15. Total running time: 2min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:16:45. Total running time: 3min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:17:15. Total running time: 3min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:17:45. Total running time: 4min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:18:15. Total running time: 4min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:18:45. Total running time: 5min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:19:15. Total running time: 5min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:19:45. Total running time: 6min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:20:15. Total running time: 6min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:20:45. Total running time: 7min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:21:15. Total running time: 7min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:21:45. Total running time: 8min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:22:15. Total running time: 8min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:22:45. Total running time: 9min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:23:15. Total running time: 9min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial nam



Trial status: 2 PENDING
Current time: 2023-12-07 02:23:45. Total running time: 10min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:24:15. Total running time: 10min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:24:45. Total running time: 11min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:25:15. Total running time: 11min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:25:45. Total running time: 12min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:26:15. Total running time: 12min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:26:45. Total running time: 13min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:27:15. Total running time: 13min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:27:45. Total running time: 14min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:28:15. Total running time: 14min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:28:45. Total running time: 15min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:29:15. Total running time: 15min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:29:45. Total running time: 16min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:30:15. Total running time: 16min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:30:45. Total running time: 17min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:31:15. Total running time: 17min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:31:45. Total running time: 18min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:32:15. Total running time: 18min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:32:45. Total running time: 19min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:33:16. Total running time: 19min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:33:46. Total running time: 20min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:34:16. Total running time: 20min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:34:46. Total running time: 21min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:35:16. Total running time: 21min 30s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:35:46. Total running time: 22min 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:36:16. Total running time: 22min 31s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:36:46. Total running time: 23min 1s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:37:16. Total running time: 23min 31s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:37:46. Total running time: 24min 1s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:38:16. Total running time: 24min 31s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:38:46. Total running time: 25min 1s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:39:16. Total running time: 25min 31s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:39:46. Total running time: 26min 1s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:40:16. Total running time: 26min 31s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:40:46. Total running time: 27min 1s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:41:16. Total running time: 27min 31s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:41:46. Total running time: 28min 1s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+
Trial status: 2 PENDING
Current time: 2023-12-07 02:42:16. Total running time: 28min 31s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial n



Trial status: 2 PENDING
Current time: 2023-12-07 02:42:46. Total running time: 29min 1s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+


Resume experiment with: Tuner.restore(path="/root/ray_results/PPO_2023-12-07_02-13-45", trainable=...)
- PPO_CartPole-v2_3fcda_00000: FileNotFoundError('Could not fetch metrics for PPO_CartPole-v2_3fcda_00000: both result.json and progress.csv were not found at /root/ray_results/PPO_2023-12-07_02-13-45/PPO_CartPole-v2_3fcda_00000_0_num_sgd_iter=20,sgd_minibatch_size=128,train_batch_size=20000_2023-12-07_02-13-45')
- PPO_CartPole-v2_3fcda_00001: FileNotFoundError('Could not fetch metrics for PPO_CartPole-v2_3fcda_00001: both result.json and progress.csv were not found at /root/ray_results/PPO_2023-12-07_02-13-45/PPO_CartPole-v2_3fcda_00001_1_num_sgd_iter=10,sgd_minibatch_size=512,train_batch_size=40000_2023-12-07_02-13-45')


Trial status: 2 PENDING
Current time: 2023-12-07 02:42:47. Total running time: 29min 2s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+-----------------------------------------------------------------------------------------------------+
| Trial name                    status       num_sgd_iter     sgd_minibatch_size     train_batch_size |
+-----------------------------------------------------------------------------------------------------+
| PPO_CartPole-v2_3fcda_00000   PENDING                20                    128                20000 |
| PPO_CartPole-v2_3fcda_00001   PENDING                10                    512                40000 |
+-----------------------------------------------------------------------------------------------------+



In [12]:
import pprint

best_result = results.get_best_result()

print("Best performing trial's final set of hyperparameters:\n")
pprint.pprint(
    {k: v for k, v in best_result.config.items() if k in hyperparam_mutations}
)

print("\nBest performing trial's final reported metrics:\n")

metrics_to_print = [
    "episode_reward_mean",
    "episode_reward_max",
    "episode_reward_min",
    "episode_len_mean",
]
pprint.pprint({k: v for k, v in best_result.metrics.items() if k in metrics_to_print})



RuntimeError: ignored

In [None]:
from ray.rllib.algorithms.algorithm import Algorithm

loaded_ppo = Algorithm.from_checkpoint(best_result.checkpoint)
loaded_policy = loaded_ppo.get_policy()

# See your trained policy in action
# loaded_policy.compute_single_action(...)