# Initial AI Agent PPO

In [None]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py


from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray import tune
import json
import numpy as np

ray.shutdown()

my_config = DEFAULT_CONFIG_DICT
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 1024
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2


def env_creator(env_config):
  env = gymnasium.make("real-time-gym-v1", config=my_config)
  return env  # return an env instance

from ray.tune.registry import register_env
register_env("gt-rtgym-env-v1", env_creator)

ray.init()

#agent = ppo.PPO(env="gt-rtgym-env-v1", config=ppoconfig)

algo = (
    PPOConfig()
    #SACConfig()
    .resources(
        num_gpus=1
        )
    .rollouts(
        num_rollout_workers=1,
        #batch_mode="truncate_episodes",
        #rollout_fragment_length=128
        )
    .framework("torch")
    .environment(
        env="gt-rtgym-env-v1",
        disable_env_checking=True,
        render_env=False,
        )
    .training(
            #lr=0.0003,
            #lambda_=0.95,
            #gamma=0.99,
            sgd_minibatch_size=128,
            train_batch_size=1024,
            #num_sgd_iter=8,
            #clip_param=0.2,
            model={"fcnet_hiddens": [12, 12]},
        )
    .build()
)

N = 500
results = []
episode_data = []
episode_json = []


for n in range(N):
    result = algo.train()
    results.append(result)
  
    episode = {
        "n": n,
        "episode_reward_mean": result["episode_reward_mean"], 
        "episode_reward_max":  result["episode_reward_max"],  
        "episode_len_mean":    result["episode_len_mean"],
    }

    episode_data.append(episode)
    episode_json.append(json.dumps(episode))
    
    print('Loop: ', n, 'Max reward: ', {episode["episode_reward_max"]})
    
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

ray.shutdown()

In [None]:
from ray.rllib.algorithms.algorithm import Algorithm
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
import json
import numpy as np

my_config = DEFAULT_CONFIG_DICT
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 256
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
  env = gymnasium.make("real-time-gym-v1", config=my_config)
  return env  # return an env instance

from ray.tune.registry import register_env
register_env("gt-rtgym-env-v1", env_creator)

# Use the Algorithm's `from_checkpoint` utility to get a new algo instance
# that has the exact same state as the old one, from which the checkpoint was
# created in the first place:
loadedAlgo = Algorithm.from_checkpoint(path_to_checkpoint)

In [None]:

N = 300
results = []
episode_data = []
episode_json = []


for n in range(N):
    result = loadedAlgo.train()
    results.append(result)
  
    episode = {
        "n": n,
        "episode_reward_mean": result["episode_reward_mean"], 
        "episode_reward_max":  result["episode_reward_max"],  
        "episode_len_mean":    result["episode_len_mean"],
    }

    episode_data.append(episode)
    episode_json.append(json.dumps(episode))
    
    print('Loop: ', n, 'Max reward: ', {episode["episode_reward_max"]})

In [None]:
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

In [None]:
ray.shutdown()

# SAC Current Developing

In [2]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray import tune, air
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.algorithms.a3c import A3CConfig
import json
import numpy as np
from time import sleep
ray.shutdown()

my_config = DEFAULT_CONFIG_DICT
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.1
my_config["start_obs_capture"] = 0.1
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 128
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
  env = gymnasium.make("real-time-gym-v1", config=my_config)
  return env  # return an env instance

from ray.tune.registry import register_env
register_env("gt-rtgym-env-v1", env_creator)
ray.init()

algo = (
    #PPOConfig()
    SACConfig()
    .resources(
        num_gpus=1
        )
    .rollouts(
        num_rollout_workers=1,
        # batch_mode="truncate_episodes",
        # rollout_fragment_length=128,
        )
    .framework("torch")
    .environment(
        env="gt-rtgym-env-v1",
        disable_env_checking=True,
        render_env=False,
        )
    .training(
            #lr=tune.grid_search([0.01, 0.001, 0.0001])
            #lambda_=0.95,
            #gamma=0.99,
            #sgd_minibatch_size=128,
            #train_batch_size=1024,
            #num_sgd_iter=8,
            #clip_param=0.2,
            #model={"fcnet_hiddens": [12, 12]},
        )
    .build()
)

N = 50

for n in range(N):
    result = algo.train()
    if n % 5 == 0:
        print("Saved", n)
        algo.save()
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

ray.shutdown()

2023-05-08 23:34:47,275	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


[2m[36m(RolloutWorker pid=10332)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=10332)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=10332)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=10332)[0m Waiting for a connection
[2m[36m(RolloutWorker pid=10332)[0m Connection from ('127.0.0.1', 49279)
[2m[36m(RolloutWorker pid=10332)[0m default SAC action


2023-05-08 23:35:02,844	INFO trainable.py:172 -- Trainable.setup took 13.980 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




Saved 0




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




Saved 5
[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




Saved 10
[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




Saved 15




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




KeyboardInterrupt: 



# A3C Method

In [2]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray import tune, air
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.algorithms.a3c import A3CConfig
import json
import numpy as np
from time import sleep
ray.shutdown()

my_config = DEFAULT_CONFIG_DICT
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 512
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
  env = gymnasium.make("real-time-gym-v1", config=my_config)
  return env  # return an env instance

from ray.tune.registry import register_env
register_env("gt-rtgym-env-v1", env_creator)
ray.init()

algo = (
    #PPOConfig()
    #SACConfig()
    A3CConfig()
    .resources(
        #num_gpus=1
        )
    .rollouts(
        num_rollout_workers=1,
        # batch_mode="truncate_episodes",
        # rollout_fragment_length=128,
        )
    .framework("torch")
    .environment(
        env="gt-rtgym-env-v1",
        disable_env_checking=True,
        render_env=False,
        )
    .training(
            #lr=tune.grid_search([0.01, 0.001, 0.0001])
            #lambda_=0.95,
            #gamma=0.99,
            #sgd_minibatch_size=128,
            #train_batch_size=1024,
            #num_sgd_iter=8,
            #clip_param=0.2,
            #model={"fcnet_hiddens": [12, 12]},
        )
    .build()
)

N = 2000

for n in range(N):
    result = algo.train()
    if n % 10 == 0:
        print("Saved", n)
        algo.save()
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

ray.shutdown()

2023-05-13 00:13:02,460	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


[2m[36m(RolloutWorker pid=3128)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=3128)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=3128)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=3128)[0m Waiting for a connection


2023-05-13 00:13:17,479	INFO trainable.py:172 -- Trainable.setup took 13.503 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(RolloutWorker pid=3128)[0m Connection from ('127.0.0.1', 51697)
[2m[36m(RolloutWorker pid=3128)[0m default SAC action
[2m[36m(RolloutWorker pid=3128)[0m reset triggered
[2m[36m(RolloutWorker pid=3128)[0m reload save




Saved 0




[2m[36m(RolloutWorker pid=3128)[0m reset triggered
[2m[36m(RolloutWorker pid=3128)[0m reload save




KeyboardInterrupt: 

In [None]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.algorithm import Algorithm
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.algorithms.a3c import A3CConfig
import json
import numpy as np
from time import sleep

ray.shutdown()

my_config = DEFAULT_CONFIG_DICT
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
#my_config["ep_max_length"] = 512
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2


def env_creator(env_config):
  env = gymnasium.make("real-time-gym-v1", config=my_config)
  return env  # return an env instance

from ray.tune.registry import register_env
register_env("gt-rtgym-env-v1", env_creator)

loadedAlgo = Algorithm.from_checkpoint(path_to_checkpoint)

loadedAlgo.train()

In [None]:
import ray
from ray import air, tune
ray.shutdown()
ray.init()


config = PPOConfig().training(lr=tune.grid_search([0.01, 0.001, 0.0001])).environment(env="CartPole-v1")

tuner = tune.Tuner(
    "PPO",
    run_config=air.RunConfig(
        stop={"episode_reward_mean": 150},
    ),
    param_space=config,
)

tuner.fit()

# PPO with checkpoints

In [1]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray import tune, air
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
import json
import numpy as np
from time import sleep
ray.shutdown()

my_config = DEFAULT_CONFIG_DICT
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 512
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
  env = gymnasium.make("real-time-gym-v1", config=env_config)
  return env  # return an env instance

from ray.tune.registry import register_env
register_env("gt-rtgym-env-v1", lambda config: env_creator(my_config)) # better way

ray.init()

algo = (
    PPOConfig()
    .resources(
        #num_gpus=1
        )
    .rollouts(
        num_rollout_workers=1,
        enable_connectors=True,
        # batch_mode="truncate_episodes",
        # rollout_fragment_length=128,
        )
    .framework("torch")
    .environment(
        env="gt-rtgym-env-v1",
        disable_env_checking=True,
        render_env=False,
        )
    .training(
            #lr=tune.grid_search([0.01, 0.001, 0.0001])
            #lambda_=0.95,
            #gamma=0.99,
            #sgd_minibatch_size=128,
            #train_batch_size=1024,
            #num_sgd_iter=8,
            #clip_param=0.2,
            #model={"fcnet_hiddens": [12, 12]},
        )
    .build()
)

N = 500

for n in range(N):
    result = algo.train()
    if n % 1 == 0:
        print("Saved", n)
        algo.save()
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

ray.shutdown()

2023-05-13 23:47:03,333	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8266 [39m[22m
2023-05-13 23:47:05,035	INFO algorithm.py:506 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=20424)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=20424)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=20424)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=20424)[0m Waiting for a connection


2023-05-13 23:47:25,264	INFO trainable.py:172 -- Trainable.setup took 20.230 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(RolloutWorker pid=20424)[0m Connection from ('127.0.0.1', 54862)
[2m[36m(RolloutWorker pid=20424)[0m default SAC action
[2m[36m(RolloutWorker pid=20424)[0m reset triggered
[2m[36m(RolloutWorker pid=20424)[0m reload save
[2m[36m(RolloutWorker pid=20424)[0m reset triggered
[2m[36m(RolloutWorker pid=20424)[0m reload save




[2m[36m(RolloutWorker pid=20424)[0m reset triggered
[2m[36m(RolloutWorker pid=20424)[0m reload save




[2m[36m(RolloutWorker pid=20424)[0m reset triggered
[2m[36m(RolloutWorker pid=20424)[0m reload save




KeyboardInterrupt: 

# Continue Training

In [1]:
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray import tune, air
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
import json
import numpy as np
from time import sleep
ray.shutdown()

my_config = DEFAULT_CONFIG_DICT
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 512
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
  env = gymnasium.make("real-time-gym-v1", config=env_config)
  return env  # return an env instance

from ray.tune.registry import register_env
register_env("gt-rtgym-env-v1", lambda config: env_creator(my_config)) # better way

ray.init()

from ray.rllib.algorithms.algorithm import Algorithm
algo = Algorithm.from_checkpoint("C:/Users/nadir/ray_results/PPO_gt-rtgym-env-v1_2023-05-13_00-31-13_kmaa6ii/checkpoint_000161")

2023-05-13 23:51:00,859	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8266 [39m[22m
2023-05-13 23:51:02,454	INFO algorithm.py:506 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=23896)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=23896)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=23896)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=23896)[0m Waiting for a connection


2023-05-13 23:51:13,722	INFO trainable.py:172 -- Trainable.setup took 11.269 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(RolloutWorker pid=23896)[0m Connection from ('127.0.0.1', 55182)
[2m[36m(RolloutWorker pid=23896)[0m default SAC action


In [2]:
N = 50

for n in range(N):
    result = algo.train()
    if n % 5 == 0:
        print("Saved", n)
        algo.save()
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

[2m[36m(RolloutWorker pid=23896)[0m reset triggered
[2m[36m(RolloutWorker pid=23896)[0m reload save




[2m[36m(RolloutWorker pid=23896)[0m reset triggered
[2m[36m(RolloutWorker pid=23896)[0m reload save




[2m[36m(RolloutWorker pid=23896)[0m reset triggered
[2m[36m(RolloutWorker pid=23896)[0m reload save
[2m[36m(RolloutWorker pid=23896)[0m reset triggered
[2m[36m(RolloutWorker pid=23896)[0m reload save




[2m[36m(RolloutWorker pid=23896)[0m reset triggered
[2m[36m(RolloutWorker pid=23896)[0m reload save
[2m[36m(RolloutWorker pid=23896)[0m reset triggered
[2m[36m(RolloutWorker pid=23896)[0m reload save




[2m[36m(RolloutWorker pid=23896)[0m reset triggered
[2m[36m(RolloutWorker pid=23896)[0m reload save




Test Agent

In [2]:
from ray.rllib.algorithms.algorithm import Algorithm
import ray
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium

ray.shutdown()

my_config = DEFAULT_CONFIG_DICT
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
  env = gymnasium.make("real-time-gym-v1", config=env_config)
  return env  # return an env instance

from ray.tune.registry import register_env
register_env("gt-rtgym-env-v1", lambda config: env_creator(my_config)) # better way
#register_env("gt-rtgym-env-v1", env_creator)



ray.init()

# env = gymnasium.make("real-time-gym-v1")
algo = Algorithm.from_checkpoint("C:/Users/nadir/ray_results/PPO_gt-rtgym-env-v1_2023-05-13_00-31-13_kmaa6ii/checkpoint_000161")


2023-05-13 23:42:33,654	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8266 [39m[22m
2023-05-13 23:42:35,210	INFO algorithm.py:506 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=20848)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=20848)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=20848)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=20848)[0m Waiting for a connection




[2m[36m(RolloutWorker pid=20848)[0m Connection from ('127.0.0.1', 54273)
[2m[36m(RolloutWorker pid=20848)[0m default SAC action


In [3]:
algo.stop()

In [4]:
policy = algo.get_policy()
model = policy.model

In [5]:
new_config = {
    # Indicate that the Algorithm we setup here doesn't need an actual env.
    "env": None,
    "observation_space": model.obs_space,
    "action_space": model.action_space,
    # ...
}

In [6]:
algo.config = new_config

In [1]:
env = gymnasium.make("real-time-gym-v1", config=my_config)

NameError: name 'gymnasium' is not defined

In [None]:
episode_reward = 0
terminated = truncated = False
obs, info = env.reset()

while not terminated and not truncated:
    action = algo.compute_single_action(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    episode_reward += reward

In [7]:
import gymnasium.spaces as spaces
import numpy as np

def get_observation_space():
    eClutch = spaces.Box(low=0, high=3, shape=(1,), dtype='float32')
    eSpeed = spaces.Box(low=0, high=10000, shape=(1,), dtype='float32')
    eGear =  spaces.Box(low=0, high=6, shape=(1,), dtype='float32')
    vSpeed = spaces.Box(low=0, high=500, shape=(1,), dtype='float32')
    rState = spaces.Box(low=0, high=5, shape=(1,), dtype='float32')
    vDir = spaces.Box(low=0, high=3, shape=(1,), dtype='float32')
    return spaces.Tuple((rState, eClutch, eSpeed, eGear, vSpeed, vDir))
    
def get_action_space():
    #return spaces.Box(low=-1.0, high=1.0, shape=(3,))
    return spaces.Box(low=np.array([0.0, 0.0, -1.0]), high=np.array([1.0, 1.0, 1.0]), dtype='float64')

In [8]:
new_config2 = {
    # Indicate that the Algorithm we setup here doesn't need an actual env.
    "env": None,
    "observation_space": get_observation_space(),
    "action_space": get_action_space(),
    # ...
}

In [9]:
env = gymnasium.make("real-time-gym-v1", config=my_config)

GT Real Time instantiated
GT AI Server instantiated for rtgym
starting up on localhost port 9999
Waiting for a connection
Connection from ('127.0.0.1', 54304)
default SAC action


In [12]:
algo.config = new_config

In [13]:
episode_reward = 0
terminated = truncated = False
obs, info = env.reset()

while not terminated and not truncated:
    action = algo.compute_single_action(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    episode_reward += reward

reset triggered
reload save


AttributeError: 'dict' object has no attribute 'normalize_actions'



Try using savepoints

In [22]:
from ray.rllib.algorithms.algorithm import Algorithm
agent = Algorithm.from_checkpoint("C:/Users/nadir/ray_results/PPO_gt-rtgym-env-v1_2023-05-13_00-31-13_kmaa6ii/checkpoint_000161")

2023-05-13 23:15:03,727	INFO algorithm.py:506 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2023-05-13 23:15:06,293	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8266 [39m[22m
2023-05-13 23:15:13,439	ERROR actor_manager.py:496 -- Ray error, taking actor 1 out of service. The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=21412, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x00000254018892E0>)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\gymnasium\envs\registration.py", line 569, in make
    _check_version_exists(ns, name, version)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\gymnasium\envs\registration.py", line 219, in _check_version_exists
    _check_name_exists(ns, name)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\gymnasium

EnvError: The env string you provided ('gt-rtgym-env-v1') is:
a) Not a supported/installed environment.
b) Not a tune-registered environment creator.
c) Not a valid env class string.

Try one of the following:
a) For Atari support: `pip install gym[atari] autorom[accept-rom-license]`.
   For VizDoom support: Install VizDoom
   (https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md) and
   `pip install vizdoomgym`.
   For PyBullet support: `pip install pybullet`.
b) To register your custom env, do `from ray import tune;
   tune.register('[name]', lambda cfg: [return env obj from here using cfg])`.
   Then in your config, do `config['env'] = [name]`.
c) Make sure you provide a fully qualified classpath, e.g.:
   `ray.rllib.examples.env.repeat_after_me_env.RepeatAfterMeEnv`


In [19]:
from ray.rllib.policy.policy import Policy

# env = gymnasium.make("real-time-gym-v1")
my_restored_policy = Policy.from_checkpoint("C:/Users/nadir/ray_results/PPO_gt-rtgym-env-v1_2023-05-13_00-31-13_kmaa6ii/checkpoint_000161")

print("next")



2023-05-13 23:10:41,887	INFO policy.py:1214 -- Policy (worker=local) running on CPU.
2023-05-13 23:10:41,887	INFO torch_policy_v2.py:110 -- Found 1 visible cuda devices.


next


In [3]:
ray.init()

2023-05-13 22:59:01,352	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.9.9
Ray version:,2.3.0
Dashboard:,http://127.0.0.1:8265


In [4]:
env = gymnasium.make("real-time-gym-v1", config=my_config)

GT Real Time instantiated
GT AI Server instantiated for rtgym
starting up on localhost port 9999
Waiting for a connection
Connection from ('127.0.0.1', 51390)
default SAC action


In [20]:
episode_reward = 0
terminated = truncated = False
obs, info = env.reset()

reset triggered
reload save




In [21]:
while not terminated and not truncated:
    action = my_restored_policy.compute_single_action(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    episode_reward += reward

AttributeError: 'dict' object has no attribute 'compute_single_action'