# Setup Gym Environment

```py
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05 # nominal duration of one time step
my_config["start_obs_capture"] = 0.05 # when to capture 
my_config["time_step_timeout_factor"] = 1.0 # how late is OK
my_config["act_buf_len"] = 3 # how many past actions
my_config["reset_act_buf"] = True # reset action buffer on reset
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2
```

This section needs to be set up for any method.

In [1]:
# Switch between the two halves of this notebook: True runs the plain
# gymnasium debug cells, False runs the RLlib registration/training cells.
debugAsGym = False

from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium

# my_config is the imported default dict itself (not a copy), so every
# override below is written straight into DEFAULT_CONFIG_DICT.
my_config = DEFAULT_CONFIG_DICT
my_config.update({
    "interface": MyGranTurismoRTGYM,
    "time_step_duration": 0.1,
    "start_obs_capture": 0.1,  # when to capture the observation
    "time_step_timeout_factor": 1.0,  # how late is OK
    "ep_max_length": 312,
    "act_buf_len": 3,  # how many past actions are kept
    "reset_act_buf": False,
    "benchmark": True,
    "benchmark_polyak": 0.2,
    # Keyword arguments for the interface class selected above.
    "interface_kwargs": {
        "debugFlag": False,  # do not use render() while True
        "img_hist_len": 3,
        "modelMode": 3,
        "agent": "A3C",
        # Image sizes listed by the author:
        #   [42, 42, K], [84, 84, K], [10, 10, K], [240, 320, K], [480, 640, K]
        # (author's note: there is a default conv layer for PPO with 240 x 320)
        "imageWidth": 42,
        "imageHeight": 42,
        "trackChoice": 2,  # 1 is High Speed Ring, 2 is 0-400m
    },
})

In [2]:
# Debug path only: build the real-time-gym environment directly through
# gymnasium, configured with my_config.
if debugAsGym:
    env = gymnasium.make("real-time-gym-v1", config=my_config)

In [3]:
# Debug path only: reset the environment and keep the first observation and
# its info value.
if debugAsGym:
    obs,info = env.reset()

In [4]:
# Debug path only: reset, render once, then list the shape of every component
# of the observation actually returned by the environment.
if debugAsGym:
    obs, info = env.reset()
    env.render()
    print("Obs_actual (shape)")
    # Plain loop: printing is a side effect, so no throwaway list of None
    # values is built just to drive the prints.
    for component in obs:
        print(component.shape)

In [5]:
# Debug path only: list the shape of every sub-space of the declared
# observation space, for comparison with the shapes of a real observation.
if debugAsGym:
    print("Obs_space (shape)")
    # Plain loop: printing is a side effect, so no throwaway list of None
    # values is built just to drive the prints.
    for sub_space in env.observation_space:
        print(sub_space.shape)

In [6]:
# Debug path only: play one episode with randomly sampled actions, rendering
# after every step except the very first one.
if debugAsGym:
    import pprint  # not used in this cell; kept available for later cells
    obs, info = env.reset()
    obs_space = env.observation_space  # not used in this cell
    # Opening step: taken before the loop and not rendered.
    obs, rew, terminated, truncated, info = env.step(env.action_space.sample())
    # Keep stepping until the environment reports either end condition.
    while not terminated and not truncated:
        act = env.action_space.sample()
        obs, rew, terminated, truncated, info = env.step(act)
        env.render()


# Register the environment in a way that makes RLlib happy

In [7]:
if not debugAsGym:
    from ray.tune.registry import register_env

    def env_creator(env_config):
        """Create and return one real-time-gym environment built from *env_config*."""
        return gymnasium.make("real-time-gym-v1", config=env_config)

    # The config argument RLlib passes to the registered callable is ignored;
    # the environment is always built from my_config.
    register_env("gt-rtgym-env-v1", lambda config: env_creator(my_config))

In [8]:
if not debugAsGym:
    from ray.rllib.algorithms.a3c import A3CConfig

    # Assemble the A3C configuration step by step and build the algorithm.
    # Knobs the author left disabled (an lr grid search, lambda_, gamma,
    # sgd_minibatch_size, num_sgd_iter, clip_param) are not passed here.
    algo = (
        A3CConfig()
        .resources(num_gpus=1)
        .rollouts(
            num_rollout_workers=1,
            enable_connectors=True,
            batch_mode="complete_episodes",
        )
        .framework("torch")
        .environment(
            env="gt-rtgym-env-v1",  # name registered through register_env
            disable_env_checking=True,
            render_env=False,
        )
        .training(
            train_batch_size=512,
            model={"fcnet_hiddens": [16, 16]},  # two hidden layers, 16 units each
        )
        .build()
    )

2023-05-16 00:43:09,409	INFO worker.py:1616 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265


[2m[36m(RolloutWorker pid=12500)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=12500)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=12500)[0m simple reward system
[2m[36m(RolloutWorker pid=12500)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=12500)[0m Waiting for a connection
[2m[36m(RolloutWorker pid=12500)[0m Connection from ('127.0.0.1', 61728)
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3


2023-05-16 00:43:25,299	INFO trainable.py:172 -- Trainable.setup took 18.544 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [9]:
# RLlib path only: run one call to algo.train() and keep what it returns.
if not debugAsGym:
    result = algo.train()

[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




In [10]:
# algo is only created on the RLlib path, so guard the save like the other
# RLlib cells; otherwise this cell raises NameError when debugAsGym is True.
if not debugAsGym:
    # Print the returned checkpoint location so it stays visible in the
    # cell output even though the call is now inside an if-block.
    print(algo.save())

'C:\\Users\\nadir/ray_results\\A3C_gt-rtgym-env-v1_2023-05-16_00-43-06lvgo_2s3\\checkpoint_000001'

In [11]:
# RLlib path only: long training run with a checkpoint every 20th loop, so an
# aborted run loses at most the iterations since the last checkpoint.
if not debugAsGym:
    N = 2000

    for n in range(N):
        result = algo.train()
        print("Loop: ", n)
        if n % 20:
            # Not a checkpoint iteration.
            continue
        print("Saved", n)
        algo.save()
    # Final checkpoint once all N iterations have completed.
    algo.save()

[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  0
Saved 0
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  1
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  2




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  4
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  5
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  6
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  7
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  8




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  9
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  10
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  11
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  12
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  13
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  14
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  15




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  16
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  17
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  18




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  19




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  20
Saved 20
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  21
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  22
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  23
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  24
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  25
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  26




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  27
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  28
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  29
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  30
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  31
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  32




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  33




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  34
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  35
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  36




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  37
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  38
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  39
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  40
Saved 40
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  41
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  42
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  43
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  44
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  45
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  46
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  47
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  48
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  49
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  50
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  51
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  52
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  53
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  54
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  55




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  56
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  57
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  58
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  59
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  60
Saved 60
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  61
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
[2m[36m(RolloutWorker pid=12500)[0m 




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  62
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  63
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  64




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  65
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  66
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  67
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  68
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  69
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  70
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  71




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  72
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  73
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  74




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  75
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  76
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  77
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  78




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  79
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  80
Saved 80
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  81
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  82
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  83




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  84
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  85
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  86
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  87
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  88
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  89
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  90
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3
Loop:  91
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  92
[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




[2m[36m(RolloutWorker pid=12500)[0m reset triggered
[2m[36m(RolloutWorker pid=12500)[0m reload save for track : 3




Loop:  93


2023-05-16 02:22:52,686	ERROR actor_manager.py:507 -- Ray error, taking actor 1 out of service. ray::RolloutWorker.apply() (pid=12500, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001D603F55D00>)
  File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\actor_manager.py", line 185, in apply
    raise e
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib

RayTaskError(ValueError): ray::RolloutWorker.apply() (pid=12500, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001D603F55D00>)
  File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\actor_manager.py", line 185, in apply
    raise e
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\actor_manager.py", line 176, in apply
    return func(self, *args, **kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\algorithms\a3c\a3c.py", line 204, in sample_and_compute_grads
    samples = worker.sample()
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 915, in sample
    batches = [self.input_reader.next()]
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 92, in next
    batches = [self.get_data()]
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 500, in get_data
    raise rollout
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\threading.py", line 973, in _bootstrap_inner
    self.run()
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 440, in run
    raise e
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 437, in run
    self._run()
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 486, in _run
    item = next(env_runner)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 323, in run
    outputs = self.step()
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 361, in step
    eval_results = self._do_policy_eval(to_eval=to_eval)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 1049, in _do_policy_eval
    eval_results[policy_id] = policy.compute_actions_from_input_dict(
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 522, in compute_actions_from_input_dict
    return self._compute_action_helper(
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\threading.py", line 24, in wrapper
    return func(self, *a, **k)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 1153, in _compute_action_helper
    action_dist = dist_class(dist_inputs, self.model)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\models\torch\torch_action_dist.py", line 250, in __init__
    self.dist = torch.distributions.normal.Normal(mean, torch.exp(log_std))
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\torch\distributions\normal.py", line 54, in __init__
    super(Normal, self).__init__(batch_shape, validate_args=validate_args)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\torch\distributions\distribution.py", line 55, in __init__
    raise ValueError(
ValueError: Expected parameter loc (Tensor of shape (1, 3)) of distribution Normal(loc: torch.Size([1, 3]), scale: torch.Size([1, 3])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan]])

2023-05-16 02:22:58,084	ERROR worker.py:408 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::RolloutWorker.apply() (pid=12500, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001D603F55D00>)
  File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\actor_manager.py", line 185, in apply
    raise e
  File "c:\Users\nadir\anaconda

In [1]:
# Switch between the plain gymnasium debug path (True) and the RLlib path
# (False); the environment registration below only runs when it is False.
debugAsGym = False

from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium

# my_config is the imported default dict itself (not a copy), so every
# override below is written straight into DEFAULT_CONFIG_DICT.
my_config = DEFAULT_CONFIG_DICT
my_config.update({
    "interface": MyGranTurismoRTGYM,
    "time_step_duration": 0.1,
    "start_obs_capture": 0.1,  # when to capture the observation
    "time_step_timeout_factor": 1.0,  # how late is OK
    "ep_max_length": 312,
    "act_buf_len": 3,  # how many past actions are kept
    "reset_act_buf": False,
    "benchmark": True,
    "benchmark_polyak": 0.2,
    # Keyword arguments for the interface class selected above.
    "interface_kwargs": {
        "debugFlag": False,  # do not use render() while True
        "img_hist_len": 3,
        "modelMode": 3,
        "agent": "A3C",
        # Image sizes listed by the author:
        #   [42, 42, K], [84, 84, K], [10, 10, K], [240, 320, K], [480, 640, K]
        # (author's note: there is a default conv layer for PPO with 240 x 320)
        "imageWidth": 42,
        "imageHeight": 42,
        "trackChoice": 2,  # 1 is High Speed Ring, 2 is 0-400m
    },
})

if not debugAsGym:
    from ray.tune.registry import register_env

    def env_creator(env_config):
        """Create and return a "real-time-gym-v1" environment.

        env_config is passed straight through as the `config` argument of
        gymnasium.make().
        """
        return gymnasium.make("real-time-gym-v1", config=env_config)

    # The argument RLlib passes to the registered creator is ignored; the
    # environment is always built from the notebook-level my_config.
    register_env("gt-rtgym-env-v1", lambda _rllib_config: env_creator(my_config))

from ray.rllib.algorithms.algorithm import Algorithm

# Rebuild the algorithm from a saved checkpoint directory.
# The path is machine-specific (absolute path into a local ray_results folder)
# and must be adjusted when running elsewhere.
algo = Algorithm.from_checkpoint(
    "C:/Users/nadir/ray_results/A3C_gt-rtgym-env-v1_2023-05-16_00-43-06lvgo_2s3/checkpoint_000082"
)

2023-05-16 07:25:34,916	INFO worker.py:1616 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


[2m[36m(RolloutWorker pid=8384)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=8384)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=8384)[0m simple reward system
[2m[36m(RolloutWorker pid=8384)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=8384)[0m Waiting for a connection
[2m[36m(RolloutWorker pid=8384)[0m Connection from ('127.0.0.1', 50589)
[2m[36m(RolloutWorker pid=8384)[0m reset triggered
[2m[36m(RolloutWorker pid=8384)[0m reload save for track : 3


2023-05-16 07:25:52,179	INFO trainable.py:172 -- Trainable.setup took 19.824 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [2]:
# Call train() on the restored algorithm. The call is left as a bare
# expression so the notebook shows the returned result dict as cell output.
algo.train()

[2m[36m(RolloutWorker pid=8384)[0m Exception in thread Thread-3:
[2m[36m(RolloutWorker pid=8384)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=8384)[0m   File "python\ray\_raylet.pyx", line 921, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=8384)[0m   File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=8384)[0m   File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=8384)[0m   File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
[2m[36m(RolloutWorker pid=8384)[0m   File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor
[2m[36m(RolloutWorker pid=8384)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(RolloutWorker pid=8384)[0m   File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in

{'custom_metrics': {},
 'episode_media': {},
 'info': {'learner': {},
  'num_env_steps_sampled': 50544,
  'num_env_steps_trained': 50544,
  'num_agent_steps_sampled': 50544,
  'num_agent_steps_trained': 50544},
 'sampler_results': {'episode_reward_max': nan,
  'episode_reward_min': nan,
  'episode_reward_mean': nan,
  'episode_len_mean': nan,
  'episode_media': {},
  'episodes_this_iter': 0,
  'policy_reward_min': {},
  'policy_reward_max': {},
  'policy_reward_mean': {},
  'custom_metrics': {},
  'hist_stats': {'episode_reward': [], 'episode_lengths': []},
  'sampler_perf': {},
  'num_faulty_episodes': 0,
  'connector_metrics': {}},
 'episode_reward_max': nan,
 'episode_reward_min': nan,
 'episode_reward_mean': nan,
 'episode_len_mean': nan,
 'episodes_this_iter': 0,
 'policy_reward_min': {},
 'policy_reward_max': {},
 'policy_reward_mean': {},
 'hist_stats': {'episode_reward': [], 'episode_lengths': []},
 'sampler_perf': {},
 'num_faulty_episodes': 0,
 'connector_metrics': {},
 'num_