# Setup Gym Environment

```py
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05 # nominal duration of one time step (seconds)
my_config["start_obs_capture"] = 0.05 # when, within the time step, to start capturing the observation
my_config["time_step_timeout_factor"] = 1.0 # how late a time step may run before it times out
my_config["act_buf_len"] = 3 # how many past actions are kept in the action buffer
my_config["reset_act_buf"] = True # reset the action buffer on reset
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2
```

This section needs to be set up for any method.

In [1]:
# Notebook mode switches, read by the guarded cells below:
#   debugAsGym=True  -> create and reset the env directly through gymnasium;
#                       every RLlib cell is skipped.
#   testResult=True  -> (with debugAsGym False) skip the training cells and run
#                       the cells that restore a saved checkpoint instead.
debugAsGym = False
testResult = False

from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium

# Work on a private (shallow) copy: assigning into DEFAULT_CONFIG_DICT itself
# would mutate the shared default in place, so keys set here (for example
# "ep_max_length") would silently persist for any later code that starts
# again from DEFAULT_CONFIG_DICT. A shallow copy is enough because only
# top-level keys are (re)assigned below.
my_config = dict(DEFAULT_CONFIG_DICT)
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.1
my_config["start_obs_capture"] = 0.1
my_config["time_step_timeout_factor"] = 1.0  # how late a time step may run
my_config["ep_max_length"] = 224
my_config["act_buf_len"] = 3  # how many past actions are buffered
my_config["reset_act_buf"] = False  # do not reset the action buffer on reset
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

# Keyword arguments forwarded to the interface class set above.
my_config["interface_kwargs"] = {
    'debugFlag': False,  # do not use render() while True
    'img_hist_len': 3,
    'modelMode': 4,
    'agent': 'A3C',
    # [42, 42, K], [84, 84, K], [10, 10, K], [240, 320, K] and [480, 640, K]
    'imageWidth': 42,  # there is a default Conv layer for PPO with 240 x 320
    'imageHeight': 42,
    # 1 is High Speed Ring, 2 is 0-400m in MR2, 3 is 0-400m in Supra,
    # 4 is test ring
    'trackChoice': 3,
}

In [2]:
if debugAsGym:
    # Debug path: build the "real-time-gym-v1" environment directly through
    # gymnasium with my_config, without going through RLlib.
    env = gymnasium.make("real-time-gym-v1", config=my_config)

In [3]:
if debugAsGym:
    # Debug path only: reset the directly-created env. The value returned by
    # reset() is not stored.
    env.reset()

# Register the environment in a way that RLlib is happy with

In [4]:
if not (debugAsGym or testResult):
    from ray.tune.registry import register_env

    def env_creator(env_config):
        """Build and return a "real-time-gym-v1" env from ``env_config``."""
        return gymnasium.make("real-time-gym-v1", config=env_config)

    # RLlib calls the registered factory with its own env config; that argument
    # is ignored here and the notebook-level my_config is used instead.
    register_env(
        "gt-rtgym-env-v1",
        lambda config: env_creator(my_config),
    )

In [5]:
if not (debugAsGym or testResult):
    import ray

    # Shut down any Ray instance that is already running (e.g. from a previous
    # execution of this cell) before starting a new one.
    ray.shutdown()
    ray.init()

2023-05-23 22:37:19,034	INFO worker.py:1616 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


In [6]:
if not (debugAsGym or testResult):
    from ray.rllib.algorithms.sac import SACConfig
    from ray.rllib.algorithms.ppo import PPOConfig
    from ray.rllib.algorithms.a2c import A2CConfig
    from ray.rllib.algorithms.a3c import A3CConfig

    # Only A3CConfig is used below; the other config classes are imported but
    # not referenced in this cell.
    a3c_config = A3CConfig()

    # Resource allocation.
    a3c_config = a3c_config.resources(
        num_gpus_per_learner_worker=1,
        num_cpus_for_local_worker=1,
        num_cpus_per_learner_worker=1,
        num_gpus=1,
    )

    # Sampling: a single rollout worker.
    # Disabled option: rollout_fragment_length=256
    a3c_config = a3c_config.rollouts(
        num_rollout_workers=1,
        enable_connectors=True,
        batch_mode="truncate_episodes",
    )

    # Deep-learning framework.
    # Disabled option: eager_tracing=True
    a3c_config = a3c_config.framework(framework="torch")

    # Environment: the name registered with register_env() earlier.
    a3c_config = a3c_config.environment(
        env="gt-rtgym-env-v1",
        disable_env_checking=True,
        render_env=False,
    )

    # Training hyperparameters. Disabled options kept for reference:
    #   lr=tune.grid_search([0.01, 0.001, 0.0001])
    #   lambda_=0.95
    #   gamma=0.99
    #   sgd_minibatch_size=128
    #   num_sgd_iter=8
    #   clip_param=0.2
    #   model={"fcnet_hiddens": [1, 8]}
    a3c_config = a3c_config.training(train_batch_size=155)

    algo = a3c_config.build()

2023-05-23 22:37:27,647	INFO algorithm.py:527 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=14456)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=14456)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=14456)[0m still simple reward system
[2m[36m(RolloutWorker pid=14456)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=14456)[0m Waiting for a connection
[2m[36m(RolloutWorker pid=14456)[0m Connection from ('127.0.0.1', 59505)
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4


2023-05-23 22:37:39,402	INFO trainable.py:172 -- Trainable.setup took 11.756 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [7]:
if not (debugAsGym or testResult):
    N = 1000

    # Run N training iterations, checkpointing on every 10th one (including
    # iteration 0).
    for n in range(N):
        result = algo.train()
        print("Loop: ", n)
        if n % 10:
            continue
        print("Saved", n)
        algo.save()

    # Final checkpoint once the loop has completed.
    algo.save()


Loop:  0
Saved 0
Loop:  1
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  2
Loop:  3




Loop:  4
Loop:  5
Loop:  6
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  7
Loop:  8
Loop:  9
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  10
Saved 10
Loop:  11
Loop:  12
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  13
Loop:  14
Loop:  15
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  16
Loop:  17
Loop:  18
Loop:  19
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  20
Saved 20
Loop:  21
Loop:  22
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  23
Loop:  24
Loop:  25
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  26
Loop:  27
Loop:  28
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  29
Loop:  30
Saved 30
Loop:  31
Loop:  32
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  33
Loop:  34
Loop:  35
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  36
Loop:  37
Loop:  38
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  39
Loop:  40
Saved 40
Loop:  41
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  42
Loop:  43
Loop:  44
Loop:  45
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  46
Loop:  47
Loop:  48
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  49
Loop:  50
Saved 50
Loop:  51
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  52
Loop:  53
Loop:  54
Loop:  55
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  56
Loop:  57
Loop:  58
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  59
Loop:  60
Saved 60
Loop:  61
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  62
Loop:  63
Loop:  64
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  65
Loop:  66
Loop:  67
Loop:  68
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  69
Loop:  70
Saved 70
Loop:  71
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  72
Loop:  73
Loop:  74
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  75
Loop:  76
Loop:  77
Loop:  78
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  79
Loop:  80
Saved 80
Loop:  81
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  82
Loop:  83
Loop:  84
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  85
Loop:  86
Loop:  87
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  88
Loop:  89
Loop:  90
Saved 90
Loop:  91
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  92
Loop:  93
Loop:  94
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  95
Loop:  96
Loop:  97
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  98
Loop:  99
Loop:  100
Saved 100
Loop:  101
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  102
Loop:  103
Loop:  104
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  105
Loop:  106
Loop:  107
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  108
Loop:  109
Loop:  110
Saved 110
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  111
Loop:  112
Loop:  113
Loop:  114
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  115
Loop:  116
Loop:  117
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  118
Loop:  119
Loop:  120
Saved 120
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  121
Loop:  122
Loop:  123
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4
Loop:  124




Loop:  125
Loop:  126
Loop:  127
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  128
Loop:  129
Loop:  130
Saved 130
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  131
Loop:  132
Loop:  133
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  134
Loop:  135
Loop:  136
Loop:  137
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  138
Loop:  139
Loop:  140
Saved 140
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  141
Loop:  142
Loop:  143
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  144
Loop:  145
Loop:  146
Loop:  147
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  148
Loop:  149
Loop:  150
Saved 150
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  151
Loop:  152
Loop:  153




[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  154
Loop:  155
Loop:  156
Loop:  157
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  158
Loop:  159
Loop:  160
Saved 160
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  161
Loop:  162
Loop:  163
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  164
Loop:  165
Loop:  166
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4
Loop:  167




Loop:  168
Loop:  169
Loop:  170
Saved 170
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  171
Loop:  172
Loop:  173
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  174
Loop:  175
Loop:  176
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  177
Loop:  178
Loop:  179
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  180
Saved 180
Loop:  181
Loop:  182
Loop:  183
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  184
Loop:  185
Loop:  186
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  187
Loop:  188
Loop:  189
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  190
Saved 190
Loop:  191
Loop:  192
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  193
Loop:  194
Loop:  195
Loop:  196
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  197
Loop:  198
Loop:  199
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  200
Saved 200
Loop:  201
Loop:  202
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  203
Loop:  204
Loop:  205
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4
Loop:  206




Loop:  207
Loop:  208
Loop:  209
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  210
Saved 210
Loop:  211
Loop:  212
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  213
Loop:  214
Loop:  215
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  216
Loop:  217
Loop:  218
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  219
Loop:  220
Saved 220
Loop:  221
Loop:  222
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  223
Loop:  224
Loop:  225
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  226
Loop:  227
Loop:  228
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  229
Loop:  230
Saved 230
Loop:  231
Loop:  232
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  233
Loop:  234
Loop:  235
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  236
Loop:  237
Loop:  238
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  239
Loop:  240
Saved 240




Loop:  241
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4
Loop:  242




Loop:  243
Loop:  244
Loop:  245
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4




Loop:  246
Loop:  247
Loop:  248
[2m[36m(RolloutWorker pid=14456)[0m reset triggered
[2m[36m(RolloutWorker pid=14456)[0m reload save for track : 4


2023-05-23 23:08:17,254	ERROR actor_manager.py:507 -- Ray error, taking actor 1 out of service. [36mray::RolloutWorker.apply()[39m (pid=14456, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x00000148FDDD3220>)
  File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\actor_manager.py", line 185, in apply
    raise e
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib

Loop:  249


[2m[36m(RolloutWorker pid=14456)[0m Exception in thread Thread-3:
[2m[36m(RolloutWorker pid=14456)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=14456)[0m   File "python\ray\_raylet.pyx", line 921, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=14456)[0m   File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=14456)[0m   File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=14456)[0m   File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
[2m[36m(RolloutWorker pid=14456)[0m   File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor
[2m[36m(RolloutWorker pid=14456)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(RolloutWorker pid=14456)[0m   File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", lin

RayTaskError(ValueError): [36mray::RolloutWorker.apply()[39m (pid=14456, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x00000148FDDD3220>)
  File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\actor_manager.py", line 185, in apply
    raise e
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\actor_manager.py", line 176, in apply
    return func(self, *args, **kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\algorithms\a3c\a3c.py", line 204, in sample_and_compute_grads
    samples = worker.sample()
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 915, in sample
    batches = [self.input_reader.next()]
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 92, in next
    batches = [self.get_data()]
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 500, in get_data
    raise rollout
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\threading.py", line 973, in _bootstrap_inner
    self.run()
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 440, in run
    raise e
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 437, in run
    self._run()
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\sampler.py", line 486, in _run
    item = next(env_runner)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 323, in run
    outputs = self.step()
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 361, in step
    eval_results = self._do_policy_eval(to_eval=to_eval)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 1049, in _do_policy_eval
    eval_results[policy_id] = policy.compute_actions_from_input_dict(
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 522, in compute_actions_from_input_dict
    return self._compute_action_helper(
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\threading.py", line 24, in wrapper
    return func(self, *a, **k)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 1153, in _compute_action_helper
    action_dist = dist_class(dist_inputs, self.model)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\models\torch\torch_action_dist.py", line 250, in __init__
    self.dist = torch.distributions.normal.Normal(mean, torch.exp(log_std))
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\torch\distributions\normal.py", line 54, in __init__
    super(Normal, self).__init__(batch_shape, validate_args=validate_args)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\torch\distributions\distribution.py", line 55, in __init__
    raise ValueError(
ValueError: Expected parameter loc (Tensor of shape (1, 3)) of distribution Normal(loc: torch.Size([1, 3]), scale: torch.Size([1, 3])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan]])

2023-05-23 23:08:22,869	ERROR worker.py:408 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::RolloutWorker.apply()[39m (pid=14456, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x00000148FDDD3220>)
  File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\nadir\anaconda3\envs\GTAI2\lib\site-packages\ray\rllib\utils\actor_manager.py", line 185, in apply
    raise e
  File "c:\Users\nadir\anaconda

In [None]:
if not (debugAsGym or testResult):
    N = 1000

    # Run N training iterations, checkpointing on every 50th one (including
    # iteration 0).
    for n in range(N):
        result = algo.train()
        print("Loop: ", n)
        if n % 50:
            continue
        print("Saved", n)
        algo.save()

    # Final checkpoint once the loop has completed.
    algo.save()

In [None]:
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium

# Work on a private (shallow) copy so the assignments below do not mutate the
# shared DEFAULT_CONFIG_DICT in place. A shallow copy is enough because only
# top-level keys are (re)assigned.
my_config = dict(DEFAULT_CONFIG_DICT)
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.1
my_config["start_obs_capture"] = 0.1
my_config["time_step_timeout_factor"] = 1.0  # how late a time step may run
# "ep_max_length" is deliberately not overridden in this cell:
#my_config["ep_max_length"] = 224
my_config["act_buf_len"] = 3  # how many past actions are buffered
my_config["reset_act_buf"] = False  # do not reset the action buffer on reset
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

# Keyword arguments forwarded to the interface class set above.
my_config["interface_kwargs"] = {
    'debugFlag': False,  # do not use render() while True
    'img_hist_len': 3,
    'modelMode': 4,
    'agent': 'PPO',
    # [42, 42, K], [84, 84, K], [10, 10, K], [240, 320, K] and [480, 640, K]
    'imageWidth': 42,  # there is a default Conv layer for PPO with 240 x 320
    'imageHeight': 42,
    # 1 is High Speed Ring, 2 is 0-400m in MR2, 3 is 0-400m in Supra,
    # 4 is test ring
    'trackChoice': 3,
}

In [None]:
if testResult and not debugAsGym:
    from ray.tune.registry import register_env

    def env_creator(env_config):
        """Build and return a "real-time-gym-v1" env from ``env_config``."""
        return gymnasium.make("real-time-gym-v1", config=env_config)

    # The config RLlib passes to the factory is ignored; the notebook-level
    # my_config is used instead.
    register_env(
        "gt-rtgym-env-v1",
        lambda config: env_creator(my_config),
    )

In [None]:
if testResult and not debugAsGym:
    from ray.rllib.algorithms.algorithm import Algorithm

    # Checkpoint to restore. Alternative kept for reference:
    #   "C:/Users/nadir/ray_results/PPO_gt-rtgym-env-v1_2023-05-19_07-37-37z3d6v2w2/checkpoint_002000"
    checkpoint_path = "C:/Users/nadir/ray_results/PPO_gt-rtgym-env-v1_2023-05-19_07-37-37z3d6v2w2/checkpoint_000061"
    algo = Algorithm.from_checkpoint(checkpoint_path)

In [None]:
if not debugAsGym and testResult:
    # Test mode: run a single training iteration on the current `algo` and
    # keep its result dict in `result`.
    result = algo.train()

In [None]:
if testResult and not debugAsGym:
    # Fetch the algorithm's policy and print its underlying model for
    # inspection.
    policy = algo.get_policy()
    model = policy.model
    print(model)