# Setup Gym Environment

```py
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05 # when to give up
my_config["start_obs_capture"] = 0.05 # when to capture 
my_config["time_step_timeout_factor"] = 1.0 # how late is OK
my_config["act_buf_len"] = 3 # how many past actions
my_config["reset_act_buf"] = True # reset action buffer on reset
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2
```

This section needs to be set up for any method.

In [1]:
import torch

# Report whether PyTorch can see a CUDA device.
print("GPU is available" if torch.cuda.is_available() else "GPU is not available")

GPU is available


In [2]:
import tensorflow as tf
# Print how many physical GPU devices TensorFlow lists.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [3]:
# Mode switches read by the guarded cells further down:
#   debugAsGym - cells guarded by `if debugAsGym:` create and reset the env
#                directly through gymnasium.
#   testResult - cells guarded by `testResult` load a checkpoint instead of
#                building and training a new PPO algorithm.
debugAsGym = False
testResult = False

from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium

# Work on a copy of the defaults. Assigning DEFAULT_CONFIG_DICT directly would
# alias it, so every key set below would silently rewrite the shared defaults
# for the rest of the kernel session (and for any later config built from them).
# A shallow copy is enough because only top-level keys are (re)assigned here.
my_config = dict(DEFAULT_CONFIG_DICT)
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.1        # when to give up
my_config["start_obs_capture"] = 0.1         # when to capture
my_config["time_step_timeout_factor"] = 1.0  # how late is OK
my_config["ep_max_length"] = 224             # presumably the episode length cap in time steps - see rtgym docs
my_config["act_buf_len"] = 3                 # how many past actions
my_config["reset_act_buf"] = False           # action buffer is not reset on reset
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

# Keyword arguments forwarded to the MyGranTurismoRTGYM interface.
my_config["interface_kwargs"] = {
  'debugFlag': False, # do not use render() while True
  'img_hist_len': 3,
  'modelMode': 2,
  'agent' : 'PPO',
  #  [42, 42, K], [84, 84, K], [10, 10, K], [240, 320, K] and  [480, 640, K]
  'imageWidth' : 42, # there is a default Conv layer for PPO with 240 x 320
  'imageHeight' : 42,
  'trackChoice' : 3, # 1 is High Speed Ring, 2 is 0-400m; TODO: document what 3 selects
}

In [4]:
# Debug path only: build the environment directly through gymnasium using
# the notebook-level my_config. Skipped unless debugAsGym is True.
if debugAsGym:
    env = gymnasium.make("real-time-gym-v1", config=my_config)

In [5]:
# Debug path only: reset the environment created by the debug cell above.
# The value returned by reset() is discarded.
if debugAsGym:
    env.reset()

# Register the environment in a way that keeps RLlib happy

In [6]:
if not (debugAsGym or testResult):
    from ray.tune.registry import register_env

    def env_creator(env_config):
        """Create a "real-time-gym-v1" environment from env_config and return it."""
        return gymnasium.make("real-time-gym-v1", config=env_config)

    # The argument RLlib passes to the registered creator is ignored; the
    # notebook-level my_config is what gets handed to env_creator.
    register_env("gt-rtgym-env-v1", lambda config: env_creator(my_config))

In [7]:
if not (debugAsGym or testResult):
    import ray

    # Shut down first so that re-running this cell starts a fresh local instance.
    ray.shutdown()
    ray.init()

2023-09-25 23:27:11,663	INFO worker.py:1616 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


In [8]:
if not (debugAsGym or testResult):
    from ray.rllib.algorithms.ppo import PPOConfig

    # Training path: assemble the PPO configuration step by step and build the
    # algorithm against the environment registered as "gt-rtgym-env-v1".
    algo = (
        PPOConfig()
        .resources(num_gpus=1)
        # enable_connectors is deliberately left at its default here.
        .rollouts(num_rollout_workers=1, batch_mode="truncate_episodes")
        .framework(framework="tf2")
        .environment(
            env="gt-rtgym-env-v1",
            disable_env_checking=True,
            render_env=False,
        )
        .training(train_batch_size=155)
        .build()
    )

2023-09-25 23:27:15,244	INFO algorithm_config.py:3307 -- Executing eagerly (framework='tf2'), with eager_tracing=False. For production workloads, make sure to set eager_tracing=True  in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.
2023-09-25 23:27:15,289	INFO algorithm.py:527 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=12964)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=12964)[0m 
[2m[36m(pid=12964)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=12964)[0m 
[2m[36m(pid=12964)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=12964)[0m 
[2m[36m(pid=12964)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=12964)[0m 
[2m[36m(pid=12964)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=12964)[0m 
[2m[36m(pid=12964)[0m Windows fatal exception: code 0xc0000139

[2m[36m(RolloutWorker pid=12964)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=12964)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=12964)[0m still simple reward system
[2m[36m(RolloutWorker pid=12964)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=12964)[0m Waiting for a connection
[2m[36m(RolloutWorker pid=12964)[0m Connection from ('127.0.0.1', 57443)


2023-09-25 23:27:33,544	INFO trainable.py:172 -- Trainable.setup took 18.260 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [9]:
if not (debugAsGym or testResult):
    # Run N training iterations, checkpointing on every 10th loop index
    # (including index 0) and once more after the loop finishes.
    N = 1000
    for n in range(N):
        result = algo.train()
        print("Loop: ", n)
        if not n % 10:
            print("Saved", n)
            algo.save()

    algo.save()


[2m[36m(RolloutWorker pid=12964)[0m reset triggered
[2m[36m(RolloutWorker pid=12964)[0m reload save for track : 4


KeyboardInterrupt: 

In [None]:
if not (debugAsGym or testResult):
    # Run N training iterations, checkpointing on every 50th loop index
    # (including index 0) and once more after the loop finishes.
    N = 1000
    for n in range(N):
        result = algo.train()
        print("Loop: ", n)
        if not n % 50:
            print("Saved", n)
            algo.save()

    algo.save()

In [None]:
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium

# Work on a copy of the defaults. Assigning DEFAULT_CONFIG_DICT directly would
# alias it, so every key set below would rewrite the shared defaults in place.
# A shallow copy is enough because only top-level keys are (re)assigned here.
# NOTE: if another cell has already mutated DEFAULT_CONFIG_DICT in this kernel
# (e.g. by setting "ep_max_length" on an alias of it), that value is still
# inherited here; restart the kernel to get the pristine defaults.
my_config = dict(DEFAULT_CONFIG_DICT)
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.1        # when to give up
my_config["start_obs_capture"] = 0.1         # when to capture
my_config["time_step_timeout_factor"] = 1.0  # how late is OK
#my_config["ep_max_length"] = 224            # left unset: use the value from the defaults
my_config["act_buf_len"] = 3                 # how many past actions
my_config["reset_act_buf"] = False           # action buffer is not reset on reset
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

# Keyword arguments forwarded to the MyGranTurismoRTGYM interface.
my_config["interface_kwargs"] = {
  'debugFlag': False, # do not use render() while True
  'img_hist_len': 3,
  'modelMode': 4,
  'agent' : 'PPO',
  #  [42, 42, K], [84, 84, K], [10, 10, K], [240, 320, K] and  [480, 640, K]
  'imageWidth' : 42, # there is a default Conv layer for PPO with 240 x 320
  'imageHeight' : 42,
  'trackChoice' : 3, # 1 is High Speed Ring, 2 is 0-400m; TODO: document what 3 selects
}

In [None]:
if testResult and not debugAsGym:
    from ray.tune.registry import register_env

    def env_creator(env_config):
        """Create a "real-time-gym-v1" environment from env_config and return it."""
        return gymnasium.make("real-time-gym-v1", config=env_config)

    # The argument RLlib passes to the registered creator is ignored; the
    # notebook-level my_config is what gets handed to env_creator.
    register_env("gt-rtgym-env-v1", lambda config: env_creator(my_config))

In [None]:
if testResult and not debugAsGym:
    from ray.rllib.algorithms.algorithm import Algorithm

    # Test path: restore the algorithm from a saved checkpoint directory.
    # Alternative checkpoint from the same run: .../checkpoint_002000
    algo = Algorithm.from_checkpoint(
        "C:/Users/nadir/ray_results/PPO_gt-rtgym-env-v1_2023-05-19_07-37-37z3d6v2w2/checkpoint_000061"
    )

In [None]:
if testResult and not debugAsGym:
    # Test path: run a single train() iteration on the restored algorithm.
    result = algo.train()

In [None]:
if testResult and not debugAsGym:
    # Test path: fetch the algorithm's policy and print its model.
    policy = algo.get_policy()
    model = policy.model
    print(model)