# Setup Gym Environment

```py
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05 # when to give up
my_config["start_obs_capture"] = 0.05 # when to capture 
my_config["time_step_timeout_factor"] = 1.0 # how late is OK
my_config["act_buf_len"] = 3 # how many past actions
my_config["reset_act_buf"] = True # reset action buffer on reset
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2
```

This section needs to be set up for any method.

In [1]:
# Switch between the two paths of this notebook: cells guarded by
# `if debugAsGym:` drive the environment directly through Gymnasium, cells
# guarded by `if not debugAsGym:` register it with RLlib and train.
debugAsGym = False

from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium

# Work on a copy: assigning into DEFAULT_CONFIG_DICT itself would mutate the
# module-level defaults for everything else that imports them.
my_config = DEFAULT_CONFIG_DICT.copy()
my_config.update(
    {
        "interface": MyGranTurismoRTGYM,
        "time_step_duration": 0.05,
        "start_obs_capture": 0.05,
        "time_step_timeout_factor": 1.0,
        "ep_max_length": 512,
        "act_buf_len": 3,
        "reset_act_buf": False,
        "benchmark": True,
        "benchmark_polyak": 0.2,
    }
)

# Keyword arguments for the interface class selected above.
my_config["interface_kwargs"] = {
    'debugFlag': False,  # do not use render() while True
    'img_hist_len': 3,
    'modelMode': 3,
    'agent': 'PPO',
    'imageWidth': 320,  # there is a default Conv layer for PPO with 240 x 320
    'imageHeight': 240,
    'trackChoice': 1,  # 1 is High Speed Ring, 2 is 0-400m
}

In [2]:
# Debug path only: build the environment directly with Gymnasium, passing
# my_config as its config.
if debugAsGym:
    env = gymnasium.make("real-time-gym-v1", config=my_config)

In [3]:
# Debug path only: reset the environment and keep the returned observation
# and info.
if debugAsGym:
    obs,info = env.reset()

In [4]:
# Debug path only: reset, render once, then print the shape of every
# component of the returned observation.
if debugAsGym:
    obs, info = env.reset()
    env.render()
    print("Obs_actual (shape)")
    # Plain loop: the printing is a side effect, so no list needs to be built.
    for o in obs:
        print(o.shape)

In [5]:
# Debug path only: print the shape of every component of the declared
# observation space.
if debugAsGym:
    print("Obs_space (shape)")
    # Plain loop: the printing is a side effect, so no list needs to be built.
    for o in env.observation_space:
        print(o.shape)

In [6]:
# Debug path only: play one episode with randomly sampled actions, rendering
# after every step taken inside the loop.
if debugAsGym:
    import pprint

    obs_space = env.observation_space

    # Fresh episode, followed by one un-rendered random step that provides the
    # initial terminated/truncated flags for the loop condition.
    obs, info = env.reset()
    obs, rew, terminated, truncated, info = env.step(
        env.action_space.sample()
    )

    # Keep stepping until the environment reports the episode is over.
    while not terminated and not truncated:
        act = env.action_space.sample()
        obs, rew, terminated, truncated, info = env.step(act)
        env.render()


# Register the environment so that RLlib can use it

In [7]:
# RLlib path only: register an environment factory under "gt-rtgym-env-v1".
if not debugAsGym:
    from ray.tune.registry import register_env

    def env_creator(env_config):
        """Return a new "real-time-gym-v1" environment built from env_config."""
        return gymnasium.make("real-time-gym-v1", config=env_config)

    # The argument passed to the registered factory is ignored; the
    # environment is always created from my_config.
    register_env(
        "gt-rtgym-env-v1",
        lambda _rllib_config: env_creator(my_config),
    )  # better way

In [8]:
# RLlib path only: restart Ray.
if not debugAsGym:
    import ray
    # Shut down before initializing; presumably so that re-running this cell
    # starts from a clean Ray instance - TODO confirm.
    ray.shutdown()
    ray.init()

2023-05-14 21:54:53,880	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265


In [9]:
# RLlib path only: describe the PPO setup, then build the algorithm from it.
if not debugAsGym:
    from ray.rllib.algorithms.ppo import PPOConfig

    ppo_config = PPOConfig()
    ppo_config = ppo_config.resources(num_gpus=1)
    ppo_config = ppo_config.rollouts(
        num_rollout_workers=1,
        enable_connectors=True,
    )
    ppo_config = ppo_config.framework("torch")
    # Uses the environment name registered with register_env in this notebook.
    ppo_config = ppo_config.environment(
        env="gt-rtgym-env-v1",
        disable_env_checking=True,
        render_env=False,
    )

    algo = ppo_config.build()

2023-05-14 21:54:59,167	INFO algorithm.py:506 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=9392)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=9392)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=9392)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=9392)[0m Waiting for a connection


In [None]:
# RLlib path only: run N training iterations, checkpointing every 10th one.
if not debugAsGym:
    N = 10000  # number of algo.train() calls

    for n in range(N):
        result = algo.train()
        print("Loop: ", n)
        if n % 10 == 0:
            # Save first, so "Saved" is only reported after the checkpoint
            # call has actually returned.
            algo.save()
            print("Saved", n)

    # Final checkpoint: the last iteration (N - 1) is not a multiple of 10,
    # so it would otherwise go unsaved.
    algo.save()
