# Initial AI Agent PPO

In [None]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py


from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray import tune
import json
import numpy as np

# Stop any Ray runtime left over from a previous run of this notebook.
ray.shutdown()

# Start from a *copy* of the defaults. Binding `my_config` directly to
# DEFAULT_CONFIG_DICT would mutate the shared module-level dict, so the
# overrides below would leak into every other cell that starts from the
# defaults. Only top-level keys are assigned, so a shallow copy is enough.
my_config = dict(DEFAULT_CONFIG_DICT)
# Key meanings are defined by the real-time-gym configuration dictionary.
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 1024
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
    """Create and return a new "real-time-gym-v1" environment instance.

    `env_config` is accepted so the function matches the creator signature
    handed to `register_env`, but it is not used: the environment is always
    built from the module-level `my_config`.
    """
    return gymnasium.make("real-time-gym-v1", config=my_config)

from ray.tune.registry import register_env
# Expose the environment to RLlib under the name referenced in the config.
register_env("gt-rtgym-env-v1", env_creator)

ray.init()

#agent = ppo.PPO(env="gt-rtgym-env-v1", config=ppoconfig)

# Build the PPO algorithm: torch backend, one rollout worker, one GPU.
algo = (
    PPOConfig()
    #SACConfig()
    .environment(
        env="gt-rtgym-env-v1",
        render_env=False,
        disable_env_checking=True,
    )
    .framework("torch")
    .rollouts(
        num_rollout_workers=1,
        #batch_mode="truncate_episodes",
        #rollout_fragment_length=128
    )
    .resources(num_gpus=1)
    .training(
        train_batch_size=1024,
        sgd_minibatch_size=128,
        model={"fcnet_hiddens": [12, 12]},
        # Hyperparameters currently left at their defaults:
        #lr=0.0003,
        #lambda_=0.95,
        #gamma=0.99,
        #num_sgd_iter=8,
        #clip_param=0.2,
    )
    .build()
)

# Number of training iterations to run.
N = 500
# Full result dict of every iteration, kept for later inspection.
results = []
# Per-iteration summary (as dicts and as JSON strings).
episode_data = []
episode_json = []


for n in range(N):
    result = algo.train()
    results.append(result)

    # Keep only the headline metrics of this iteration.
    episode = {
        "n": n,
        "episode_reward_mean": result["episode_reward_mean"],
        "episode_reward_max":  result["episode_reward_max"],
        "episode_len_mean":    result["episode_len_mean"],
    }

    episode_data.append(episode)
    episode_json.append(json.dumps(episode))

    # Print the value itself: wrapping it in braces outside an f-string
    # built a one-element set and printed e.g. "{12.3}".
    print('Loop: ', n, 'Max reward: ', episode["episode_reward_max"])

# Persist the trained algorithm; the path is reused by the cells that
# restore it with Algorithm.from_checkpoint.
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

ray.shutdown()

In [None]:
from ray.rllib.algorithms.algorithm import Algorithm
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
import json
import numpy as np

# Start from a *copy* of the defaults. Binding `my_config` directly to
# DEFAULT_CONFIG_DICT would mutate the shared module-level dict, so the
# overrides below would leak into every other cell that starts from the
# defaults. Only top-level keys are assigned, so a shallow copy is enough.
my_config = dict(DEFAULT_CONFIG_DICT)
# Key meanings are defined by the real-time-gym configuration dictionary.
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 256
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
    """Create and return a new "real-time-gym-v1" environment instance.

    `env_config` is accepted so the function matches the creator signature
    handed to `register_env`, but it is not used: the environment is always
    built from the module-level `my_config`.
    """
    return gymnasium.make("real-time-gym-v1", config=my_config)

from ray.tune.registry import register_env
# Register the environment under the same name the checkpointed algorithm
# was built with (presumably the restored algorithm looks the env up by
# this name - confirm against the RLlib version in use).
register_env("gt-rtgym-env-v1", env_creator)

# Use the Algorithm's `from_checkpoint` utility to get a new algo instance
# that has the exact same state as the old one, from which the checkpoint was
# created in the first place:
# NOTE: `path_to_checkpoint` is not defined in this cell; it must still be in
# scope from an earlier cell that called `algo.save()`.
loadedAlgo = Algorithm.from_checkpoint(path_to_checkpoint)

In [None]:

# Number of additional training iterations for the restored algorithm.
N = 300
# Full result dict of every iteration, kept for later inspection.
results = []
# Per-iteration summary (as dicts and as JSON strings).
episode_data = []
episode_json = []


for n in range(N):
    result = loadedAlgo.train()
    results.append(result)

    # Keep only the headline metrics of this iteration.
    episode = {
        "n": n,
        "episode_reward_mean": result["episode_reward_mean"],
        "episode_reward_max":  result["episode_reward_max"],
        "episode_len_mean":    result["episode_len_mean"],
    }

    episode_data.append(episode)
    episode_json.append(json.dumps(episode))

    # Print the value itself: wrapping it in braces outside an f-string
    # built a one-element set and printed e.g. "{12.3}".
    print('Loop: ', n, 'Max reward: ', episode["episode_reward_max"])

In [None]:
# Save the algorithm that was actually trained in the previous cell.
# `algo` is the original, pre-restore instance; checkpointing it here would
# discard all the iterations run on `loadedAlgo`.
path_to_checkpoint = loadedAlgo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

In [None]:
# Shut Ray down now that training and checkpointing are finished.
ray.shutdown()

# SAC (Currently in Development)

In [2]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray import tune, air
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.algorithms.a3c import A3CConfig
import json
import numpy as np
from time import sleep
# Stop any Ray runtime left over from a previous run of this notebook.
ray.shutdown()

# Start from a *copy* of the defaults. Binding `my_config` directly to
# DEFAULT_CONFIG_DICT would mutate the shared module-level dict, so the
# overrides below would leak into every other cell that starts from the
# defaults. Only top-level keys are assigned, so a shallow copy is enough.
my_config = dict(DEFAULT_CONFIG_DICT)
# Key meanings are defined by the real-time-gym configuration dictionary.
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.1
my_config["start_obs_capture"] = 0.1
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 128
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
    """Create and return a new "real-time-gym-v1" environment instance.

    `env_config` is accepted so the function matches the creator signature
    handed to `register_env`, but it is not used: the environment is always
    built from the module-level `my_config`.
    """
    return gymnasium.make("real-time-gym-v1", config=my_config)

from ray.tune.registry import register_env
# Expose the environment to RLlib under the name referenced in the config.
register_env("gt-rtgym-env-v1", env_creator)

ray.init()

# Build the SAC algorithm: torch backend, one rollout worker, one GPU.
algo = (
    #PPOConfig()
    SACConfig()
    .environment(
        env="gt-rtgym-env-v1",
        render_env=False,
        disable_env_checking=True,
    )
    .framework("torch")
    .rollouts(
        num_rollout_workers=1,
        # batch_mode="truncate_episodes",
        # rollout_fragment_length=128,
    )
    .resources(num_gpus=1)
    .training(
        # All training hyperparameters are left at their defaults for now:
        #lr=tune.grid_search([0.01, 0.001, 0.0001])
        #lambda_=0.95,
        #gamma=0.99,
        #sgd_minibatch_size=128,
        #train_batch_size=1024,
        #num_sgd_iter=8,
        #clip_param=0.2,
        #model={"fcnet_hiddens": [12, 12]},
    )
    .build()
)

# Number of training iterations to run.
N = 50

for n in range(N):
    result = algo.train()
    # Write an intermediate checkpoint every fifth iteration (n = 0, 5, ...).
    if n % 5 == 0:
        # Announce the checkpoint only after `save()` has returned, so the
        # message is never printed for a save that failed.
        algo.save()
        print("Saved", n)

# Final checkpoint once all iterations have completed.
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

ray.shutdown()

2023-05-08 23:34:47,275	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265


[2m[36m(RolloutWorker pid=10332)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=10332)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=10332)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=10332)[0m Waiting for a connection
[2m[36m(RolloutWorker pid=10332)[0m Connection from ('127.0.0.1', 49279)
[2m[36m(RolloutWorker pid=10332)[0m default SAC action


2023-05-08 23:35:02,844	INFO trainable.py:172 -- Trainable.setup took 13.980 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




Saved 0




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




Saved 5
[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




Saved 10
[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




Saved 15




[2m[36m(RolloutWorker pid=10332)[0m reset triggered
[2m[36m(RolloutWorker pid=10332)[0m reload save




KeyboardInterrupt: 



# A3C Method

In [3]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray import tune, air
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.algorithms.a3c import A3CConfig
import json
import numpy as np
from time import sleep
# Stop any Ray runtime left over from a previous run of this notebook.
ray.shutdown()

# Start from a *copy* of the defaults. Binding `my_config` directly to
# DEFAULT_CONFIG_DICT would mutate the shared module-level dict, so the
# overrides below would leak into every other cell that starts from the
# defaults. Only top-level keys are assigned, so a shallow copy is enough.
my_config = dict(DEFAULT_CONFIG_DICT)
# Key meanings are defined by the real-time-gym configuration dictionary.
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.1
my_config["start_obs_capture"] = 0.1
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 256
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2

def env_creator(env_config):
    """Create and return a new "real-time-gym-v1" environment instance.

    `env_config` is accepted so the function matches the creator signature
    handed to `register_env`, but it is not used: the environment is always
    built from the module-level `my_config`.
    """
    return gymnasium.make("real-time-gym-v1", config=my_config)

from ray.tune.registry import register_env
# Expose the environment to RLlib under the name referenced in the config.
register_env("gt-rtgym-env-v1", env_creator)

ray.init()

# Build the A3C algorithm: torch backend, one rollout worker, one GPU.
algo = (
    #PPOConfig()
    #SACConfig()
    A3CConfig()
    .environment(
        env="gt-rtgym-env-v1",
        render_env=False,
        disable_env_checking=True,
    )
    .framework("torch")
    .rollouts(
        num_rollout_workers=1,
        # batch_mode="truncate_episodes",
        # rollout_fragment_length=128,
    )
    .resources(num_gpus=1)
    .training(
        # All training hyperparameters are left at their defaults for now:
        #lr=tune.grid_search([0.01, 0.001, 0.0001])
        #lambda_=0.95,
        #gamma=0.99,
        #sgd_minibatch_size=128,
        #train_batch_size=1024,
        #num_sgd_iter=8,
        #clip_param=0.2,
        #model={"fcnet_hiddens": [12, 12]},
    )
    .build()
)

# Number of training iterations to run.
N = 200

for n in range(N):
    result = algo.train()
    # Write an intermediate checkpoint every fifth iteration (n = 0, 5, ...).
    if n % 5 == 0:
        # Announce the checkpoint only after `save()` has returned, so the
        # message is never printed for a save that failed.
        algo.save()
        print("Saved", n)

# Final checkpoint once all iterations have completed.
path_to_checkpoint = algo.save()
print(
    "An Algorithm checkpoint has been created inside directory: "
    f"'{path_to_checkpoint}'."
)

ray.shutdown()

2023-05-08 23:39:53,959	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265


[2m[36m(RolloutWorker pid=13724)[0m GT Real Time instantiated
[2m[36m(RolloutWorker pid=13724)[0m GT AI Server instantiated for rtgym
[2m[36m(RolloutWorker pid=13724)[0m starting up on localhost port 9999
[2m[36m(RolloutWorker pid=13724)[0m Waiting for a connection




[2m[36m(RolloutWorker pid=13724)[0m Connection from ('127.0.0.1', 49928)
[2m[36m(RolloutWorker pid=13724)[0m default SAC action
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save
Saved 0
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 5
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 10
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 15
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 20
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 25




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 30
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 35
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 40
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




Saved 45
[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




[2m[36m(RolloutWorker pid=13724)[0m reset triggered
[2m[36m(RolloutWorker pid=13724)[0m reload save




An Algorithm checkpoint has been created inside directory: 'C:\Users\nadir/ray_results\A3C_gt-rtgym-env-v1_2023-05-08_23-39-55p4qfi8ak\checkpoint_000050'.


In [None]:
# to read later: https://raw.githubusercontent.com/ray-project/ray/master/rllib/examples/unity3d_env_local.py
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import gymnasium
import ray
from ray.rllib.algorithms import ppo
from ray.rllib.algorithms.algorithm import Algorithm
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.algorithms.a3c import A3CConfig
import json
import numpy as np
from time import sleep

# Stop any Ray runtime left over from a previous run of this notebook.
ray.shutdown()

# Start from a *copy* of the defaults. Binding `my_config` directly to
# DEFAULT_CONFIG_DICT would mutate the shared module-level dict, so the
# overrides below would leak into every other cell that starts from the
# defaults. Only top-level keys are assigned, so a shallow copy is enough.
my_config = dict(DEFAULT_CONFIG_DICT)
# Key meanings are defined by the real-time-gym configuration dictionary.
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
# With the override below disabled, "ep_max_length" keeps whatever value
# DEFAULT_CONFIG_DICT provides (no longer a value left behind by another cell).
#my_config["ep_max_length"] = 512
my_config["act_buf_len"] = 3
my_config["reset_act_buf"] = False
my_config["benchmark"] = True
my_config["benchmark_polyak"] = 0.2


def env_creator(env_config):
    """Create and return a new "real-time-gym-v1" environment instance.

    `env_config` is accepted so the function matches the creator signature
    handed to `register_env`, but it is not used: the environment is always
    built from the module-level `my_config`.
    """
    return gymnasium.make("real-time-gym-v1", config=my_config)

from ray.tune.registry import register_env
# Register the environment under the same name the checkpointed algorithm
# was built with (presumably the restored algorithm looks the env up by
# this name - confirm against the RLlib version in use).
register_env("gt-rtgym-env-v1", env_creator)

# Restore an algorithm from a checkpoint.
# NOTE: `path_to_checkpoint` is not defined in this cell; it must still be in
# scope from an earlier cell that called `algo.save()`.
loadedAlgo = Algorithm.from_checkpoint(path_to_checkpoint)

# Run a single training iteration on the restored algorithm; the returned
# result is not stored.
loadedAlgo.train()

In [None]:
import ray
from ray import air, tune
# Imported here so this cell does not depend on an earlier cell having
# brought PPOConfig into the kernel's namespace.
from ray.rllib.algorithms.ppo import PPOConfig

# Restart Ray so the search begins from a clean runtime.
ray.shutdown()
ray.init()


# PPO on CartPole-v1 with a three-value grid search over the learning rate.
config = (
    PPOConfig()
    .training(lr=tune.grid_search([0.01, 0.001, 0.0001]))
    .environment(env="CartPole-v1")
)

tuner = tune.Tuner(
    "PPO",
    run_config=air.RunConfig(
        # Stop criterion: mean episode reward of 150.
        stop={"episode_reward_mean": 150},
    ),
    param_space=config,
)

tuner.fit()