# Initial AI Agent PPO w/o NN (working) MR2 Drag Disc

In [None]:
import torch

# Report whether PyTorch can see a CUDA device before any training is set up.
print("GPU is available" if torch.cuda.is_available() else "GPU is not available")

In [None]:
from myRTClass import MyGranTurismoRTGYM, DEFAULT_CONFIG_DICT
import numpy as np
import gymnasium
from time import sleep

# Start from a shallow copy of the defaults: assigning keys on the imported
# DEFAULT_CONFIG_DICT itself would silently change the defaults for every other
# user of that object in this process (and on every re-run of this cell).
my_config = dict(DEFAULT_CONFIG_DICT)

# Top-level real-time-gym settings. The key semantics are defined by rtgym;
# NOTE(review): the timing values are presumably in seconds — confirm against
# the rtgym version in use.
my_config["interface"] = MyGranTurismoRTGYM
my_config["time_step_duration"] = 0.05
my_config["start_obs_capture"] = 0.05
my_config["time_step_timeout_factor"] = 1.0
my_config["ep_max_length"] = 600
my_config["act_buf_len"] = 4
my_config["reset_act_buf"] = True
my_config["benchmark"] = False
my_config["benchmark_polyak"] = 0.2

# Keyword arguments stored under "interface_kwargs" for the
# MyGranTurismoRTGYM interface.
my_config["interface_kwargs"] = {
    'debugFlag': False,  # do not use render() while True
    'discreteAccel': True,
    'accelAndBrake': False,
    'discSteer': True,
    'contAccelOnly': False,
    'discAccelOnly': False,
    'modelMode': 13,
    # Supported image shapes:
    #   [42, 42, K], [84, 84, K], [10, 10, K], [240, 320, K] and [480, 640, K]
    'imageWidth': 42,  # there is a default Conv layer for PPO with 240 x 320
    'imageHeight': 42,
    'trackChoice': 1,  # 0 is HS, 1 is 400m
    'carChoice': 0,  # 0 is MR2, 1 is Supra, 2 is Civic
    'rewardMode': "simplex",
}

In [None]:
def env_creator(env_config):
    """Create a "real-time-gym-v1" environment from *env_config*.

    Args:
        env_config: Configuration dict forwarded to ``gymnasium.make`` as
            its ``config`` keyword argument.

    Returns:
        The environment instance produced by ``gymnasium.make``.
    """
    return gymnasium.make("real-time-gym-v1", config=env_config)

from ray.tune.registry import register_env
def _registered_env_factory(config):
    # The config passed in by the caller is deliberately ignored: the
    # environment is always built from the notebook-level ``my_config``.
    return env_creator(my_config)


register_env("gt-rtgym-env-v1", _registered_env_factory)  # better way

In [None]:
import ray
# Shut Ray down first, then start it again with default settings
# (presumably so this cell can be re-run without restarting the kernel).
ray.shutdown()
ray.init()

In [None]:
from ray.rllib.algorithms.ppo import PPOConfig
# Build the PPO configuration one section at a time; each call returns the
# config object that the next step continues from.
config = PPOConfig()

# Resources: request one GPU.
config = config.resources(num_gpus=1)

# Rollouts: a single rollout worker, with connectors enabled.
config = config.rollouts(
    num_rollout_workers=1,
    enable_connectors=True,
    batch_mode="truncate_episodes",  # alternative: "completed_episodes"
)

# Framework: PyTorch.
config = config.framework(framework="torch")

# Environment: the name registered above via register_env; env checking and
# rendering are both switched off.
config = config.environment(
    env="gt-rtgym-env-v1",
    disable_env_checking=True,
    render_env=False,
)

# Training: batch size matches my_config["ep_max_length"] (600).
config = config.training(train_batch_size=600)

In [None]:
import json

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that substitutes a placeholder for unserializable values."""

    # Text emitted in place of any value that json cannot encode.
    _PLACEHOLDER = "Not serializable object!"

    def default(self, obj):
        """Return *obj* if ``json.dumps`` accepts it, else the placeholder string."""
        encodable = True
        try:
            # Probe only; the encoded text itself is discarded.
            json.dumps(obj)
        except TypeError:
            encodable = False
        return obj if encodable else self._PLACEHOLDER

config_dict = config.to_dict()

# Pretty-print the full configuration with sorted keys; values json cannot
# encode are rendered through CustomEncoder.
config_json = json.dumps(config_dict, cls=CustomEncoder, indent=4, sort_keys=True)
print(config_json)

In [None]:
# Build the algorithm object from the PPO configuration defined above.
algo = config.build()

In [None]:
# algo.get_policy().model

In [None]:
# Number of times algo.train() is called.
N = 3000

for n in range(N):
    result = algo.train()
    print("Loop: ", n)
    # Only every 10th pass (n = 0, 10, 20, ...) writes a checkpoint.
    if n % 10:
        continue
    print("Saved", n)
    algo.save()

# Save once more after the loop so the final state is checkpointed.
algo.save()

#Works!

In [None]:
#from ray.rllib.algorithms.algorithm import Algorithm
#algo = Algorithm.from_checkpoint("C:/Users/nadir/ray_results/PPO_gt-rtgym-env-v1_2023-10-14_22-11-44ayp9l_kd/checkpoint_000881")

In [None]:
# result = algo.train() #single try