# Gym demo

Demonstrates the use of the Gym interface.

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import sys
import os
import numpy as np
import pandas as pd
import glob as glob
import time
import pickle
from omegaconf import OmegaConf

import minari
from minari import DataCollector
import torch

# add custom paths
sys.path.extend([os.path.abspath('./assetto_corsa_gym'), './algorithm/discor'])
import AssettoCorsaEnv.assettoCorsa as assettoCorsa
from discor.agent_dataset import Agent
from discor.algorithm import SAC

# Logging setup: send INFO-and-above records through the root logger with a
# timestamped "<time> - <logger> - <level> - <message>" layout.
import logging

logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',  # timestamp layout used for %(asctime)s
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',  # record layout
    level=logging.INFO,  # one of DEBUG, INFO, WARNING, ERROR, CRITICAL
)
# Logger used by the notebook cells themselves.
logger = logging.getLogger(__name__)



## Load config file

In [None]:
# Parse the experiment settings from "config.yml" (resolved relative to the
# notebook's working directory).
config = OmegaConf.load("config.yml")

# Build the Assetto Corsa environment from that configuration; "output" is
# passed as the environment's working directory.
env = assettoCorsa.make_ac_env(work_dir="output", cfg=config)

# Set Agent

In [None]:
# Everything runs on the CPU in this demo.
device = torch.device("cpu")

# SAC hyper-parameters come from the `SAC` section of the config, converted to
# a plain dict so it can be splatted as keyword arguments.
# NOTE(review): state_dim/action_dim are hard-coded; presumably they match the
# environment's observation and action sizes - confirm against `env`.
sac_kwargs = OmegaConf.to_container(config.SAC)
algo = SAC(state_dim=125, action_dim=3, device=device, seed=config.seed, **sac_kwargs)

# The same environment instance is used for both training and evaluation.
agent = Agent(
    env=env,
    test_env=env,
    algo=algo,
    log_dir="output",
    device=device,
    seed=config.seed,
    **config.Agent,
    wandb_logger=None,
)


# Train Agent original

In [None]:
# Train the SAC agent one episode at a time and persist every finished episode
# to a Minari dataset ("SAC/test-v1").
#
# The dataset is created after the first episode and extended after each
# subsequent one. The stop condition is checked *after* the save so that the
# final training episode is written to the dataset as well (previously the
# loop exited before saving it, silently dropping that episode).
# os.environ["MINARI_DATASETS_PATH"] = "F:/code/assetto_corsa_gym-main/mydata"
dataset = None
while True:
    agent.train_episode()

    # Flush the episode that was just collected.
    if dataset is None:
        dataset = env.create_dataset(
            dataset_id="SAC/test-v1",
            algorithm_name="SAC-Policy",
            code_permalink="https://github.com/Farama-Foundation/Minari",
            author="Farama",
            author_email="contact@farama.org"
        )
    else:
        env.add_to_dataset(dataset)

    # Stop once the agent has exceeded its configured step budget.
    if agent._steps > agent._num_steps:
        break

env.close()

# Train Agent using Ray

In [None]:
# Experimental: train with Ray RLlib (PPO) instead of the bundled SAC agent.
# Only the imports and a bare environment instantiation are active; the RLlib
# training loop below is kept commented out for reference.
import ray
import copy
from ray.rllib.algorithms.ppo import PPOConfig

# Machine-specific location of the gym package. Guarded so that re-running the
# cell does not keep appending the same entry to sys.path.
_ray_env_path = r"F:/code/assetto_corsa_gym-main/assetto_corsa_gym"
if _ray_env_path not in sys.path:
    sys.path.append(_ray_env_path)
from AssettoCorsaEnv.ac_env import AssettoCorsaEnv

# Bound to its own name: the notebook-wide `env` is the environment the other
# cells step and call create_dataset()/add_to_dataset() on, so rebinding it
# here would break those cells when the notebook is run top to bottom.
ray_env = AssettoCorsaEnv()
# ray.init()
# algo_config=PPOConfig()
# checkpoint_dir = f"./ray_checkpoints/PPO"
# os.makedirs(checkpoint_dir, exist_ok=True)
# algo_config = algo_config.training(gamma=0.9, lr=0.0001)
# algo_config = algo_config.resources(num_gpus=0)
# algo_config = algo_config.env_runners(num_env_runners=2)
# algo_config = algo_config.environment(env=AssettoCorsaEnv)
# # algo_config.replay_buffer_config["capacity"] = 20000  # reduce replay buffer
# algo_config = algo_config.framework('torch')

# algo = algo_config.build()

# # checkpoint_path = "./checkpoints/SAC"
# # algo.restore(checkpoint_path)

# while(True):
#     result = algo.train()
#     checkpoint = algo.save(checkpoint_dir)
#     timesteps = result["timesteps_total"]
#     rwd_mean = result['episode_reward_mean']
#     len_mean = result['episode_len_mean']
#     print("=*=" * 10)
#     print(f"|| Episode Reward Mean: {rwd_mean}, Episode Length Mean: {len_mean} ||")

# Test Agent

In [None]:
# Roll out a trained checkpoint greedily for `agent._num_eval_episodes`
# episodes and store the trajectories in the Minari dataset "SAC/test-v2".
agent.load("outputs/20250306_153141.469/model/checkpoints/step_00400000",False)
os.environ["MINARI_DATASETS_PATH"] = "F:/code/assetto_corsa_gym-main/mydata"

# Step and record through one and the same environment object. The rollout
# already used `agent._env`; saving through the global `env` only works while
# both names refer to the same recorder.
# NOTE(review): assumes `agent._env` is the environment passed to Agent(env=...)
# and therefore exposes create_dataset()/add_to_dataset() - confirm.
eval_env = agent._env

dataset = None
for _ in range(agent._num_eval_episodes):
    state, _ = eval_env.reset()
    done = False
    truncated = False
    # An episode ends on termination OR truncation; stepping past either is
    # not valid, so both flags must stop the rollout.
    while not (done or truncated):
        action, _ = agent._algo.exploit(state)
        next_state, reward, done, truncated, info = eval_env.step(action)
        state = next_state

    # Persist once per finished episode (same cadence as the training cell)
    # rather than after every single step.
    # NOTE(review): saving mid-episode presumably cuts the recorded trajectory
    # short - verify against the Minari DataCollector documentation.
    if dataset is None:
        dataset = eval_env.create_dataset(
            dataset_id="SAC/test-v2",
            algorithm_name="SAC-Policy",
            code_permalink="https://github.com/Farama-Foundation/Minari",
            author="Farama",
            author_email="contact@farama.org"
        )
    else:
        eval_env.add_to_dataset(dataset)

eval_env.close()

# Random Agent

In [None]:
# Drive the car with a fixed scripted policy (alternating small left/right
# steering; the other two action components are held constant) for up to 100
# steps, store the rollout as Minari dataset "hello/test-v0", then recover the
# car and close the environment.

# Peel off two wrapper layers to reach the object that provides recover_car().
# This does not change between steps, so it is done once up front.
original_env = env.env.env

env.reset()
for i in range(100):
    steer = .1 if i % 2 == 0 else -.1
    next_state, reward, done, truncated, info = env.step(action=np.array([steer, 0.5, -1.]))  # action is already applied
    time.sleep(0.01)
    # Stop on either end-of-episode signal; a truncated episode must not be
    # stepped any further.
    if done or truncated:
        break

dataset = env.create_dataset(
    dataset_id="hello/test-v0",
    algorithm_name="Random-Policy",
    code_permalink="https://github.com/Farama-Foundation/Minari",
    author="Farama",
    author_email="contact@farama.org"
)
# NOTE(review): create_dataset() has just saved the collected data, so this
# call is likely redundant - kept to preserve the original behaviour.
env.add_to_dataset(dataset)
original_env.recover_car()
env.close()

# Plot states

In [None]:
import os
import minari

# Point Minari at the local dataset directory before loading anything.
os.environ["MINARI_DATASETS_PATH"] = "F:/code/assetto_corsa_gym-main/mydata"

# NOTE(review): the cells above write "SAC/test-v1", "SAC/test-v2" and
# "hello/test-v0"; "SAC/test-v0" is presumably left over from an earlier run -
# confirm it exists under MINARI_DATASETS_PATH.
dataset = minari.load_dataset("SAC/test-v0")

# Draw a single episode and show it.
episodes = dataset.sample_episodes(n_episodes=1)
sample_message = f"EPISODE ID'S SAMPLE: {episodes}"
print(sample_message)


# Test my