In [None]:
import omnisafe
from omnisafe.common.experiment_grid import ExperimentGrid

In [None]:
eg = ExperimentGrid(exp_name='CPO-benchmark')

# Algorithms to benchmark.
# NOTE(review): the name says "unconstrained" but the only entry is CPO --
# consider renaming once it is confirmed no other cell relies on this name.
unconstrained_policies = ['CPO']

# Environments to benchmark.
safety_envs = [
    'SafetyPointGoal1-v0',
    'SafetyPointGoal2-v0',
    'SafetyPointButton1-v0',
    'SafetyPointButton2-v0',
    'SafetyPointPush1-v0',
    'SafetyPointPush2-v0',
]

# Steps collected per epoch; the total step budget below is 100 times this value.
steps_per_epoch = 2048

# Every (key, values) pair is handed to `eg.add` in the order listed here.
# A one-element list pins the setting to a single value.
grid_axes = [
    ('env_id', safety_envs),
    ('algo', unconstrained_policies),
    ('logger_cfgs:use_wandb', [True]),
    ('logger_cfgs:wandb_project', ['CPO_benchmark']),
    ('train_cfgs:vector_env_nums', [1]),
    ('train_cfgs:torch_threads', [1]),
    ('train_cfgs:total_steps', [steps_per_epoch * 100]),
    ('algo_cfgs:steps_per_epoch', [steps_per_epoch]),
    # Optional discount-factor sweeps, currently disabled:
    # ('algo_cfgs:gamma', [0.9, 0.95, 0.99]),
    # ('algo_cfgs:cost_gamma', [0.9, 0.95, 0.99]),
    ('seed', [0, 5, 10]),
]
for axis_key, axis_values in grid_axes:
    eg.add(axis_key, axis_values)

In [None]:
"""Example of training a policy from exp-x config with OmniSafe."""
import os, sys
from omnisafe.typing import NamedTuple, Tuple


def train(
    exp_id: str, algo: str, env_id: str, custom_cfgs: NamedTuple
) -> Tuple[float, float, float]:
    """Train a policy from exp-x config with OmniSafe.

    While the agent is training, ``sys.stdout`` and ``sys.stderr`` are
    redirected to ``terminal.log`` and ``error.log`` inside
    ``custom_cfgs['logger_cfgs']['log_dir']``. When ``custom_cfgs`` contains a
    ``'seed'`` entry, both file names are prefixed with ``seed<seed>_``. The
    log files are flushed and closed, and the streams are pointed back at
    ``sys.__stdout__`` / ``sys.__stderr__``, when training finishes or raises.

    Args:
        exp_id (str): Experiment ID.
        algo (str): Algorithm to train.
        env_id (str): The name of test environment.
        custom_cfgs (NamedTuple): Custom configurations. Must provide
            ``custom_cfgs['logger_cfgs']['log_dir']``.

    Returns:
        The ``(reward, cost, ep_len)`` triple returned by ``agent.learn()``.
    """
    terminal_log_name = 'terminal.log'
    error_log_name = 'error.log'
    if 'seed' in custom_cfgs:
        terminal_log_name = f'seed{custom_cfgs["seed"]}_{terminal_log_name}'
        error_log_name = f'seed{custom_cfgs["seed"]}_{error_log_name}'

    # Undo any redirection left over from elsewhere so the progress line below
    # goes to the interpreter's original stdout rather than into a log file.
    sys.stdout = sys.__stdout__
    sys.stderr = sys.__stderr__
    print(f'exp-x: {exp_id} is training...')

    log_dir = custom_cfgs['logger_cfgs']['log_dir']
    # `exist_ok=True` already covers the "directory exists" case.
    os.makedirs(log_dir, exist_ok=True)

    terminal_log_path = os.path.join(f'{log_dir}', terminal_log_name)
    error_log_path = os.path.join(f'{log_dir}', error_log_name)
    with open(terminal_log_path, 'w', encoding='utf-8') as terminal_log:
        with open(error_log_path, 'w', encoding='utf-8') as error_log:
            sys.stdout = terminal_log
            sys.stderr = error_log
            try:
                agent = omnisafe.Agent(algo, env_id, custom_cfgs=custom_cfgs)
                reward, cost, ep_len = agent.learn()
            finally:
                # Restore the streams before the files are closed, so nothing
                # ends up writing to a closed file, even when training fails.
                sys.stdout = sys.__stdout__
                sys.stderr = sys.__stderr__
    return reward, cost, ep_len

In [None]:
import torch
import warnings

# Set the device.
# Indices of the CUDA devices that torch reports on this machine.
available_gpus = list(range(torch.cuda.device_count()))
# Original (misspelled) name, kept in case other cells still refer to it.
avaliable_gpus = available_gpus
print(available_gpus)
gpu_id = [0]
# if you want to use CPU, please set gpu_id = None
# gpu_id = None

# Fall back to CPU when any requested id is missing. A subset test is needed
# here: the proper-superset comparison `set(gpu_id) > set(available)` misses
# requests such as [5] on a machine with GPUs [0, 1]. The `is not None` guard
# lets the CPU setting above pass through without calling set(None).
if gpu_id is not None and not set(gpu_id).issubset(available_gpus):
    warnings.warn('The GPU ID is not available, use CPU instead.')
    gpu_id = None

In [None]:
# Launch every configuration in the grid, using `train` as the per-experiment
# entry point and the GPU selection made in the device cell.
# NOTE(review): the positional `1` is presumably the worker-pool size --
# confirm against the ExperimentGrid.run signature.
eg.run(train, 1, gpu_id=gpu_id)

In [None]:
# Compare results along a parameter that the grid actually sweeps. The gamma
# sweep in the grid definition is commented out, so 'algo_cfgs:gamma' is never
# added to the grid; 'env_id' is swept over six environments, hence
# compare_num=6 to put all of them in one comparison.
# NOTE(review): if the gamma sweep is re-enabled, switch back to
# parameter='algo_cfgs:gamma', compare_num=3.
eg.analyze(parameter='env_id', values=None, compare_num=6, cost_limit=None)

In [None]:
# Single Python File
from omnisafe.common.statistics_tools import StatisticsTools

# Draw the comparison graphs and display them inline (show_image=True).
# 'algo_cfgs:gamma' is not part of the grid (its sweep is commented out in the
# grid definition), so compare along 'env_id', which has six swept values.
# NOTE(review): this reaches into the private `_statistical_tools` attribute of
# the grid; a standalone StatisticsTools instance may be the intended route --
# confirm against the OmniSafe documentation.
eg._statistical_tools.draw_graph(parameter='env_id', values=None, compare_num=6, cost_limit=None, show_image=True)