In [1]:
# Switch the Weights & Biases CLI to online mode so runs launched from this
# directory sync to the cloud (see the confirmation message in the cell output).
!wandb online

W&B online. Running your script from this directory will now sync to the cloud.


In [2]:
import warnings
import torch
from omnisafe.common.experiment_grid import ExperimentGrid

In [3]:
# Identifiers: the experiment-grid name and the W&B project that receives the logs.
exp_name = 'Exps_Final_6'
wandb_name = 'Experiments_Final'

# Algorithms and environments swept by the grid.
policies = ['TRPOLag', 'PPOLag']
safety_envs = ['SafetyPointButton2-v0']

eg = ExperimentGrid(exp_name)

# (config key, candidate values) pairs, registered with the grid in this order.
grid_settings = [
    ('env_id', safety_envs),
    ('algo', policies),
    ('logger_cfgs:use_wandb', [True]),
    ('logger_cfgs:wandb_project', [wandb_name]),
    ('train_cfgs:vector_env_nums', [4]),
    # ('train_cfgs:torch_threads', [8]),  # currently disabled
    ('algo_cfgs:steps_per_epoch', [20000]),
    ('train_cfgs:total_steps', [20000 * 30]),
    ('seed', [0]),
]
for grid_key, grid_values in grid_settings:
    eg.add(grid_key, grid_values)

In [4]:
# CUDA device indices requested for the experiment grid.
gpu_id = [0]
# Indices of the CUDA devices torch can see on this machine.
avaliable_gpus = list(range(torch.cuda.device_count()))

# If any requested index is not visible, warn and drop the GPU selection.
if gpu_id and any(device not in avaliable_gpus for device in gpu_id):
    warnings.warn('The GPU ID is not available, use CPU instead.', stacklevel=1)
    gpu_id = None

In [5]:
"""Example of training a policy from exp-x config with OmniSafe."""
from omnisafe.typing import NamedTuple, Tuple
import omnisafe
import os, sys

def train(
    exp_id: str, algo: str, env_id: str, custom_cfgs: NamedTuple
) -> Tuple[float, float, float]:
    """Train a policy from exp-x config with OmniSafe.

    While the agent is built and trained, ``sys.stdout`` and ``sys.stderr`` are
    redirected to ``terminal.log`` and ``error.log`` inside
    ``custom_cfgs['logger_cfgs']['log_dir']`` (both prefixed with ``seed<N>_``
    when a ``seed`` entry is present). The log files are closed and the
    interpreter's original streams are restored before returning, even if
    training raises.

    Args:
        exp_id (str): Experiment ID.
        algo (str): Algorithm to train.
        env_id (str): The name of test environment.
        custom_cfgs (NamedTuple): Custom configurations, accessed as a mapping.
            Must provide ``['logger_cfgs']['log_dir']``; ``'seed'`` is optional.

    Returns:
        The ``(reward, cost, ep_len)`` triple returned by ``agent.learn()``.
    """
    terminal_log_name = 'terminal.log'
    error_log_name = 'error.log'
    if 'seed' in custom_cfgs:
        terminal_log_name = f'seed{custom_cfgs["seed"]}_{terminal_log_name}'
        error_log_name = f'seed{custom_cfgs["seed"]}_{error_log_name}'

    # Undo any redirection left over from an earlier call so the banner below
    # reaches the interpreter's real stdout.
    sys.stdout = sys.__stdout__
    sys.stderr = sys.__stderr__
    print(f'exp-x: {exp_id} is training...')

    log_dir = custom_cfgs['logger_cfgs']['log_dir']
    os.makedirs(log_dir, exist_ok=True)

    terminal_log_path = os.path.join(f'{log_dir}', terminal_log_name)
    error_log_path = os.path.join(f'{log_dir}', error_log_name)
    with open(terminal_log_path, 'w', encoding='utf-8') as terminal_log:
        with open(error_log_path, 'w', encoding='utf-8') as error_log:
            sys.stdout, sys.stderr = terminal_log, error_log
            try:
                agent = omnisafe.Agent(algo, env_id, custom_cfgs=custom_cfgs)
                reward, cost, ep_len = agent.learn()
            finally:
                # Never leave the process writing to a file that is about to
                # be closed by the enclosing ``with`` blocks.
                sys.stdout = sys.__stdout__
                sys.stderr = sys.__stderr__
    return reward, cost, ep_len

In [6]:
# Run every variant registered on the grid through `train`. `gpu_id` is None
# here when the requested GPU index was not available in the check above.
eg.run(train, gpu_id=gpu_id)



[env] 

	"SafetyPointButton2-v0"



[alg] 

	"TRPOLag"
	"PPOLag"



[log-use] 

	true



[log-wan] 

	"Experiments_Final"



[tra-vec] 

	4



[alg-ste] 

	20000



[tra-tot] 

	600000



[see] 

	0

 Variants, counting seeds:               2
 Variants, not counting seeds:           2

value {'seed': 0, 'train_cfgs': {'total_steps': 600000, 'vector_env_nums': 4}, 'algo_cfgs': {'steps_per_epoch': 20000}, 'logger_cfgs': {'wandb_project': 'Experiments_Final', 'use_wandb': True}, 'algo': 'TRPOLag', 'env_id': 'SafetyPointButton2-v0'} key algo
value {'seed': 0, 'train_cfgs': {'total_steps': 600000, 'vector_env_nums': 4}, 'algo_cfgs': {'steps_per_epoch': 20000}, 'logger_cfgs': {'wandb_project': 'Experiments_Final', 'use_wandb': True}, 'algo': 'PPOLag', 'env_id': 'SafetyPointButton2-v0'} key algo



Exps_Final_6_algppolag
Exps_Final_6_algtrpolag

current_config {'seed': 0, 'train_cfgs': {'total_steps': 600000, 'vector_env_nums': 4}, 'algo_cfgs': {'steps_per_epoch': 20000}, 'logger_cfgs': {'wandb_project': 'Experiments_Final', 'use_wandb': True}, 'algo': 'TRPOLag', 'env_id': 'SafetyPointButton2-v0'}


0,1
Loss/Loss_cost_critic,▇▃▄▆█▅▅▃▂▃▃▂▂▂▃▂▄▂▂▂▃▃▂▁▂▂▁▁▃▃
Loss/Loss_cost_critic/Delta,█▁▃▄▃▁▂▂▂▃▃▂▃▂▃▂▃▂▃▃▃▃▂▂▃▃▂▃▃▃
Loss/Loss_pi,█▅▇▅▆▆▇▇▇▆▆▆▇▅▆▅▅▆▅▄▄▁▂▃▃▂▃▃▁▁
Loss/Loss_pi/Delta,▁▃█▅▇▇▆▆▆▆▇▆▇▅▆▆▆▇▅▆▆▄▇▇▇▅█▆▅▆
Loss/Loss_reward_critic,▁▁▁▁▂▃▃▃▄▄▄▄▄▅▆▄▇▆▅▆▆▆▇▅▆▇▆▇██
Loss/Loss_reward_critic/Delta,▅▂▃▄▅▄▅▄▄▄▄▂▄▆▄▁█▂▁▆▄▂▆▁▄▅▂▄▅▄
Metrics/EpCost,▁▂▃▆█▇▄▃▂▂▂▂▃▃▃▃▄▃▂▂▃▃▃▃▃▃▁▁▃▄
Metrics/EpLen,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Metrics/EpRet,▁▁▁▂▂▂▃▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▆▇▇▇██
Metrics/LagrangeMultiplier,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
Loss/Loss_cost_critic,2.05726
Loss/Loss_cost_critic/Delta,0.19618
Loss/Loss_pi,-0.02364
Loss/Loss_pi/Delta,-0.00093
Loss/Loss_reward_critic,0.05797
Loss/Loss_reward_critic/Delta,0.00271
Metrics/EpCost,125.22
Metrics/EpLen,1000.0
Metrics/EpRet,14.61381
Metrics/LagrangeMultiplier,0.98476


pause, refactor and run
- delete epoch 0
- double check log dir

In [None]:
# Evaluate every saved checkpoint of the TRPOLag run trained on SafetyPointGoal1-v0.
# The directory is fixed to that single run, so it is evaluated once rather than
# once per entry of `policies` (the loop variable was never used).
# Raw string: the backslashes are literal Windows path separators, not escapes.
LOG_DIR = r'exp-x\TransferModels\SafetyPointGoal1-v0---f58ca1a463c0273cc1b6ca3902826cae538e180bd08f94f50e82a46844a78ecc\TRPOLag-{SafetyPointGoal1-v0}\seed-000-2024-05-19-17-50-17'
evaluator = omnisafe.Evaluator()
result = None  # stays None when no `.pt` checkpoint is found
for item in os.scandir(os.path.join(LOG_DIR, 'torch_save')):
    if item.is_file() and item.name.split('.')[-1] == 'pt':
        evaluator.load_saved(
            save_dir=LOG_DIR, model_name=item.name, camera_name='track', width=256, height=256
        )
        result = evaluator.evaluate(num_episodes=50)
        rewards, costs = torch.tensor(result[0]), torch.tensor(result[1])
        avg_rew, std_rew = rewards.mean(), rewards.std()
        avg_cost, std_cost = costs.mean(), costs.std()
        # A bare expression inside a loop is not displayed by the notebook,
        # so print the per-checkpoint statistics explicitly.
        print(avg_rew, std_rew, avg_cost, std_cost)

In [None]:
# Mean and standard deviation of the reward and cost values held in `result`
# (the output of the last checkpoint evaluated in the previous cell).
rewards = torch.tensor(result[0])
costs = torch.tensor(result[1])
avg_rew, std_rew = torch.mean(rewards), torch.std(rewards)
avg_cost, std_cost = torch.mean(costs), torch.std(costs)
(avg_rew, std_rew, avg_cost, std_cost)

In [None]:
# Evaluate the TRPOLag checkpoint trained on SafetyPointGoal1-v0 in a different
# environment (SafetyPointGoal2-v0).
env_id = 'SafetyPointGoal2-v0'
custom_cfgs = {
    'train_cfgs': {
        'total_steps': 20000*30,
        'vector_env_nums': 1,
        'parallel': 1,
    },
    'algo_cfgs': {
        'steps_per_epoch': 20000,
    },
    'model_cfgs': {
        # Raw strings: the backslashes are literal Windows path separators, not escapes.
        'load_actor_critic': r'exp-x\TransferModels\SafetyPointGoal1-v0---f58ca1a463c0273cc1b6ca3902826cae538e180bd08f94f50e82a46844a78ecc\TRPOLag-{SafetyPointGoal1-v0}\seed-000-2024-05-19-17-50-17\torch_save\epoch-30.pt',
    },
    'logger_cfgs': {
        'use_wandb': True,
        'use_tensorboard': True,
        'log_dir': r'exp-x\TransferModels\SafetyPointGoal1-v0---f58ca1a463c0273cc1b6ca3902826cae538e180bd08f94f50e82a46844a78ecc',
    },
}

agent = omnisafe.Agent('TRPOLag', env_id, custom_cfgs=custom_cfgs)
# NOTE(review): presumably this one-episode call is what populates
# `agent._evaluator` for the 50-episode run below -- confirm against OmniSafe.
agent.evaluate(1)
# NOTE(review): `_evaluator` is a private attribute of the agent; prefer a
# public API if OmniSafe exposes one that returns the evaluation results.
transferred_result = agent._evaluator.evaluate(50)

In [None]:
# Mean and standard deviation of the reward and cost values held in
# `transferred_result`.
rewards = torch.tensor(transferred_result[0])
costs = torch.tensor(transferred_result[1])
avg_rew, std_rew = torch.mean(rewards), torch.std(rewards)
avg_cost, std_cost = torch.mean(costs), torch.std(costs)
(avg_rew, std_rew, avg_cost, std_cost)

In [None]:
def evaluate(LOG_DIR: str, num_episodes: int = 50) -> None:
    """Evaluate every ``.pt`` checkpoint saved under ``LOG_DIR/torch_save``.

    Each checkpoint is loaded into an ``omnisafe.Evaluator`` and evaluated, and
    the mean and standard deviation of the first two elements of the
    evaluation result (treated as rewards and costs) are printed, one line per
    checkpoint. Checkpoints are processed in sorted file-name order so the
    printed lines can be matched to files.

    Args:
        LOG_DIR (str): Run directory containing a ``torch_save`` sub-directory.
        num_episodes (int, optional): Episodes to evaluate per checkpoint.
            Defaults to 50.
    """
    evaluator = omnisafe.Evaluator()
    # os.scandir yields entries in arbitrary order; collect and sort the names
    # so the output order is deterministic. Directories and non-.pt files are
    # skipped.
    with os.scandir(os.path.join(LOG_DIR, 'torch_save')) as entries:
        checkpoint_names = sorted(
            entry.name
            for entry in entries
            if entry.is_file() and entry.name.split('.')[-1] == 'pt'
        )
    for model_name in checkpoint_names:
        evaluator.load_saved(
            save_dir=LOG_DIR, model_name=model_name, camera_name='track', width=256, height=256
        )
        # evaluator.render(num_episodes=5)
        result = evaluator.evaluate(num_episodes=num_episodes)
        rewards, costs = torch.tensor(result[0]), torch.tensor(result[1])
        avg_rew, std_rew = rewards.mean(), rewards.std()
        avg_cost, std_cost = costs.mean(), costs.std()
        print(avg_rew, std_rew, avg_cost, std_cost)

In [None]:
# Evaluate the checkpoints of the unconstrained TRPO run on SafetyPointGoal1-v0.
# Raw string: the backslashes are literal Windows path separators, not escapes.
LOG_DIR = r'exp-x\BM_Safety_Unconstrained\SafetyPointGoal1-v0---dce575ce051f34bfb10e125b56a70286d41d91307da1cacad37e9d43f2725ea0\TRPO-{SafetyPointGoal1-v0}\seed-000-2024-04-24-01-04-38'
evaluate(LOG_DIR)