# Testing Safety Gymnasium Functionality

In [23]:
import gymnasium as gym
import safety_gymnasium  # noqa: F401
from tqdm import tqdm
import numpy as np
import torch

from rl_vcf.rl.utils.make_env import make_env_safety
from rl_vcf.rl.algos.sac.core import ReplayBuffer

In [7]:
# Create the SafetyPointGoal1-v0 task with off-screen RGB rendering and take a
# seeded reset so the initial observation inspected below is reproducible.
env = safety_gymnasium.make(
    "SafetyPointGoal1-v0",
    render_mode="rgb_array",
)
obs, info = env.reset(seed=0)

In [8]:
# Observation space exposed by the env (bounds, shape and dtype).
print(str(env.observation_space))

Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.], [inf inf inf inf inf inf inf inf inf inf inf inf  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.], (60,), float64)


In [9]:
# Per-sensor breakdown of the observation, keyed by sensor name.
print(str(env.obs_space_dict))

Dict('accelerometer': Box(-inf, inf, (3,), float64), 'velocimeter': Box(-inf, inf, (3,), float64), 'gyro': Box(-inf, inf, (3,), float64), 'magnetometer': Box(-inf, inf, (3,), float64), 'goal_lidar': Box(0.0, 1.0, (16,), float64), 'hazards_lidar': Box(0.0, 1.0, (16,), float64), 'vases_lidar': Box(0.0, 1.0, (16,), float64))


In [11]:
# Observation space once more, for side-by-side comparison with the
# per-sensor dict printed by `env.obs_space_dict`.
print(str(env.observation_space))

Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.], [inf inf inf inf inf inf inf inf inf inf inf inf  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.], (60,), float64)


In [12]:
# Initial observation returned by the seeded reset.
print(str(obs))

[ 0.          0.          9.81        0.          0.          0.
  0.          0.          0.         -0.49791043  0.04566409  0.
  0.          0.          0.          0.          0.          0.
  0.          0.35358097  0.59876721  0.24518625  0.          0.
  0.          0.          0.          0.          0.37947285  0.42439038
  0.57955285  0.62418306  0.62667112  0.31488211  0.33488147  0.01999936
  0.          0.45109549  0.74245344  0.33189177  0.60044509  0.73198548
  0.40247368  0.78194652  0.          0.          0.          0.
  0.          0.          0.31184259  0.76483616  0.45299358  0.
  0.          0.          0.          0.          0.          0.        ]


In [13]:
# Action space of the env (bounds, shape and dtype).
print(str(env.action_space))

Box(-1.0, 1.0, (2,), float64)


In [None]:
# Synchronous vector env holding a single environment built by the project's
# env factory.
# For reference only, the library helper that was tried here instead:
#   safety_gymnasium.vector.make(
#       "SafetyPointGoal1-v0", render_mode="rgb_array", num_envs=1
#   )
# NOTE(review): the positional arguments after the env id are presumably
# (env index, seed, capture-video flag, video interval, ...) — confirm against
# make_env_safety's signature.
envs = safety_gymnasium.vector.SafetySyncVectorEnv(
    [
        make_env_safety("SafetyPointGoal1-v0", env_idx, env_idx, True, 5, False)
        for env_idx in range(1)
    ]
)

NameError: name 'safety_gymnasium' is not defined

In [7]:
# Fill a replay buffer with transitions gathered by sampling uniformly random
# actions in the vector env.
device = "cuda" if torch.cuda.is_available() else "cpu"  # do not crash on CPU-only machines
num_envs = envs.num_envs  # number of sub-environments held by the vector env
total_steps = 11000  # rollout length; also passed as the buffer's size argument
seed = 0

replay_buffer = ReplayBuffer(
    envs.single_observation_space,
    envs.single_action_space,
    total_steps,
    device,
)

# Seed action sampling as well as the env reset so the rollout is reproducible.
envs.single_action_space.seed(seed)
obs = torch.Tensor(
    envs.reset(seed=[seed + i for i in range(num_envs)])[
        0  # observations are first element of env reset output
    ]
).to(device)  # keep the first stored observation on the same device as later ones

for global_step in tqdm(range(total_steps)):
    act = torch.Tensor(
        np.array([envs.single_action_space.sample() for _ in range(num_envs)])
    ).to(device)
    # The vector env's step also returns `cost`; it is not stored in the buffer here.
    next_obs, rew, cost, term, trunc, info = envs.step(act.cpu().numpy())

    # On truncation the vector env has already auto-reset, so `next_obs` holds
    # the first observation of the new episode; store the true final
    # observation of the finished episode instead.
    real_next_obs = next_obs.copy()
    for idx, truncated in enumerate(trunc):
        if truncated:
            real_next_obs[idx] = info["final_observation"][idx]

    replay_buffer.store(
        obs,
        act,
        torch.Tensor(rew).to(device),
        torch.Tensor(real_next_obs).to(device),
        torch.Tensor(term).to(device),
    )
    obs = torch.Tensor(next_obs).to(device)

# Release the sub-environments (and any recorders they hold) once the rollout
# is done. Re-create `envs` before running this cell again.
envs.close()

  9%|▉         | 986/11000 [00:02<00:21, 458.58it/s]

Moviepy - Building video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-0.mp4.
Moviepy - Writing video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-0.mp4



  9%|▉         | 986/11000 [00:02<00:21, 458.58it/s]

Moviepy - Done !
Moviepy - video ready /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-0.mp4


 54%|█████▍    | 5967/11000 [00:10<00:10, 478.38it/s] 

Moviepy - Building video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-5.mp4.
Moviepy - Writing video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-5.mp4



 54%|█████▍    | 5967/11000 [00:11<00:10, 478.38it/s]

Moviepy - Done !
Moviepy - video ready /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-5.mp4


100%|█████████▉| 10972/11000 [00:19<00:00, 477.33it/s]

Moviepy - Building video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-10.mp4.
Moviepy - Writing video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-10.mp4



100%|█████████▉| 10972/11000 [00:19<00:00, 477.33it/s]

Moviepy - Done !
Moviepy - video ready /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-10.mp4


100%|██████████| 11000/11000 [00:20<00:00, 543.36it/s]


In [32]:
# Single (non-vectorised) env with episode statistics and video recording.
# The SafetyGymnasium2Gymnasium wrapper is applied first; the rollout cell
# below then unpacks step() as a 5-tuple and reads the cost from `info`.
env = safety_gymnasium.wrappers.SafetyGymnasium2Gymnasium(
    safety_gymnasium.make(
        "SafetyPointGoal1-v0",
        render_mode="rgb_array",  # need to set render mode for video recording
        camera_name="fixednear",
    )
)
env = gym.wrappers.RecordVideo(
    gym.wrappers.RecordEpisodeStatistics(env),
    video_folder="videos",
    # Record every fifth episode (0, 5, 10, ...).
    episode_trigger=lambda episode_idx: episode_idx % 5 == 0,
)

  logger.warn(


In [33]:
# Roll out uniformly random actions on the wrapped single env so the video
# wrapper records the episodes selected by its trigger.
env.action_space.seed(0)  # seed action sampling too, so the rollout is reproducible
env.reset(seed=0)
for global_step in tqdm(range(11000)):
    act = env.action_space.sample()
    next_obs, rew, term, trunc, info = env.step(act)  # info contains cost
    if term or trunc:
        env.reset()  # need to manually reset if not in sync vector env

# Release the renderer and let the video wrapper finalise any recording still
# in progress. Re-create `env` before running this cell again.
env.close()

  9%|▊         | 945/11000 [00:01<00:18, 555.33it/s]

Moviepy - Building video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-0.mp4.
Moviepy - Writing video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-0.mp4



  9%|▊         | 945/11000 [00:02<00:18, 555.33it/s]

Moviepy - Done !
Moviepy - video ready /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-0.mp4


 54%|█████▍    | 5982/11000 [00:08<00:08, 576.70it/s] 

Moviepy - Building video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-5.mp4.
Moviepy - Writing video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-5.mp4



 54%|█████▍    | 5982/11000 [00:09<00:08, 576.70it/s]

Moviepy - Done !
Moviepy - video ready /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-5.mp4


100%|█████████▉| 10980/11000 [00:14<00:00, 557.07it/s]

Moviepy - Building video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-10.mp4.
Moviepy - Writing video /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-10.mp4



100%|█████████▉| 10980/11000 [00:15<00:00, 557.07it/s]

Moviepy - Done !
Moviepy - video ready /home/jacques/projects/rl-vcf/notebooks/envs/videos/rl-video-episode-10.mp4


100%|██████████| 11000/11000 [00:16<00:00, 686.68it/s]


In [14]:
# Switch to the SafetyPointZoneEnv0-v0 task and take a seeded reset so the
# observation inspected below is reproducible.
env = safety_gymnasium.make(
    "SafetyPointZoneEnv0-v0",
    render_mode="rgb_array",
)
obs, info = env.reset(seed=0)

In [15]:
# Observation space of the zone env as exposed by the env itself.
print(str(env.observation_space))

Dict('accelerometer': Box(-inf, inf, (3,), float64), 'velocimeter': Box(-inf, inf, (3,), float64), 'gyro': Box(-inf, inf, (3,), float64), 'magnetometer': Box(-inf, inf, (3,), float64), 'green_zones_lidar': Box(0.0, 1.0, (16,), float64))


In [16]:
# Underlying mapping of sub-space name -> space behind the observation space.
print(str(env.observation_space.spaces))

OrderedDict([('accelerometer', Box(-inf, inf, (3,), float64)), ('velocimeter', Box(-inf, inf, (3,), float64)), ('gyro', Box(-inf, inf, (3,), float64)), ('magnetometer', Box(-inf, inf, (3,), float64)), ('green_zones_lidar', Box(0.0, 1.0, (16,), float64))])


In [17]:
# Per-sensor observation dict, for comparison with `env.observation_space`.
print(str(env.obs_space_dict))

Dict('accelerometer': Box(-inf, inf, (3,), float64), 'velocimeter': Box(-inf, inf, (3,), float64), 'gyro': Box(-inf, inf, (3,), float64), 'magnetometer': Box(-inf, inf, (3,), float64), 'green_zones_lidar': Box(0.0, 1.0, (16,), float64))


In [18]:
# Action space of the zone env (bounds, shape and dtype).
print(str(env.action_space))

Box(-1.0, 1.0, (2,), float64)


In [19]:
# Initial observation of the zone env returned by the seeded reset.
print(str(obs))

{'accelerometer': array([0.  , 0.  , 9.81]), 'velocimeter': array([0., 0., 0.]), 'gyro': array([0., 0., 0.]), 'magnetometer': array([-0.06334726, -0.49597089,  0.        ]), 'green_zones_lidar': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.19935754,
       0.20825497, 0.00889742, 0.06227048, 0.54252891, 0.48025843,
       0.        ])}


In [20]:
# Same zone task, now wrapped in FlattenObservation, followed by a seeded
# reset so the flattened observation can be inspected below.
env = gym.wrappers.FlattenObservation(
    safety_gymnasium.make("SafetyPointZoneEnv0-v0", render_mode="rgb_array")
)
obs, info = env.reset(seed=0)

In [21]:
# Observation space after wrapping the env in FlattenObservation.
print(str(env.observation_space))

Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.], [inf inf inf inf inf inf inf inf inf inf inf inf  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.], (28,), float64)


In [22]:
# Flattened initial observation, followed by its dtype on a separate line.
print(obs, obs.dtype, sep="\n")

[ 0.00000000e+00  0.00000000e+00  9.81000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00 -6.33472631e-02 -4.95970891e-01  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  1.99357544e-01  2.08254969e-01  8.89742497e-03
  6.22704774e-02  5.42528908e-01  4.80258430e-01  0.00000000e+00]
float64
