In [None]:
import torch

# Environment sanity check: report whether PyTorch can see a CUDA device.
# The per-device queries (current_device / get_device_name) raise on a machine
# without a usable GPU, so they are only issued when CUDA is available.
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
print("CUDA device count:", torch.cuda.device_count())
if cuda_available:
    current_device = torch.cuda.current_device()
    print("Current device:", current_device)
    print("Device name:", torch.cuda.get_device_name(current_device))
else:
    print("Current device: none (running on CPU)")

CUDA available: True
CUDA device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 3060


In [2]:
import mujoco

# Smoke test for the MuJoCo Python bindings: compile a tiny scene (one light,
# a ground plane and a sphere body at pos "0 0 1") and allocate its data.
_smoke_test_xml = """
<mujoco>
  <worldbody>
    <light pos="0 0 10"/>
    <geom type="plane" size="10 10 0.1"/>
    <body pos="0 0 1">
      <geom type="sphere" size="0.1"/>
    </body>
  </worldbody>
</mujoco>
"""

model = mujoco.MjModel.from_xml_string(_smoke_test_xml)
data = mujoco.MjData(model)
print("MuJoCo simulation loaded!")

MuJoCo simulation loaded!


In [3]:
import os

# `bin` directory of the mujoco210 install, resolved from the current user's
# home directory instead of a hard-coded /home/user path.
mujoco_bin_dir = os.path.join(os.path.expanduser("~"), ".mujoco", "mujoco210", "bin")

# Prepend rather than overwrite, so entries that are already on
# LD_LIBRARY_PATH are kept. Skipping the insert when the directory is
# already listed keeps the cell idempotent across re-runs.
_ld_entries = [
    entry
    for entry in os.environ.get("LD_LIBRARY_PATH", "").split(os.pathsep)
    if entry
]
if mujoco_bin_dir not in _ld_entries:
    _ld_entries.insert(0, mujoco_bin_dir)
os.environ["LD_LIBRARY_PATH"] = os.pathsep.join(_ld_entries)

In [4]:
import safety_gymnasium

# Create the SafetyPointGoal1-v0 task via Safety-Gymnasium's own `make`.
# In the recorded run, executing this cell triggered a mujoco_py rebuild
# ("Import error. Trying to rebuild mujoco_py."), so the first run can be slow.
env = safety_gymnasium.make("SafetyPointGoal1-v0")

Import error. Trying to rebuild mujoco_py.
running build_ext
building 'mujoco_py.cymj' extension
x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -g -fwrapv -O2 -g -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wdate-time -D_FORTIFY_SOURCE=3 -fPIC -I/home/user/bachelor/RL/lib/python3.8/site-packages/mujoco_py -I/home/user/.mujoco/mujoco210/include -I/home/user/bachelor/RL/lib/python3.8/site-packages/numpy/core/include -I/home/user/bachelor/RL/include -I/usr/include/python3.8 -c /home/user/bachelor/RL/lib/python3.8/site-packages/mujoco_py/cymj.c -o /home/user/bachelor/RL/lib/python3.8/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_38_linuxcpuextensionbuilder/temp.linux-x86_64-3.8/home/user/b

# Train an initial PPO agent on the Goal1 task to see how long training takes

In [6]:
from tqdm.notebook import tqdm

In [None]:
import omnisafe

env_id = 'SafetyPointGoal1-v0'

# Step budget lowered from 10,000,000 to 2,000,000; with the default of
# 20,000 steps per epoch noted by the author, that is 100 epochs.
custom_cfgs = dict(
    train_cfgs=dict(total_steps=2_000_000),
)

# Baseline run with plain PPO, used to gauge how long training takes.
agent = omnisafe.Agent("PPO", env_id=env_id, custom_cfgs=custom_cfgs)
agent.learn()


KeyboardInterrupt: 

# Train agent with PPOLag and save trajectories to create a dataset

In [1]:
import omnisafe

env_id = 'SafetyPointGoal1-v0'

# Short run of 20,000 total steps. Assuming the default of 20,000 steps per
# epoch, this is a single epoch rather than a full training run -- TODO confirm
# and raise total_steps before using the agent for data collection.
custom_cfgs = dict(
    train_cfgs=dict(total_steps=20_000),
    # Cost budget for the Lagrangian constraint (the author notes 25 is the default).
    lagrange_cfgs=dict(cost_limit=25.0),
)

agent = omnisafe.Agent("PPOLag", env_id=env_id, custom_cfgs=custom_cfgs)
# learn() returns three values; they are kept as notebook globals so a later
# cell can print them.
reward, cost, epoch_len = agent.learn()

Loading PPOLag.yaml from /home/user/bachelor/RL/lib/python3.8/site-packages/omnisafe/utils/../configs/on-policy/PPOLag.yaml


Output()

Output()

In [None]:
import gymnasium as gym
import torch
import numpy as np
import pickle

from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter
from omnisafe.models import ActorCriticPPO
from omnisafe.utils.config import Config

# === Load config & environment ===
# NOTE(review): hard-coded path to a saved run's config.yaml -- confirm it
# matches the directory the training run actually wrote.
config_path = 'runs/ppo_lagrangian/SafetyPointGoal1-v0/seed-000/config.yaml'
cfg = Config(config_path)
# Rebinds the notebook-global `env` (previously created with
# safety_gymnasium.make). The rollout loop in this cell unpacks five values
# from env.step() and reads the cost from `info` -- NOTE(review): confirm that
# an environment built through gym.make follows that convention for this task.
env = gym.make(cfg.task, render_mode=None)

# === Load trained model ===
# Rebinds the notebook-global `model` (previously the MuJoCo MjModel).
# NOTE(review): assumes the checkpoint lives at <cfg.log_dir>/torch_save/model.pt -- confirm.
model = ActorCriticPPO(cfg.obs_space, cfg.act_space, cfg)
model.load_checkpoint(f"{cfg.log_dir}/torch_save/model.pt")

# === Prepare Adapter ===
# NOTE(review): `adapter` is not referenced again anywhere in this cell.
adapter = OnPolicyAdapter(env, cfg)

# === Collect dataset ===
def _suffix_sums(values):
    """Return a list whose entry t is the sum of values[t:].

    Computed in a single backward pass (O(n)) instead of re-summing the tail
    for every index. The accumulator starts at integer 0, like the built-in
    sum(), so the element type of the result follows the inputs.
    """
    totals = []
    running = 0
    for value in reversed(values):
        # Rebind instead of `+=` so values already stored in `totals` can
        # never be mutated in place.
        running = running + value
        totals.append(running)
    totals.reverse()
    return totals


# List of (observation, summed future cost) pairs, one per visited step.
dataset = []
num_episodes = 20

try:
    for ep in range(num_episodes):
        obs, _ = env.reset()
        done = False
        traj = []
        costs = []

        while not done:
            obs_tensor = torch.tensor(obs, dtype=torch.float32).unsqueeze(0)
            # Pure inference: no autograd graph is needed for rollouts.
            with torch.no_grad():
                action, _, _ = model.explore(obs_tensor)
            action = action.squeeze(0).detach().cpu().numpy()

            # Step-local names on purpose: binding `reward` / `cost` here would
            # overwrite the training results that agent.learn() returned and
            # that a later cell prints.
            next_obs, _step_reward, terminated, truncated, info = env.step(action)
            step_cost = info.get("cost", 0.0)

            # Pair the observation the action was chosen from with the cost
            # incurred by that step.
            traj.append(obs)
            costs.append(step_cost)
            obs = next_obs
            done = terminated or truncated

        # Label every visited observation with the undiscounted cost accumulated
        # from that step to the end of the episode.
        dataset.extend(zip(traj, _suffix_sums(costs)))
finally:
    # Release the simulator even if a rollout fails part-way through.
    env.close()

# === Save ===
with open("expected_cost_dataset.pkl", "wb") as f:
    pickle.dump(dataset, f)

print(f"Gespeichert: {len(dataset)} Einträge.")

In [3]:
# Show the three values returned by agent.learn() in the PPOLag training cell.
# Depends on notebook state: that cell must have been run first, and nothing
# in between may have rebound `reward`, `cost` or `epoch_len`.
print(reward, cost, epoch_len)

-0.1470714509487152 77.6500015258789 1000.0
