# Methods for loading a trained agent provided by Markel

In [5]:
import json
import os

from typing import Any

from gymnasium.spaces import Box
import numpy as np

import torch

from omnisafe.common import Normalizer
from omnisafe.envs.wrapper import ActionRepeat, ActionScale, ObsNormalize, TimeLimit
from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic
from omnisafe.utils.config import Config
from omnisafe.envs.core import CMDP, make
from omnisafe.algorithms.model_based.base.ensemble import EnsembleDynamicsModel
from omnisafe.models.actor import ActorBuilder
from typing import Dict, Tuple, Any


def _load_model_and_env(
    save_dir: str,
    model_name: str,
    cfgs: Config,
    env_kwargs: Dict[str, Any],
) -> None:
    """Load the model from the save directory.

    Args:
        save_dir (str): Directory where the model is saved.
        model_name (str): Name of the model.
        env_kwargs (dict[str, Any]): Keyword arguments for the environment.

    Raises:
        FileNotFoundError: If the model is not found.
    """
    # load the saved model
    model_path = os.path.join(save_dir, 'torch_save', model_name)
    try:
        model_params = torch.load(model_path)
    except FileNotFoundError as error:
        raise FileNotFoundError('The model is not found in the save directory.') from error

    # load the environment
    env = make(**env_kwargs)

    observation_space = env.observation_space
    action_space = env.action_space
    if 'Saute' in cfgs['algo'] or 'Simmer' in cfgs['algo']:
        safety_budget = (
            cfgs.algo_cfgs.safety_budget
            * (1 - cfgs.algo_cfgs.saute_gamma**cfgs.algo_cfgs.max_ep_len)
            / (1 - cfgs.algo_cfgs.saute_gamma)
            / cfgs.algo_cfgs.max_ep_len
            * torch.ones(1)
        )
    assert isinstance(observation_space, Box), 'The observation space must be Box.'
    assert isinstance(action_space, Box), 'The action space must be Box.'

    if cfgs['algo_cfgs']['obs_normalize']:
        obs_normalizer = Normalizer(shape=observation_space.shape, clip=5)
        obs_normalizer.load_state_dict(model_params['obs_normalizer'])
        env = ObsNormalize(env, device=torch.device('cpu'), norm=obs_normalizer)
    if env.need_time_limit_wrapper:
        env = TimeLimit(env, device=torch.device('cpu'), time_limit=1000)
    env = ActionScale(env, device=torch.device('cpu'), low=-1.0, high=1.0)

    if hasattr(cfgs['algo_cfgs'], 'action_repeat'):
        env = ActionRepeat(
            env,
            device=torch.device('cpu'),
            times=cfgs['algo_cfgs']['action_repeat'],
        )
    if hasattr(cfgs, 'algo') and cfgs['algo'] in [
        'LOOP',
        'SafeLOOP',
        'PETS',
        'CAPPETS',
        'RCEPETS',
        'CCEPETS',
    ]:
        dynamics_state_space = (
            env.coordinate_observation_space
            if env.coordinate_observation_space is not None
            else env.observation_space
        )
        assert env.action_space is not None and isinstance(
            env.action_space.shape,
            tuple,
        )
        if isinstance(env.action_space, Box):
            action_space = env.action_space
        else:
            raise NotImplementedError
        if cfgs['algo'] in ['LOOP', 'SafeLOOP']:
            actor_critic = ConstraintActorQCritic(
                obs_space=dynamics_state_space,
                act_space=action_space,
                model_cfgs=cfgs.model_cfgs,
                epochs=1,
            )
        if actor_critic is not None:
            actor_critic.load_state_dict(model_params['actor_critic'])
            actor_critic.to('cpu')
        dynamics = EnsembleDynamicsModel(
            model_cfgs=cfgs.dynamics_cfgs,
            device=torch.device('cpu'),
            state_shape=dynamics_state_space.shape,
            action_shape=action_space.shape,
            actor_critic=actor_critic,
            rew_func=None,
            cost_func=env.get_cost_from_obs_tensor,
            terminal_func=None,
        )
        dynamics.ensemble_model.load_state_dict(model_params['dynamics'])
        dynamics.ensemble_model.to('cpu')
        if cfgs['algo'] in ['CCEPETS', 'RCEPETS', 'SafeLOOP']:
            algo_to_planner = {
                'CCEPETS': (
                    'CCEPlanner',
                    {'cost_limit': cfgs['algo_cfgs']['cost_limit']},
                ),
                'RCEPETS': (
                    'RCEPlanner',
                    {'cost_limit': cfgs['algo_cfgs']['cost_limit']},
                ),
                'SafeLOOP': (
                    'SafeARCPlanner',
                    {
                        'cost_limit': cfgs['algo_cfgs']['cost_limit'],
                        'actor_critic': actor_critic,
                    },
                ),
            }
        elif cfgs['algo'] in ['PETS', 'LOOP']:
            algo_to_planner = {
                'PETS': ('CEMPlanner', {}),
                'LOOP': ('ARCPlanner', {'actor_critic': actor_critic}),
            }
        elif cfgs['algo'] in ['CAPPETS']:
            lagrange: torch.nn.Parameter = torch.nn.Parameter(
                model_params['lagrangian_multiplier'].to('cpu'),
                requires_grad=False,
            )
            algo_to_planner = {
                'CAPPETS': (
                    'CAPPlanner',
                    {
                        'cost_limit': cfgs['lagrange_cfgs']['cost_limit'],
                        'lagrange': lagrange,
                    },
                ),
            }
        planner_name = algo_to_planner[cfgs['algo']][0]
        planner_special_cfgs = algo_to_planner[cfgs['algo']][1]
        planner_cls = globals()[f'{planner_name}']
        planner = planner_cls(
            dynamics=dynamics,
            planner_cfgs=cfgs.planner_cfgs,
            gamma=float(cfgs.algo_cfgs.gamma),
            cost_gamma=float(cfgs.algo_cfgs.cost_gamma),
            dynamics_state_shape=dynamics_state_space.shape,
            action_shape=action_space.shape,
            action_max=1.0,
            action_min=-1.0,
            device='cpu',
            **planner_special_cfgs,
        )

    else:
        if 'Saute' in cfgs['algo'] or 'Simmer' in cfgs['algo']:
            observation_space = Box(
                low=np.hstack((observation_space.low, -np.inf)),
                high=np.hstack((observation_space.high, np.inf)),
                shape=(observation_space.shape[0] + 1,),
            )
        actor_type = cfgs['model_cfgs']['actor_type']
        pi_cfg = cfgs['model_cfgs']['actor']
        weight_initialization_mode = cfgs['model_cfgs']['weight_initialization_mode']
        actor_builder = ActorBuilder(
            obs_space=observation_space,
            act_space=action_space,
            hidden_sizes=pi_cfg['hidden_sizes'],
            activation=pi_cfg['activation'],
            weight_initialization_mode=weight_initialization_mode,
        )
        actor = actor_builder.build_actor(actor_type)
        actor.load_state_dict(model_params['pi'])

    return env, actor


def _load_cfgs(save_dir):
    cfg_path = os.path.join(save_dir, 'config.json')
    try:
        with open(cfg_path, encoding='utf-8') as file:
            kwargs = json.load(file)
    except FileNotFoundError as error:
        raise FileNotFoundError(
            f'The config file is not found in the save directory{save_dir}.',
        ) from error
    return Config.dict2config(kwargs)


# LOG_DIR should contain two things:
# 1. config.json
# 2. torch_save/{model_name}
#
# model_name usually looks like 'epoch-500.pt'
def load_guide(save_dir, model_name) -> Tuple[CMDP, ConstraintActorQCritic]:
    cfgs = _load_cfgs(save_dir)

    env_kwargs = {
        'env_id': cfgs['env_id'],
        'num_envs': 1,
    }

    env, actor = _load_model_and_env(save_dir, model_name, cfgs, env_kwargs)
    return env, actor


# Generating a dataset

In [6]:
from omnisafe.models.actor import GaussianLearningActor
import safety_gymnasium
import torch

env = safety_gymnasium.make('SafetyPointGoal1-v0')

def create_random_agent(env, hidden_layers=[255,255,255,255], activation='relu', weight_initialization_mode='orthogonal'):
    obs_space = env.observation_space
    act_space = env.action_space
    return GaussianLearningActor(obs_space, act_space, hidden_layers, activation=activation, weight_initialization_mode=weight_initialization_mode)

In [36]:
env.obs_space_dict.keys()

KeysView(Dict('accelerometer': Box(-inf, inf, (3,), float64), 'velocimeter': Box(-inf, inf, (3,), float64), 'gyro': Box(-inf, inf, (3,), float64), 'magnetometer': Box(-inf, inf, (3,), float64), 'goal_lidar': Box(0.0, 1.0, (16,), float64), 'hazards_lidar': Box(0.0, 1.0, (16,), float64), 'vases_lidar': Box(0.0, 1.0, (16,), float64)))

In [8]:
env.action_space

Box(-1.0, 1.0, (2,), float64)

In [9]:
import numpy as np
# old method
def run_trajectory(env, agent, num_data_points=100, cost_window=200, deterministic=True):
    observation, info = env.reset()
    episode_over = False
    data = []
    costs = []
    # gather data
    while not episode_over:
        obs_tensor = torch.from_numpy(observation).float()
        action = agent.predict(obs_tensor, deterministic=True).detach().numpy()
        data.append(np.append(observation, action))
        observation, reward, cost, terminated, truncated, info = env.step(action)
        costs.append(cost)
        episode_over = terminated or truncated
    env.close()
    # pick num_data_points out of the data and calculate cost in the next cost_window steps
    indices = np.random.choice(np.arange(len(data)), size=100)
    chosen_data = np.array(data)[indices]
    labels = []
    for i in indices:
        if i + cost_window >= len(costs):
            labels.append(sum(costs[i:]))
        else:
            labels.append(sum(costs[i:i+cost_window]))
    return chosen_data, np.array(labels)

In [10]:
import numpy as np

def run_trajectory(env, agent, safe_agent, probability_for_datapoint=0.02, cost_window=200, deterministic=True):
    observation, info = env.reset()
    episode_over = False
    is_sampling = False
    sampled_cost = 0
    sampling_step = 0
    data = []
    labels = []
    # gather data
    while not episode_over:
        if is_sampling:
            obs_tensor = torch.from_numpy(observation).float()
            action = safe_agent.predict(obs_tensor, deterministic=deterministic).detach().numpy()
            observation, reward, cost, terminated, truncated, info = env.step(action)
            episode_over = terminated or truncated
            sampled_cost += cost
            sampling_step += 1
            if sampling_step >= cost_window or episode_over:
                labels.append(sampled_cost)
                sampled_cost = 0
                is_sampling = False
        else:
            obs_tensor = torch.from_numpy(observation).float()
            action = agent.predict(obs_tensor, deterministic=deterministic).detach().numpy()
            if np.random.random()<probability_for_datapoint:
                # begin sampling with the safe agent
                data.append(np.append(observation, action))
                is_sampling = True
            observation, reward, cost, terminated, truncated, info = env.step(action)
            episode_over = terminated or truncated
    env.close()
    return np.array(data), np.array(labels)

In [11]:
# load my saved trained agent
safe_agent = load_guide("/home/user/bachelor/runs/PPOLag-{SafetyPointGoal1-v0}/seed-000-2025-05-13-17-51-08", "epoch-50.pt")[1]

  model_params = torch.load(model_path)


In [12]:
data, labels = run_trajectory(env, safe_agent, create_random_agent(env))
labels

array([23.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.])

In [13]:
data[:,0]

array([ 1.54310225,  2.89740133,  2.21179994,  1.296835  ,  2.30041678,
        2.20122368,  2.30978913,  0.33889688,  1.89279425, -2.97632192,
        2.08815778,  2.35499132, -3.26334542,  2.50058964, -2.82153629,
        2.70719839, -3.11423346, -3.11652612,  2.68679263])

In [14]:
len(data)

19

In [15]:
def generate_dataset(env, safe_agent, amount=1000):
    data = []
    labels = []
    while(sum(len(arr) for arr in data)<amount):
        print(sum(len(arr) for arr in data))
        data_i, labels_i = run_trajectory(env, safe_agent, create_random_agent(env))
        data.append(data_i)
        labels.append(labels_i)
    return np.concatenate(data, axis=0)[:amount], np.concatenate(labels, axis=0)[:amount]

In [16]:
data, labels = generate_dataset(env, safe_agent)

0
11
24
37
48
66
84
97
113
135
150
164
186
195
211
229
242
256
269
284
306
319
331
346
358
374
392
406
420
436
449
465
483
498
509
528
548
563
575
591
602
616
640
664
678
693
707
723
737
756
777
794
805
826
845
862
880
891
905
919
938
955
969
986
998


In [17]:
data

array([[ -3.38571269, -11.34621739,   9.81      , ...,   0.        ,
         -0.35020441,   0.1198318 ],
       [ -3.08165972, -13.19071865,   9.81      , ...,   0.        ,
         -0.45800877,   0.4303678 ],
       [ -4.48417682,   0.02711463,   9.81      , ...,   0.        ,
          0.29304135,   0.94153619],
       ...,
       [ -3.07680352,  -6.43339053,   9.81      , ...,   0.        ,
         -0.56834316,   0.41316915],
       [ -3.12711787, -12.70236795,   9.81      , ...,   0.        ,
         -0.28757346,   0.25157705],
       [  3.57710757,  -0.59030854,   9.81      , ...,   0.        ,
          0.57756644,   0.563375  ]])

In [18]:
data.shape

(1000, 62)

In [19]:
labels

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
        0.,  1.,  1.,  0.,  0.,  0., 82.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0., 31.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0., 12.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0

In [20]:
labels.shape

(1000,)

# Preprocessing the dataset for better NN performance

In [3]:
%pip install scikit-learn scipy pandas

Collecting scikit-learn
  Downloading scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scipy
  Downloading scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)
Collecting joblib>=1.1.1 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.1/11.1 MB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading joblib-1.4.2-py3-none-any.whl (301 k

In [37]:
obs_column_names = ['accelerometer0', 'accelerometer1', 'accelerometer2', 'velocimeter0', 'velocimeter1', 'velocimeter2', 'gyro0', 'gyro1', 'gyro2', 'magnetometer0', 'magnetometer1', 'magnetometer2']
for key in ['goal_lidar', 'hazards_lidar', 'vases_lidar']:
    for i in range(16):
        obs_column_names.append(key+str(i))
obs_column_names.append("action0")
obs_column_names.append("action1")
len(obs_column_names)

62

In [40]:
import pandas as pd
df = pd.DataFrame(data=data, columns=obs_column_names)
df['exp_cost'] = labels
df.head()

Unnamed: 0,accelerometer0,accelerometer1,accelerometer2,velocimeter0,velocimeter1,velocimeter2,gyro0,gyro1,gyro2,magnetometer0,...,vases_lidar9,vases_lidar10,vases_lidar11,vases_lidar12,vases_lidar13,vases_lidar14,vases_lidar15,action0,action1,exp_cost
0,-3.385713,-11.346217,9.81,0.257907,-0.381378,0.0,0.0,-0.0,-0.336429,-0.356442,...,0.835229,0.585476,0.0,0.0,0.0,0.0,0.0,-0.350204,0.119832,0.0
1,-3.08166,-13.190719,9.81,0.101284,0.263441,0.0,-0.0,0.0,-0.477036,0.044698,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.458009,0.430368,0.0
2,-4.484177,0.027115,9.81,0.90469,-0.379274,0.0,0.0,0.0,2.791862,0.468844,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.293041,0.941536,0.0
3,1.259589,-16.137908,9.81,0.872087,-0.289942,0.0,0.0,0.0,-1.601928,0.437584,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.451446,0.85403,0.0
4,-3.883219,0.193685,9.81,0.601074,-0.207702,0.0,0.0,0.0,2.935188,-0.499633,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.143386,1.035945,0.0


In [42]:
df.describe()

Unnamed: 0,accelerometer0,accelerometer1,accelerometer2,velocimeter0,velocimeter1,velocimeter2,gyro0,gyro1,gyro2,magnetometer0,...,vases_lidar9,vases_lidar10,vases_lidar11,vases_lidar12,vases_lidar13,vases_lidar14,vases_lidar15,action0,action1,exp_cost
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.253579,-0.130024,9.81,0.403071,-0.125936,0.0,0.0,0.0,0.650656,0.021647,...,0.03332,0.029037,0.025669,0.020083,0.01348,0.014244,0.019568,0.23928,0.235761,0.635
std,2.658362,7.680362,2.846369e-15,0.248428,0.172703,0.0,0.0,0.0,2.01403,0.349448,...,0.12144,0.114966,0.112751,0.09262,0.074695,0.07329,0.099117,0.60804,0.802438,4.776043
min,-5.489688,-16.904864,9.81,-1.198794,-0.88481,0.0,0.0,0.0,-3.02453,-0.499999,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.090503,-1.850756,0.0
25%,-3.151098,-5.475194,9.81,0.243816,-0.169552,0.0,0.0,0.0,-0.476922,-0.334823,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.365954,0.059218,0.0
50%,1.83279,0.311742,9.81,0.397694,-0.098705,0.0,-0.0,0.0,1.235234,0.059375,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.460624,0.473361,0.0
75%,2.247297,1.237658,9.81,0.557229,-0.032962,0.0,0.0,0.0,2.335685,0.355685,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.748922,0.710377,0.0
max,5.37184,18.630952,9.81,1.385797,0.725624,0.0,0.0,0.0,3.001426,0.5,...,0.907548,0.914774,0.931628,0.911094,0.933788,0.91373,0.911663,1.274909,1.735126,82.0


In [48]:
# Seperate the dataset into data and label again
X = df.drop(columns=["exp_cost"])
y = df.exp_cost

In [51]:
# Scaling the data with MinMaxScaler
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler((0,1))
scaled = scaler.fit_transform(X)
scaled_X = pd.DataFrame(scaled, columns=obs_column_names)
scaled_X.head()

Unnamed: 0,accelerometer0,accelerometer1,accelerometer2,velocimeter0,velocimeter1,velocimeter2,gyro0,gyro1,gyro2,magnetometer0,...,vases_lidar8,vases_lidar9,vases_lidar10,vases_lidar11,vases_lidar12,vases_lidar13,vases_lidar14,vases_lidar15,action0,action1
0,0.193709,0.156424,0.5,0.56361,0.312606,0.0,0.0,0.0,0.446087,0.143557,...,0.279264,0.920314,0.640023,0.0,0.0,0.0,0.0,0.0,0.312968,0.549541
1,0.221703,0.104518,0.0,0.503011,0.713007,0.0,0.0,0.0,0.422753,0.544698,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.267393,0.63614
2,0.092576,0.476476,0.5,0.813856,0.313913,0.0,0.0,0.0,0.965223,0.968844,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.584906,0.77869
3,0.621393,0.021583,0.5,0.801242,0.369383,0.0,0.0,0.0,0.236079,0.937584,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.270167,0.754288
4,0.147905,0.481164,0.5,0.696384,0.420451,0.0,0.0,0.0,0.989008,0.000366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.521638,0.805018


In [52]:
# Divide data into train, test and validation set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2)
X_train, X_validate, y_train, y_validate = train_test_split(X_train, y_train, test_size=0.2)

In [53]:
X_train.shape

(640, 62)

In [55]:
X_validate.shape

(160, 62)

In [54]:
X_test.shape

(200, 62)

# Building a model 

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Training a neural network on the dataset

# Evaluation of the model