In [140]:
from omnisafe.models.actor import GaussianLearningActor
import safety_gymnasium
import torch

# Shared Safety-Gymnasium environment; the cells below pass this same `env` object around.
env = safety_gymnasium.make('SafetyRacecarGoal1-v0')

def create_random_agent(env, hidden_layers=(255, 255, 255, 255), activation='relu', weight_initialization_mode='orthogonal'):
    """Build a freshly initialised (untrained) Gaussian actor sized for ``env``.

    Args:
        env: Environment exposing ``observation_space`` and ``action_space``.
        hidden_layers: Sequence with the width of each hidden layer.
        activation: Name of the activation function forwarded to the actor.
        weight_initialization_mode: Name of the weight-initialisation scheme
            forwarded to the actor.

    Returns:
        A new ``GaussianLearningActor`` instance.
    """
    # The default is an immutable tuple and every call hands the actor its own
    # list, so no state can leak between agents through a shared default.
    layer_sizes = list(hidden_layers)
    return GaussianLearningActor(
        env.observation_space,
        env.action_space,
        layer_sizes,
        activation=activation,
        weight_initialization_mode=weight_initialization_mode,
    )

In [141]:
env.obs_space_dict

Dict('accelerometer': Box(-inf, inf, (3,), float64), 'velocimeter': Box(-inf, inf, (3,), float64), 'gyro': Box(-inf, inf, (3,), float64), 'magnetometer': Box(-inf, inf, (3,), float64), 'goal_lidar': Box(0.0, 1.0, (16,), float64), 'hazards_lidar': Box(0.0, 1.0, (16,), float64), 'vases_lidar': Box(0.0, 1.0, (16,), float64))

In [142]:
env.action_space

Box([-20.          -0.78500003], [20.          0.78500003], (2,), float64)

In [None]:
import numpy as np

def run_trajectory(env, agent, num_data_points=100, cost_window=200, deterministic=True):
    """Roll out one episode and sample (observation, action) rows with cost labels.

    The episode is played until the environment reports ``terminated`` or
    ``truncated``. Afterwards ``num_data_points`` time steps are drawn at random
    (``np.random.choice`` default, i.e. with replacement) and each one is
    labelled with the sum of the costs of the next ``cost_window`` steps,
    starting at the sampled step itself.

    The environment is NOT closed here: the caller created it and may reuse it
    for further trajectories, so the caller is responsible for ``env.close()``.

    Args:
        env: Environment whose ``reset()`` returns ``(observation, info)`` and
            whose ``step(action)`` returns
            ``(observation, reward, cost, terminated, truncated, info)``.
        agent: Object with ``predict(obs_tensor, deterministic=...)`` returning
            a torch tensor.
        num_data_points: Number of samples to draw from the episode.
        cost_window: Number of steps whose costs are summed into each label.
        deterministic: Forwarded to ``agent.predict``.

    Returns:
        Tuple ``(chosen_data, labels)``: ``chosen_data`` has one row per sample
        holding the observation followed by the action, ``labels`` holds the
        matching windowed cost sums.
    """
    observation, info = env.reset()
    episode_over = False
    data = []
    costs = []
    # gather data
    while not episode_over:
        obs_tensor = torch.from_numpy(observation).float()
        # Honour the caller's choice instead of always forcing deterministic actions.
        action = agent.predict(obs_tensor, deterministic=deterministic).detach().numpy()
        data.append(np.append(observation, action))
        observation, reward, cost, terminated, truncated, info = env.step(action)
        costs.append(cost)
        episode_over = terminated or truncated
    # pick num_data_points out of the data and calculate cost in the next cost_window steps
    indices = np.random.choice(len(data), size=num_data_points)
    chosen_data = np.array(data)[indices]
    # Slicing clamps at the end of the list, so windows that run past the
    # episode end simply sum whatever costs remain.
    labels = np.array([sum(costs[i:i + cost_window]) for i in indices])
    return chosen_data, labels

In [144]:
# Smoke test: sample a single trajectory driven by a freshly created actor and show its labels.
data, labels = run_trajectory(env, create_random_agent(env))
labels

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [99]:
print(data.shape)

(100, 62)


In [145]:
def generate_dataset(env, amount=1000, points_per_trajectory=100):
    """Collect ``amount`` labelled samples, using a new random agent per trajectory.

    Trajectories are run until at least ``amount`` samples have been gathered;
    the result is then truncated so exactly ``amount`` rows are returned, also
    when ``amount`` is not a multiple of ``points_per_trajectory``.

    Args:
        env: Environment passed on to ``create_random_agent`` and ``run_trajectory``.
        amount: Total number of samples to return; must be positive.
        points_per_trajectory: Upper bound on samples requested from a single
            trajectory; must be positive.

    Returns:
        Tuple ``(data, labels)`` of arrays concatenated over all trajectories,
        each with ``amount`` entries along the first axis.

    Raises:
        ValueError: If ``amount`` or ``points_per_trajectory`` is not positive.
    """
    if amount <= 0:
        raise ValueError(f"amount must be positive, got {amount}")
    if points_per_trajectory <= 0:
        raise ValueError(f"points_per_trajectory must be positive, got {points_per_trajectory}")

    data = []
    labels = []
    collected = 0
    while collected < amount:
        wanted = min(points_per_trajectory, amount - collected)
        data_i, labels_i = run_trajectory(env, create_random_agent(env), num_data_points=wanted)
        data.append(data_i)
        labels.append(labels_i)
        # Count what was actually returned rather than what was requested, so
        # the loop stays correct whatever batch size run_trajectory yields.
        collected += len(data_i)
    return np.concatenate(data, axis=0)[:amount], np.concatenate(labels, axis=0)[:amount]

In [146]:
# Build the full dataset using generate_dataset's default amount.
data, labels = generate_dataset(env)

In [147]:
data

array([[-6.40900740e-04, -2.41544288e-04,  9.80999921e+00, ...,
         0.00000000e+00, -3.47902983e-01, -5.97463608e-01],
       [-8.55559871e-04, -3.28932875e-04,  9.81000000e+00, ...,
         0.00000000e+00, -3.46855611e-01, -4.95012730e-01],
       [-1.14086488e-03, -1.85794782e-04,  9.80999556e+00, ...,
         0.00000000e+00, -3.89784485e-01, -6.53875232e-01],
       ...,
       [-8.03893582e-04,  3.07035883e-04,  9.80999981e+00, ...,
         0.00000000e+00, -7.69861460e-01,  1.09641820e-01],
       [-3.63334463e-04, -1.32780864e-03,  9.81003199e+00, ...,
         0.00000000e+00, -7.71608353e-01,  1.13461465e-01],
       [-1.24598062e-03,  4.59677830e-04,  9.80999980e+00, ...,
         0.00000000e+00, -8.52879584e-01,  9.54091549e-02]])

In [148]:
data.shape

(1000, 62)

In [149]:
labels

array([  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   

In [150]:
labels.shape

(1000,)