# OmniSafe Basics

This notebook explores how various parts of the omnisafe library work.

In [20]:
import torch
import omnisafe
from omnisafe.utils.model import build_mlp_network
import json

In [24]:
# epoch-0.pt is a saved policy from omnisafe - saved as a dict of parameters.
# NOTE(security): torch.load unpickles the file, so only load checkpoints from a trusted source.
# NOTE(review): consider weights_only=True if the checkpoint holds nothing but tensors - confirm.
# map_location='cpu' lets the checkpoint load on machines that lack the device it was saved on.
model_params = torch.load('epoch-0.pt', map_location='cpu')
print(model_params.keys())

dict_keys(['pi', 'obs_normalizer'])


  model_params = torch.load('epoch-0.pt')


In [18]:
# The checkpoint comes with a JSON file detailing the run, including the model config.
# JSON is UTF-8 by specification, so decode explicitly rather than relying on the
# platform's default text encoding.
with open('config.json', encoding='utf-8') as json_file:
    config = json.load(json_file)

# Drill down: whole config -> model section -> actor section.
print(config.keys())
model_config = config['model_cfgs']
print(model_config.keys())
actor_config = model_config['actor']
print(actor_config)

dict_keys(['seed', 'train_cfgs', 'algo_cfgs', 'logger_cfgs', 'model_cfgs', 'exp_name', 'env_id', 'algo'])
dict_keys(['weight_initialization_mode', 'actor_type', 'linear_lr_decay', 'exploration_noise_anneal', 'std_range', 'actor', 'critic'])
{'hidden_sizes': [64, 64], 'activation': 'tanh', 'lr': 0.0003}


In [47]:
# The 'pi' entry is the policy, stored as a mapping of parameter names to values.
# Work on a shallow copy of that mapping rather than on the checkpoint entry itself.
pi_entry = model_params['pi']
policy_params = pi_entry.copy()
print(policy_params.keys())

odict_keys(['log_std', 'mean.0.weight', 'mean.0.bias', 'mean.2.weight', 'mean.2.bias', 'mean.4.weight', 'mean.4.bias'])


In [4]:
# Shape of the policy's 'log_std' parameter.
print(policy_params['log_std'].shape)

torch.Size([2])


In [5]:
# Shapes of the first hidden layer's weight matrix (W) and bias vector (b).
print(policy_params['mean.0.weight'].shape)
print(policy_params['mean.0.bias'].shape)

torch.Size([64, 60])
torch.Size([64])


In [6]:
# Shapes of the second hidden layer's weight matrix (W) and bias vector (b).
print(policy_params['mean.2.weight'].shape)
print(policy_params['mean.2.bias'].shape)

torch.Size([64, 64])
torch.Size([64])


In [19]:
# Shapes of the output layer's weight matrix (W) and bias vector (b).
print(policy_params['mean.4.weight'].shape)
print(policy_params['mean.4.bias'].shape)

torch.Size([2, 64])
torch.Size([2])


In [48]:
# To recreate the policy, we must instantiate the model and then load in the parameters

# we would be given the obs and act dims
obs_dim = 60
act_dim = 2

# Build an MLP with the layer sizes and activation recorded in the actor config.
# The initialisation mode only sets the starting weights, which load_state_dict
# replaces further down.
mean_base: torch.nn.Module = build_mlp_network(
    sizes=[obs_dim, *actor_config['hidden_sizes'], act_dim],
    activation=actor_config['activation'],
    weight_initialization_mode='kaiming_uniform',
)

# Read (rather than pop) 'log_std' so policy_params is left untouched: the cell can
# then be re-run, and cells that index policy_params by its original keys keep working.
log_std_base = torch.nn.Parameter(policy_params['log_std'], requires_grad=False)
print(log_std_base)

# The saved keys are prefixed with 'mean.' while the freshly built network expects
# bare '<index>.<name>' keys. Strip only the leading prefix (str.replace would remove
# every occurrence) and collect the result in a new dict instead of rebinding
# policy_params.
mean_prefix = 'mean.'
mean_state_dict = {
    key[len(mean_prefix):]: value
    for key, value in policy_params.items()
    if key.startswith(mean_prefix)
}
print(mean_state_dict.keys())

# Load the saved weights and freeze the network.
mean_base.load_state_dict(mean_state_dict)
mean_base.requires_grad_(False)
print(mean_base)

Parameter containing:
tensor([0., 0.])
dict_keys(['0.weight', '0.bias', '2.weight', '2.bias', '4.weight', '4.bias'])
Sequential(
  (0): Linear(in_features=60, out_features=64, bias=True)
  (1): Tanh()
  (2): Linear(in_features=64, out_features=64, bias=True)
  (3): Tanh()
  (4): Linear(in_features=64, out_features=2, bias=True)
  (5): Identity()
)


In [63]:
# The saved policy still has to be fed into the omnisafe system somehow.
# Going through the config overrides looks like the easiest route.
custom_cfgs = {
    'model_cfgs': dict(
        actor_type='projected_gaussian_learning',
        cp_beta=1.1,
        base_policy_dir='tests/saved_source/PPO-{SafetyPointGoal1-v0}/seed-000-2023-03-16-12-08-52',
    ),
}
# base_policy_dir is assumed to be relative to the main omnisafe folder.
# The intent is for the projected gaussian learning actor class to do the loading itself.

In [64]:
# Print the override dict to check its structure before handing it to omnisafe.
print(custom_cfgs)

{'model_cfgs': {'actor_type': 'projected_gaussian_learning', 'cp_beta': 1.1, 'base_policy_dir': 'tests/saved_source/PPO-{SafetyPointGoal1-v0}/seed-000-2023-03-16-12-08-52'}}


In [None]:
# Don't try to run this, doesn't work yet
# Intended use: construct an omnisafe Agent with the custom config overrides.
# NOTE(review): presumably the first two arguments are the algorithm name and the
# environment id - confirm against the omnisafe.Agent signature.
# TODO: re-enable once the configured actor type can be loaded through omnisafe.
agent = omnisafe.Agent('PPO', 'SafetyPointGoal1-v0', custom_cfgs=custom_cfgs)

In [58]:
# Truthiness of strings: an empty string versus a non-empty one.
x, y = '', 'f'
print(bool(x), bool(y), sep='\n')

False
True


In [61]:
# x is an empty string, which is falsy, so this assertion is expected to fail.
# Catch the failure and print it so the demonstration stays visible without
# aborting a "Restart Kernel -> Run All" pass.
try:
    assert x, "string should be non-empty"
except AssertionError as err:
    print(f"{type(err).__name__}: {err}")

AssertionError: string should be non-empty

In [62]:
# y is a non-empty string and therefore truthy, so this assertion passes silently.
assert y, "string should be non-empty"