In [1]:
import os
from pathlib import Path
project_root = os.path.join(str(Path.home()), 'diffusion_models')
os.chdir(project_root)
%pwd # should be PPGA root dir

'/home/sumeet/diffusion_models'

In [2]:
import pickle
import torch
import numpy as np

from autoencoders.policy.resnet3d import ResNet3DAutoEncoder
from autoencoders.policy.hypernet import HypernetAutoEncoder
from attrdict import AttrDict
from RL.actor_critic import Actor
from envs.brax_custom.brax_env import make_vec_env_brax
from IPython.display import HTML, Image
from IPython.display import display
from brax.io import html, image
from dataset.policy_dataset import preprocess_model, postprocess_model

In [3]:
# params to config
device = torch.device('cuda')
env_name = 'halfcheetah'
seed = 1111
normalize_obs = True
normalize_rewards = False
obs_shape = 18
action_shape = 6
mlp_shape = (128, 128, 6)

env_cfg = AttrDict({
    'env_name': env_name,
    'env_batch_size': None,
    'num_dims': 2,
    'seed': seed,
    'num_envs': 1
})

In [4]:
archive_df_path = 'data/archive_100x100_seed1111.pkl'
with open(archive_df_path, 'rb') as f:
    archive_df = pickle.load(f)

scheduler_path = 'data/scheduler_100x100_seed1111.pkl'
with open(scheduler_path, 'rb') as f:
    scheduler = pickle.load(f)



In [5]:
# make the env
env = make_vec_env_brax(env_cfg)

2023-03-21 16:58:27.256407: W external/org_tensorflow/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc:497] The NVIDIA driver's CUDA version is 11.8 which is older than the ptxas CUDA version (12.0.76). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


In [6]:
def get_best_elite():
    best_elite = scheduler.archive.best_elite
    agent = Actor(obs_shape, action_shape, True, True).deserialize(best_elite.solution).to(device)
    if normalize_obs:
        agent.obs_normalizer = best_elite.metadata['obs_normalizer']
    return agent

In [7]:
def get_random_elite():
    elite = scheduler.archive.sample_elites(1)
    agent = Actor(obs_shape, action_shape, True, True).deserialize(elite.solution_batch.flatten()).to(device)
    if normalize_obs:
        agent.obs_normalizer = elite.metadata_batch[0]['obs_normalizer']
    return agent

In [8]:
def enjoy_brax(agent, render=True, deterministic=True):
    if normalize_obs:
        obs_mean, obs_var = agent.obs_normalizer.obs_rms.mean, agent.obs_normalizer.obs_rms.var
        print(f'{obs_mean=}')

    obs = env.reset()
    rollout = [env.unwrapped._state]
    total_reward = 0
    measures = torch.zeros(env_cfg.num_dims).to(device)
    done = False
    while not done:
        with torch.no_grad():
            obs = obs.unsqueeze(dim=0).to(device)
            if normalize_obs:
                obs = (obs - obs_mean) / torch.sqrt(obs_var + 1e-8)

            if deterministic:
                act = agent.actor_mean(obs)
            else:
                act, _, _ = agent.get_action(obs)
            act = act.squeeze()
            obs, rew, done, info = env.step(act.cpu())
            measures += info['measures']
            rollout.append(env.unwrapped._state)
            total_reward += rew
    if render:
        i = HTML(html.render(env.unwrapped._env.sys, [s.qp for s in rollout]))
        display(i)
    print(f'{total_reward=}')
    print(f' Rollout length: {len(rollout)}')
    measures /= len(rollout)
    print(f'Measures: {measures.cpu().numpy()}')
    return total_reward.detach().cpu().numpy()

In [10]:
agent = get_random_elite()
# make sure pre and post-processing are working correctly. This should return
# the exact same agent as the previous line
# agent = postprocess_model(agent, preprocess_model(agent, mlp_shape), mlp_shape, deterministic=False).to(device)
enjoy_brax(agent, render=False)

obs_mean=tensor([ 0.4838,  0.8245,  0.0529, -0.0847,  0.3163, -0.1195, -0.2334, -0.4301,
        -0.2169,  4.5125, -0.0123,  0.0546, -0.2033,  0.2391, -0.1372,  0.0401,
        -0.1360,  0.1825], device='cuda:0')
total_reward=tensor(1293.6501, device='cuda:0')
 Rollout length: 1001
Measures: [0.7112887  0.15584415]


array(1293.6501, dtype=float32)

In [11]:
# load the VAE model
autoencoder_cp_path = 'checkpoints/autoencoder.pt'
vae_model = HypernetAutoEncoder(emb_channels=8, z_channels=4)
vae_model.load_state_dict(torch.load(autoencoder_cp_path))
vae_model.to(device)

HypernetAutoEncoder(
  (quant_conv): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
  (post_quant_conv): Conv2d(8, 4, kernel_size=(1, 1), stride=(1, 1))
  (encoder): ModelEncoder(
    (cnns): ModuleDict(
      (actor_logstd): Sequential(
        (fc1): Linear(in_features=6, out_features=256, bias=True)
        (relu1): ReLU(inplace=True)
        (fc2): Linear(in_features=256, out_features=256, bias=True)
        (relu2): ReLU(inplace=True)
        (fc3): Linear(in_features=256, out_features=256, bias=True)
        (relu3): ReLU(inplace=True)
      )
      (actor_mean_0_weight): Sequential(
        (cnn_block_0): Sequential(
          (conv0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (batchnorm0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu0): ReLU(inplace=True)
          (maxpool_0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        )
        (cnn_block_1): Sequentia

In [12]:
# get the policy input tensor
policy_tensor = preprocess_model(agent, (128, 128, 6)).to(device).unsqueeze(dim=0)
policy_tensor.shape

torch.Size([1, 1, 6, 128, 128])

In [13]:
# get the policy weights dict for shank's ghn based VAE
input_weights_dict = {}
for key, param in agent.named_parameters():
    if 'weight' in key or 'bias' in key or 'logstd' in key:
        input_weights_dict[key] = param.unsqueeze(0)

In [14]:
# get the reconstructed model
out, _ = vae_model(input_weights_dict)

# this is the 'policy as a tensor' way of doing reconstruction
# model_in = Actor(obs_shape, action_shape, True, True).to(device)
# rec_agent = postprocess_model(model_in, out, (128, 128, 6), deterministic=False)
# rec_agent.obs_normalizer = agent.obs_normalizer
# rec_agent.to(device)

# this is the 'weights dict -> Actor' method of reconstruction i.e. out is already an Actor object
rec_agent = out[0]
rec_agent.obs_normalizer = agent.obs_normalizer
rec_agent.to(device)

  if param.grad is not None:


Actor(
  (actor_mean): Sequential(
    (0): Linear(in_features=18, out_features=128, bias=True)
    (1): Tanh()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): Tanh()
    (4): Linear(in_features=128, out_features=6, bias=True)
  )
  (obs_normalizer): NormalizeObservation(
    (obs_rms): RunningMeanStd()
  )
)

In [18]:

enjoy_brax(rec_agent, render=True, deterministic=True)

obs_mean=tensor([ 0.4838,  0.8245,  0.0529, -0.0847,  0.3163, -0.1195, -0.2334, -0.4301,
        -0.2169,  4.5125, -0.0123,  0.0546, -0.2033,  0.2391, -0.1372,  0.0401,
        -0.1360,  0.1825], device='cuda:0')


total_reward=tensor(1750.8484, device='cuda:0')
 Rollout length: 1001
Measures: [0.65434563 0.19280718]


array(1750.8484, dtype=float32)