In [None]:
%load_ext autoreload
%autoreload 2

from gflower.config.flow_matching import FlowMatchingEvaluationConfig
from gflower.datasets.sequence import SequenceDataset
from gflower.models_flow.flow_policy import FlowPolicy
from gflower.models_flow.transformer import TransformerFlow
from gflower.models_value.transformer import Transformer as ValueTransformer
import torch
import os
import tqdm
from gflower.config.flow_matching import TransformerConfig
import matplotlib.pyplot as plt
import numpy as np

# Experiment-wide constants shared by the cells below.
# (The dataset built in the next cell is loaded for its normalizer and its
# environment handle; both are reused by prepare_evaluation().)
ENV_NAME = "hopper-medium-expert-v2"
STATE_DIM = 11  # forwarded as `state_dim` to every evaluation config
ACTION_DIM = 3  # forwarded as `action_dim` to every evaluation config
HORIZON = 20  # forwarded as `horizon` to every evaluation config
# Used as `mc_scale` in the MC run and as the exponent scale of the reweighted
# reference curve in the plot cells.
# NOTE: when this is small, MC is more accurate. When it is increased, the MC
# distribution saturates at some point.
GUIDE_SCALE = 0.05


In [None]:

# Built once and shared by every run: prepare_evaluation() takes the normalizer
# and the environment handle (`dataset.env`) from this object.
dataset = SequenceDataset(
    env=ENV_NAME,
    # Use the shared constant instead of a literal so the dataset and the
    # evaluation configs cannot drift apart when HORIZON is changed.
    horizon=HORIZON,
    normalizer="GaussianNormalizer",
    preprocess_fns=[],
    max_path_length=100000,
    max_n_episodes=100000,
    termination_penalty=0,
    seed=0,
)

def _load_weights(model, cfg, *checkpoint_path):
    """Load a checkpoint stored below ``cfg.log_folder/cfg.env`` into ``model``.

    Args:
        model: module whose ``load_state_dict`` receives the checkpoint.
        cfg: evaluation config; ``log_folder``, ``env`` and ``device`` are read.
        *checkpoint_path: path components below ``<log_folder>/<env>``.

    Returns:
        The same ``model`` instance, for convenience.
    """
    state_dict = torch.load(
        os.path.join(cfg.log_folder, cfg.env, *checkpoint_path),
        # Remap storages to the evaluation device so a checkpoint written on a
        # different GPU (or on CPU) still loads on this machine.
        map_location=cfg.device,
    )
    model.load_state_dict(state_dict)
    return model


def prepare_evaluation(cfg: FlowMatchingEvaluationConfig):
    """Build the flow policy described by ``cfg`` and return it with the env.

    The flow model is always created and restored from its EMA checkpoint. A
    value model is added unless ``cfg.guidance_method == 'no'``, and a learned
    guidance model is added only for ``cfg.guidance_method ==
    'guidance_matching'``. The normalizer and the environment come from the
    module-level ``dataset``.

    Args:
        cfg: evaluation configuration (model sizes, checkpoint names, guidance
            settings, device).

    Returns:
        ``(env, flow_policy)`` where ``env`` is ``dataset.env`` and
        ``flow_policy`` is the assembled ``FlowPolicy``.

    NOTE(review): none of the models is switched to ``eval()`` here although the
    value model is built with a dropout setting; presumably ``FlowPolicy``
    takes care of that - confirm.
    """
    normalizer = dataset.normalizer
    transition_dim = cfg.state_dim + cfg.action_dim

    # --- flow model ---------------------------------------------------------
    flow_cfg = cfg.transformer_config
    flow_transformer = TransformerFlow(
        seq_len=cfg.horizon,
        in_channels=transition_dim,
        out_channels=transition_dim,
        hidden_size=flow_cfg.hidden_size,
        depth=flow_cfg.depth,
        num_heads=flow_cfg.num_heads,
        mlp_ratio=flow_cfg.mlp_ratio,
        x_emb_proj=flow_cfg.x_emb_proj,
        x_emb_proj_conv_k=flow_cfg.x_emb_proj_conv_k,
    ).to(cfg.device)
    _load_weights(
        flow_transformer, cfg,
        'flow', cfg.flow_exp_name, f'model_ema_{cfg.flow_cp}.pth',
    )

    # --- value model (skipped only when guidance is disabled) ---------------
    value_model = None
    if cfg.guidance_method != 'no':
        value_cfg = cfg.value_transformer_config
        value_model = ValueTransformer(
            input_dim=transition_dim,
            output_dim=1,
            model_dim=value_cfg.model_dim,
            num_heads=value_cfg.num_heads,
            num_layers=value_cfg.num_layers,
            dropout=value_cfg.dropout,
        ).to(cfg.device)
        _load_weights(
            value_model, cfg,
            'value', cfg.value_exp_name, f'model_{cfg.value_cp}.pth',
        )

    # --- learned guidance model (guidance matching only) --------------------
    guide_model = None
    if cfg.guidance_method == 'guidance_matching':
        predicts_z = cfg.guide_matching_type == 'grad_z'
        guide_cfg = cfg.guide_model_transformer_config
        guide_model = TransformerFlow(
            seq_len=cfg.horizon,
            in_channels=transition_dim,
            # The 'grad_z' variant outputs a single channel; every other
            # variant outputs one channel per state/action dimension.
            out_channels=1 if predicts_z else transition_dim,
            hidden_size=guide_cfg.hidden_size,
            depth=guide_cfg.depth,
            num_heads=guide_cfg.num_heads,
            mlp_ratio=guide_cfg.mlp_ratio,
            x_emb_proj=guide_cfg.x_emb_proj,
            x_emb_proj_conv_k=guide_cfg.x_emb_proj_conv_k,
        ).to(cfg.device)
        # 'grad_z' checkpoints are stored as model_z_<cp>.pth, all others as
        # model_<guide_matching_type>_<cp>.pth.
        if predicts_z:
            guide_checkpoint = f'model_z_{cfg.guide_model_cp}.pth'
        else:
            guide_checkpoint = f'model_{cfg.guide_matching_type}_{cfg.guide_model_cp}.pth'
        _load_weights(
            guide_model, cfg,
            'guidance', cfg.guide_model_exp_name, guide_checkpoint,
        )

    flow_policy = FlowPolicy(
        flow_model=flow_transformer,
        value_model=value_model,
        guide_model=guide_model,
        normalizer=normalizer,
        action_dim=cfg.action_dim,
        state_dim=cfg.state_dim,
        horizon=cfg.horizon,
        cfg=cfg
    )
    return dataset.env, flow_policy



### Unconditioned

In [None]:

from run.utils import deterministic


# Baseline run for the "Unconditioned" histogram. Both guide_scale and
# guide_inference_scale are 0 here - presumably this switches the learned
# guidance off so the samples come from the unguided flow; confirm in FlowPolicy.
deterministic(1)

# Settings that are identical in every run of this notebook.
run_settings = dict(
    device='cuda:0', seed=0, random_repeat=5,
    exp_name="ablation_study", log_folder="../logs",
)
task_settings = dict(
    env=ENV_NAME, state_dim=STATE_DIM, action_dim=ACTION_DIM, horizon=HORIZON,
)
model_settings = dict(
    flow_exp_name="H20_1e6steps", flow_cp=19, flow_matching_type="cfm",
    value_exp_name="H20_inf", value_cp=2, ode_t_steps=30,
)
# Settings specific to this run.
guidance_settings = dict(
    guidance_method="guidance_matching",
    guide_matching_type="grad_z",
    guide_scale=0,
    guide_model_exp_name="H20_scale_1.0_v3",
    guide_model_cp=2,
    guide_inference_scale=0,
    guide_model_transformer_config=TransformerConfig(depth=4, num_heads=4, hidden_size=64),
)
cfg = FlowMatchingEvaluationConfig(
    **run_settings, **task_settings, **model_settings, **guidance_settings
)

env, policy = prepare_evaluation(cfg)

# simulate 1 step
observation = env.reset()
state = env.state_vector().copy()  # not used further in this cell
conditions = {0: observation}
# this policy is "replan-1" in CL_DiffPhyCon
action, samples = policy(conditions, batch_size=2048)

# Keep one scalar per sample: last index of axis 1, first index of axis 2.
unconditioned = samples.values[:, -1, 0].detach().cpu().numpy()


### $\nabla_{x_t} \log Z_t$

In [None]:

# Guidance-matching run with the 'grad_z' guide model.

# Settings that are identical in every run of this notebook.
run_settings = dict(
    device='cuda:0', seed=0, random_repeat=5,
    exp_name="ablation_study", log_folder="../logs",
)
task_settings = dict(
    env=ENV_NAME, state_dim=STATE_DIM, action_dim=ACTION_DIM, horizon=HORIZON,
)
model_settings = dict(
    flow_exp_name="H20_1e6steps", flow_cp=19, flow_matching_type="cfm",
    value_exp_name="H20_inf", value_cp=2, ode_t_steps=30,
)
# Settings specific to this run.
guidance_settings = dict(
    guidance_method="guidance_matching",
    guide_matching_type="grad_z",
    guide_scale=1.0,
    guide_model_exp_name="H20_scale_10.0_v3",
    guide_model_cp=2,
    guide_inference_scale=100.0,
    guide_model_transformer_config=TransformerConfig(depth=4, num_heads=4, hidden_size=64),
)
cfg = FlowMatchingEvaluationConfig(
    **run_settings, **task_settings, **model_settings, **guidance_settings
)
env, policy = prepare_evaluation(cfg)

# simulate 1 step
observation = env.reset()
state = env.state_vector().copy()  # not used further in this cell
conditions = {0: observation}
# this policy is "replan-1" in CL_DiffPhyCon
action, samples = policy(conditions, batch_size=1024)

# Keep one scalar per sample: last index of axis 1, first index of axis 2.
values_logz = samples.values[:, -1, 0].detach().cpu().numpy()


### Gradient guidance: gradient w.r.t. $x_t$, computed at $x_1$

In [4]:

# Gradient-guidance run: gradient taken w.r.t. x_t, computed at x_1.

# Settings that are identical in every run of this notebook.
run_settings = dict(
    device='cuda:0', seed=0, random_repeat=5,
    exp_name="ablation_study", log_folder="../logs",
)
task_settings = dict(
    env=ENV_NAME, state_dim=STATE_DIM, action_dim=ACTION_DIM, horizon=HORIZON,
)
model_settings = dict(
    flow_exp_name="H20_1e6steps", flow_cp=19, flow_matching_type="cfm",
    value_exp_name="H20_inf", value_cp=2, ode_t_steps=30,
)
# Settings specific to this run.
guidance_settings = dict(
    guidance_method="gradient",
    grad_compute_at="x_1",
    grad_wrt="x_t",
    grad_schedule="cosine_decay",
    grad_scale=2,
)
cfg = FlowMatchingEvaluationConfig(
    **run_settings, **task_settings, **model_settings, **guidance_settings
)
env, policy = prepare_evaluation(cfg)

# simulate 1 step
observation = env.reset()
state = env.state_vector().copy()  # not used further in this cell
conditions = {0: observation}
# this policy is "replan-1" in CL_DiffPhyCon
action, samples = policy(conditions, batch_size=1024)

# Keep one scalar per sample: last index of axis 1, first index of axis 2.
grad_xt_x1 = samples.values[:, -1, 0].detach().cpu().numpy()


### Gradient guidance: gradient w.r.t. $x_1$, computed at $x_1$

In [5]:

# Gradient-guidance run: gradient taken w.r.t. x_1, computed at x_1.

# Settings that are identical in every run of this notebook.
run_settings = dict(
    device='cuda:0', seed=0, random_repeat=5,
    exp_name="ablation_study", log_folder="../logs",
)
task_settings = dict(
    env=ENV_NAME, state_dim=STATE_DIM, action_dim=ACTION_DIM, horizon=HORIZON,
)
model_settings = dict(
    flow_exp_name="H20_1e6steps", flow_cp=19, flow_matching_type="cfm",
    value_exp_name="H20_inf", value_cp=2, ode_t_steps=30,
)
# Settings specific to this run.
guidance_settings = dict(
    guidance_method="gradient",
    grad_compute_at="x_1",
    grad_wrt="x_1",
    grad_schedule="cosine_decay",
    grad_scale=1,
)
cfg = FlowMatchingEvaluationConfig(
    **run_settings, **task_settings, **model_settings, **guidance_settings
)
env, policy = prepare_evaluation(cfg)

# simulate 1 step
observation = env.reset()
state = env.state_vector().copy()  # not used further in this cell
conditions = {0: observation}
# this policy is "replan-1" in CL_DiffPhyCon
action, samples = policy(conditions, batch_size=1024)

# Keep one scalar per sample: last index of axis 1, first index of axis 2.
grad_x1_x1 = samples.values[:, -1, 0].detach().cpu().numpy()


In [6]:
# Release PyTorch's cached CUDA memory before the MC cell below, which rebuilds
# the models on every loop iteration.
torch.cuda.empty_cache()

### MC

In [None]:
# Monte Carlo guidance, sampled in several small batches that are concatenated
# into one flat array of per-sample values (`mc_all`) for the histogram.
deterministic(0)

n_mc_samples = 256   # total number of samples collected across all chunks
mc_chunk_size = 64   # samples drawn per policy call

# The config does not change between chunks, so build it once.
cfg = FlowMatchingEvaluationConfig(
    device='cuda:0',
    seed=0,
    random_repeat=5,
    exp_name="ablation_study",
    log_folder="../logs",
    env=ENV_NAME,
    state_dim=STATE_DIM,
    action_dim=ACTION_DIM,
    horizon=HORIZON,
    flow_exp_name="H20_1e6steps",
    flow_cp=19,
    flow_matching_type="cfm",
    value_exp_name="H20_inf",
    value_cp=2,
    ode_t_steps=30,
    guidance_method="mc",
    mc_batch_size=256,
    mc_scale=GUIDE_SCALE,
    mc_ep=1e-2,
    mc_ss=1,
    mc_self_normalize=False, # we enable this in the actual experiment for more stable performance, but False is exact MC guidance
)

mc_chunks = []
for _ in tqdm.tqdm(range(n_mc_samples // mc_chunk_size)):
    # Rebuilt per chunk exactly as before.
    # NOTE(review): the policy could probably be built once outside the loop,
    # but constructing the models may consume RNG state, so hoisting it could
    # change the drawn samples - confirm before changing.
    env, policy = prepare_evaluation(cfg)
    # simulate 1 step
    observation = env.reset()
    state = env.state_vector().copy()
    conditions = {0: observation}
    action, samples = policy(conditions, batch_size=mc_chunk_size) # this policy is "replan-1" in CL_DiffPhyCon

    # Reduce to one scalar per sample and move it to a NumPy array, exactly as
    # every other run in this notebook does. Appending the raw tensor would
    # fail for a CUDA tensor and would mix in entries the other histograms
    # do not contain.
    mc_chunks.append(samples.values[:, -1, 0].detach().cpu().numpy())

# Concatenate once instead of growing the array with np.append per iteration.
mc_all = np.concatenate(mc_chunks)


### Guidance Matching

In [99]:

# Guidance-matching run with guide_matching_type="direct" (plotted as "GM").

# Settings that are identical in every run of this notebook.
run_settings = dict(
    device='cuda:0', seed=0, random_repeat=5,
    exp_name="ablation_study", log_folder="../logs",
)
task_settings = dict(
    env=ENV_NAME, state_dim=STATE_DIM, action_dim=ACTION_DIM, horizon=HORIZON,
)
model_settings = dict(
    flow_exp_name="H20_1e6steps", flow_cp=19, flow_matching_type="cfm",
    value_exp_name="H20_inf", value_cp=2, ode_t_steps=30,
)
# Settings specific to this run.
guidance_settings = dict(
    guidance_method="guidance_matching",
    guide_matching_type="direct",
    guide_scale=7.0,
    guide_model_exp_name="H20_scale_10.0_g_direct_v5",
    guide_model_cp=2,
    guide_inference_scale=10.0,
    guide_model_transformer_config=TransformerConfig(depth=2, num_heads=2, hidden_size=64),
)
cfg = FlowMatchingEvaluationConfig(
    **run_settings, **task_settings, **model_settings, **guidance_settings
)
env, policy = prepare_evaluation(cfg)

# simulate 1 step
observation = env.reset()
state = env.state_vector().copy()  # not used further in this cell
conditions = {0: observation}
# this policy is "replan-1" in CL_DiffPhyCon
action, samples = policy(conditions, batch_size=1024)

# Keep one scalar per sample: last index of axis 1, first index of axis 2.
gm = samples.values[:, -1, 0].detach().cpu().numpy()


### VGM

In [100]:

# Guidance-matching run with guide_matching_type="use_learned_v" (plotted as "VGM").

# Settings that are identical in every run of this notebook.
run_settings = dict(
    device='cuda:0', seed=0, random_repeat=5,
    exp_name="ablation_study", log_folder="../logs",
)
task_settings = dict(
    env=ENV_NAME, state_dim=STATE_DIM, action_dim=ACTION_DIM, horizon=HORIZON,
)
model_settings = dict(
    flow_exp_name="H20_1e6steps", flow_cp=19, flow_matching_type="cfm",
    value_exp_name="H20_inf", value_cp=2, ode_t_steps=30,
)
# Settings specific to this run.
guidance_settings = dict(
    guidance_method="guidance_matching",
    guide_matching_type="use_learned_v",
    guide_scale=1.0,
    guide_model_exp_name="H20_scale_1.0_g_use_learned_v_v5",
    guide_model_cp=2,
    guide_inference_scale=10.0,
    guide_model_transformer_config=TransformerConfig(depth=2, num_heads=2, hidden_size=64),
)
cfg = FlowMatchingEvaluationConfig(
    **run_settings, **task_settings, **model_settings, **guidance_settings
)
env, policy = prepare_evaluation(cfg)

# simulate 1 step
observation = env.reset()
state = env.state_vector().copy()  # not used further in this cell
conditions = {0: observation}
# this policy is "replan-1" in CL_DiffPhyCon
action, samples = policy(conditions, batch_size=1024)

# Keep one scalar per sample: last index of axis 1, first index of axis 2.
vgm = samples.values[:, -1, 0].detach().cpu().numpy()


### RGM

In [101]:

# Guidance-matching run with guide_matching_type="rw_use_learned_z" (plotted as "RGM").

# Settings that are identical in every run of this notebook.
run_settings = dict(
    device='cuda:0', seed=0, random_repeat=5,
    exp_name="ablation_study", log_folder="../logs",
)
task_settings = dict(
    env=ENV_NAME, state_dim=STATE_DIM, action_dim=ACTION_DIM, horizon=HORIZON,
)
model_settings = dict(
    flow_exp_name="H20_1e6steps", flow_cp=19, flow_matching_type="cfm",
    value_exp_name="H20_inf", value_cp=2, ode_t_steps=30,
)
# Settings specific to this run.
guidance_settings = dict(
    guidance_method="guidance_matching",
    guide_matching_type="rw_use_learned_z",
    guide_scale=1.0,
    guide_model_exp_name="H20_scale_1.0_g_rw_use_learned_z_v5",
    guide_model_cp=2,
    guide_inference_scale=1.0,
    guide_model_transformer_config=TransformerConfig(depth=2, num_heads=2, hidden_size=64),
)
cfg = FlowMatchingEvaluationConfig(
    **run_settings, **task_settings, **model_settings, **guidance_settings
)
env, policy = prepare_evaluation(cfg)

# simulate 1 step
observation = env.reset()
state = env.state_vector().copy()  # not used further in this cell
conditions = {0: observation}
# this policy is "replan-1" in CL_DiffPhyCon
action, samples = policy(conditions, batch_size=1024)

# Keep one scalar per sample: last index of axis 1, first index of axis 2.
rgm = samples.values[:, -1, 0].detach().cpu().numpy()


### MRGM

In [102]:

# Guidance-matching run with guide_matching_type="rw" (plotted as "MRGM").

# Settings that are identical in every run of this notebook.
run_settings = dict(
    device='cuda:0', seed=0, random_repeat=5,
    exp_name="ablation_study", log_folder="../logs",
)
task_settings = dict(
    env=ENV_NAME, state_dim=STATE_DIM, action_dim=ACTION_DIM, horizon=HORIZON,
)
model_settings = dict(
    flow_exp_name="H20_1e6steps", flow_cp=19, flow_matching_type="cfm",
    value_exp_name="H20_inf", value_cp=2, ode_t_steps=30,
)
# Settings specific to this run.
guidance_settings = dict(
    guidance_method="guidance_matching",
    guide_matching_type="rw",
    guide_scale=1.0,
    guide_model_exp_name="H20_scale_1.0_g_rw_v5",
    guide_model_cp=2,
    guide_inference_scale=1.0,
    guide_model_transformer_config=TransformerConfig(depth=2, num_heads=2, hidden_size=64),
)
cfg = FlowMatchingEvaluationConfig(
    **run_settings, **task_settings, **model_settings, **guidance_settings
)
env, policy = prepare_evaluation(cfg)

# simulate 1 step
observation = env.reset()
state = env.state_vector().copy()  # not used further in this cell
conditions = {0: observation}
# this policy is "replan-1" in CL_DiffPhyCon
action, samples = policy(conditions, batch_size=1024)

# Keep one scalar per sample: last index of axis 1, first index of axis 2.
mrgm = samples.values[:, -1, 0].detach().cpu().numpy()


### Plot

In [None]:
# Compact figure (legend to the right of the axes): histograms of the values
# obtained with each guidance method, plus an analytic reference curve.
print(samples.values.shape)

nbins = 20

fig, ax = plt.subplots(1, 1)
# Histogram of the unguided samples. Keep the per-bin densities and the bin
# edges so the reference curve is evaluated on exactly the same grid.
hist_density, bin_edges, _ = ax.hist(unconditioned, bins=nbins, alpha=0.5, label='$p(R)$', density=True)

# Reference curve: the unguided density reweighted by exp(GUIDE_SCALE * R).
# It is evaluated at the bin centres and normalised with the true bin widths.
# An evenly spaced grid of `nbins` points from min to max would be shifted
# against the bins and would leave the curve integrating to (nbins - 1) / nbins.
values = 0.5 * (bin_edges[:-1] + bin_edges[1:])
bin_widths = np.diff(bin_edges)
# Subtracting the maximum only rescales the weights by a constant, which the
# normalisation removes, and prevents overflow when GUIDE_SCALE is large.
unconditioned_height = hist_density * np.exp(GUIDE_SCALE * (values - values.max()))
unconditioned_height = unconditioned_height / (unconditioned_height * bin_widths).sum()
ax.plot(values, unconditioned_height, label='$\\frac{1}{Z}p(R)e^{R(x_1)}$', ls='--', lw=4, color='grey', zorder=100)

ax.hist(mc_all, bins=nbins, alpha=0.5, label='$g^{MC}$', density=True, zorder=10)
# ax.hist(values_logz, bins=nbins, alpha=0.5, label='Learned $\log Z_t$', density=True)
ax.hist(grad_xt_x1, bins=nbins, alpha=0.5, label='$g^{cov-G}$', density=True)
ax.hist(grad_x1_x1, bins=nbins, alpha=0.5, label='$g^{cov-A}$', density=True)
# ax.hist(gm, bins=nbins, alpha=0.5, label='GM', density=True, zorder=10)
# ax.hist(vgm, bins=nbins, alpha=0.5, label='VGM', density=True, zorder=10)
# ax.hist(rgm, bins=nbins, alpha=0.5, label='RGM', density=True, zorder=10)
# ax.hist(mrgm, bins=nbins, alpha=0.5, label='MRGM', density=True, zorder=10)

# Legend placed outside the axes, to the right.
ax.legend(bbox_to_anchor=(1.05, 0.2, 0.33, 0.2), loc="lower left",
          mode="expand", borderaxespad=0, ncol=1)

ax.set_xlabel('Estimated $R$')
ax.set_ylabel('Probability Density of \n Generated Samples')
# ax.set_title('Distribution of $R$')

fig.set_size_inches(5, 3.5)
plt.show()

In [50]:
# Save the compact figure. Create the output directory first so savefig does
# not fail when ../images does not exist yet.
os.makedirs('../images', exist_ok=True)
fig.savefig(f'../images/ablation_generated_return_{ENV_NAME}_scale_{GUIDE_SCALE}_small.pdf', bbox_inches='tight')



In [None]:
# Wide figure (legend above the axes, with title): histograms of the values
# obtained with each guidance method, plus an analytic reference curve.
print(samples.values.shape)

nbins = 20

fig, ax = plt.subplots(1, 1)
# Histogram of the unguided samples. Keep the per-bin densities and the bin
# edges so the reference curve is evaluated on exactly the same grid.
hist_density, bin_edges, _ = ax.hist(unconditioned, bins=nbins, alpha=0.5, label='$p(R)$ (w/o guidance)', density=True)

# Reference curve: the unguided density reweighted by exp(GUIDE_SCALE * R).
# It is evaluated at the bin centres and normalised with the true bin widths.
# An evenly spaced grid of `nbins` points from min to max would be shifted
# against the bins and would leave the curve integrating to (nbins - 1) / nbins.
values = 0.5 * (bin_edges[:-1] + bin_edges[1:])
bin_widths = np.diff(bin_edges)
# Subtracting the maximum only rescales the weights by a constant, which the
# normalisation removes, and prevents overflow when GUIDE_SCALE is large.
unconditioned_height = hist_density * np.exp(GUIDE_SCALE * (values - values.max()))
unconditioned_height = unconditioned_height / (unconditioned_height * bin_widths).sum()
ax.plot(values, unconditioned_height, label='$\\frac{1}{Z}p(R)e^{R(x_1)}$', ls='--', lw=4, color='grey', zorder=100)

ax.hist(mc_all, bins=nbins, alpha=0.5, label='$g^{MC}$', density=True, zorder=10)
# ax.hist(values_logz, bins=nbins, alpha=0.5, label='Learned $\log Z_t$', density=True)
ax.hist(grad_xt_x1, bins=nbins, alpha=0.5, label='$g^{cov-G}$', density=True)
ax.hist(grad_x1_x1, bins=nbins, alpha=0.5, label='$g^{cov-A}$', density=True)
# ax.hist(gm, bins=nbins, alpha=0.5, label='GM', density=True, zorder=10)
# ax.hist(vgm, bins=nbins, alpha=0.5, label='VGM', density=True, zorder=10)
# ax.hist(rgm, bins=nbins, alpha=0.5, label='RGM', density=True, zorder=10)
# ax.hist(mrgm, bins=nbins, alpha=0.5, label='MRGM', density=True, zorder=10)

# Legend placed above the axes, spread over three columns.
ax.legend(bbox_to_anchor=(0, 1.15, 1.0, 0.2), loc="lower left",
          mode="expand", borderaxespad=0, ncol=3)

ax.set_xlabel('Estimated $R$')
ax.set_ylabel('Probability Density of \n Generated Samples')
ax.set_title('Distribution of $R$')

fig.set_size_inches(5, 3)
plt.show()

In [38]:
# Save the wide figure. Create the output directory first so savefig does not
# fail when ../images does not exist yet.
os.makedirs('../images', exist_ok=True)
fig.savefig(f'../images/ablation_generated_return_{ENV_NAME}_scale_{GUIDE_SCALE}.pdf', bbox_inches='tight')
