In [1]:
import sys
from pathlib import Path
# Make the project's source tree importable from this notebook.
# The path is relative, so it is resolved against the kernel's working directory.
SRC_ROOT = "../src"
already_registered = any(entry == str(SRC_ROOT) for entry in sys.path)
if not already_registered:
    # Prepend so modules under SRC_ROOT are found before other sys.path entries.
    sys.path.insert(0, str(SRC_ROOT))

In [2]:
from twc.twc_io import mcc_obs_encoder, twc_out_2_mcc_action
from twc.twc_builder import build_twc, TWC
from td3 import TD3Config
import json
import gymnasium as gym
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
2025-11-28 21:15:12.258332: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Gymnasium id of the environment the actors are inspected against.
ENV_ID = "MountainCarContinuous-v0"

env = gym.make(ENV_ID)
# Trailing expression: shows the observation-space bounds as the cell output.
env.observation_space

Box([-1.2  -0.07], [0.6  0.07], (2,), float32)

In [4]:
# Load the TD3 run configuration saved next to the training outputs.
config_path = Path("../out/runs/bptt_trials/v2_td3_OU_20251128.json")
print(config_path)

# Start from the default configuration so the notebook still runs when the
# saved JSON is not available.
cfg = TD3Config()
# `exists` must be *called*: the bare bound method is always truthy, which made
# the original guard a no-op and let `open()` raise on a missing file.
if config_path.exists():
    with open(config_path, 'r') as f:
        config_data = json.load(f)
    # NOTE(review): `load` presumably returns a config populated from the
    # parsed dict -- confirm against TD3Config.
    cfg = cfg.load(config_data)
else:
    print(f"Config file not found: {config_path} -- using TD3Config defaults")

print(cfg.to_json())

../out/runs/bptt_trials/v2_td3_OU_20251128.json
{
    "max_train_steps": 200000,
    "max_episode_steps": 999,
    "warmup_steps": 10000,
    "batch_size": 256,
    "num_update_loops": 2,
    "update_every": 1,
    "device": "cuda",
    "seed": 42,
    "eval_interval_episodes": 10,
    "eval_episodes": 10,
    "use_bptt": true,
    "sequence_length": 8,
    "burn_in_length": 4,
    "actor_lr": 0.0002239407231090426,
    "critic_lr": 0.0001828306017572226,
    "gamma": 0.9823522271023871,
    "tau": 0.007693135327059323,
    "policy_delay": 2,
    "target_noise": 0.28415959581368067,
    "noise_clip": 0.31789035300173857,
    "exp_noise": 0.1,
    "ou_sigma_init": 0.39609107327435644,
    "ou_sigma_end": 0.08244881107627974,
    "twc_internal_steps": 1,
    "twc_trhesholds": [
        -0.5,
        0.0,
        0.0
    ],
    "twc_decays": [
        0.1,
        0.1,
        0.1
    ],
    "rnd_init": true,
    "use_v2": true,
    "steepness_fire": 14.434533089746672,
    "steepness_gj"

In [9]:
# Extra keyword arguments that are only forwarded when the config enables V2.
V2_PARAM_NAMES = (
    'steepness_fire',
    'steepness_gj',
    'steepness_input',
    'input_thresh',
    'leaky_slope',
)
v2_params = (
    {key: getattr(cfg, key) for key in V2_PARAM_NAMES}
    if cfg.use_v2
    else {}
)

# Both actors are built from exactly the same arguments; they are told apart
# only by the checkpoints loaded into them afterwards.
actor_kwargs = dict(
    obs_encoder=mcc_obs_encoder,
    action_decoder=twc_out_2_mcc_action,
    internal_steps=cfg.twc_internal_steps,
    initial_thresholds=cfg.twc_trhesholds,  # attribute name is spelled this way in the config
    initial_decays=cfg.twc_decays,
    rnd_init=cfg.rnd_init,
    use_V2=cfg.use_v2,
    log_stats=False,
    **v2_params,
)

actor1 = build_twc(**actor_kwargs)
actor2 = build_twc(**actor_kwargs)

In [10]:
# Checkpoints of the same training run: the "best" actor and the "final_bptt" actor.
model1_path = Path("../out/runs/td3_OU/twc_mcc_VTrue_OU_20251128_110857/twc_td3_PER_OU_actor_best_20251128_110857.pth")
model2_path = Path("../out/runs/td3_OU/twc_mcc_VTrue_OU_20251128_110857/twc_td3_PER_OU_actor_final_bptt_20251128_110857.pth")

# The run config lists device "cuda", so the saved tensors may be CUDA tensors.
# Mapping them to CPU lets this notebook run on machines without a GPU;
# load_state_dict copies the values into the actors' existing parameters.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
sd_1 = torch.load(model1_path, map_location="cpu")
sd_2 = torch.load(model2_path, map_location="cpu")

actor1.load_state_dict(state_dict=sd_1)
# Trailing expression: displays the key-matching report for the second actor.
actor2.load_state_dict(state_dict=sd_2)

<All keys matched successfully>

In [11]:
import pandas as pd

# Compare the two actors parameter by parameter and print a summary table.

# Index actor2's parameters by name once, instead of rebuilding the dict on
# every loop iteration as before.
actor2_params = dict(actor2.named_parameters())

params_comparison = []
for name, param in actor1.named_parameters():
    actor1_val = param.detach().cpu().numpy().flatten()
    # Raises KeyError if actor2 has no parameter with this name.
    actor2_param = actor2_params[name]
    actor2_val = actor2_param.detach().cpu().numpy().flatten()

    # For display, show the shape, the first three values of each actor and
    # the mean absolute element-wise difference.
    params_comparison.append({
        'Parameter': name,
        'Actor1 Shape': str(param.shape),
        'Actor1 Sample': str(actor1_val[:3]),
        'Actor2 Sample': str(actor2_val[:3]),
        'Mean Diff': float(np.abs(actor1_val - actor2_val).mean())
    })

df_comparison = pd.DataFrame(params_comparison)
print(df_comparison.to_string())

              Parameter        Actor1 Shape                          Actor1 Sample                          Actor2 Sample  Mean Diff
0    in_layer.threshold     torch.Size([4])     [1.6468078  1.7450118  0.14859128]     [1.6468078  1.7450118  0.14859128]   0.000000
1        in_layer.decay     torch.Size([4])     [0.4209258  0.62085056 0.29374385]     [0.4209258  0.62085056 0.29374385]   0.000000
2   hid_layer.threshold     torch.Size([5])  [ 2.7238908  -3.4675682  -0.43376815]     [ 1.5374659 -1.9731901  1.121392 ]   1.495082
3       hid_layer.decay     torch.Size([5])     [0.58932954 2.1169004  0.6520392 ]     [0.58932954 3.9928794  3.2492979 ]   2.121823
4   out_layer.threshold     torch.Size([2])                  [10.644872 -2.021256]                  [11.603736 -2.021256]   0.479432
5       out_layer.decay     torch.Size([2])              [16.998411    0.43383226]              [28.790312    0.43383226]   5.895950
6           in2hid_IN.w  torch.Size([4, 5])  [-0.41014877  0.72668654