In [1]:
import sys
from pathlib import Path

# Assumes this notebook lives in "Deep Learning Pack/notebook" and that the
# kernel's working directory is that folder, so the parent directory is the
# project root (presumably where the `dlphys` package is importable from).
ROOT = Path.cwd().parent  # -> Deep Learning Pack

# Keep ROOT at the front of sys.path exactly once. Any copy left behind by a
# previous run of this cell is dropped first, so re-running the cell does not
# pile up duplicate entries while ROOT still takes import priority.
_root_entry = str(ROOT)
sys.path[:] = [entry for entry in sys.path if entry != _root_entry]
sys.path.insert(0, _root_entry)

import torch
from dlphys.config.base import ExperimentConfig
from dlphys.config.registry import build_model
import dlphys.models  # 关键：触发模型注册（side effect）

In [2]:
from dlphys.models.toy_attention import ToyAttentionConfig, ToyAttentionDynamics

# Smoke test: build the toy attention dynamics straight from its config class
# (no registry involved) and apply it once to a random state.
cfg_m = ToyAttentionConfig(
    d_model=16,
    d_k=32,
    L=4,
    num_heads=1,
    gamma=0.5,
    phi="identity",
)
m = ToyAttentionDynamics(cfg_m)

# The state tensor is laid out as (batch, L + 1, d_model); batch is 2 here.
state_shape = (2, cfg_m.L + 1, cfg_m.d_model)
s0 = torch.randn(*state_shape)
s1 = m(s0)

# Report input/output shapes and whether every output entry is finite.
all_finite = torch.isfinite(s1).all().item()
print("s0", s0.shape, "s1", s1.shape)
print("finite?", all_finite)

s0 torch.Size([2, 5, 16]) s1 torch.Size([2, 5, 16])
finite? True


In [3]:
import dlphys.models  # import again so the name is bound before reload() below
from importlib import reload

# If the model files were edited without restarting the kernel, a reload is
# needed before the registry is used again.
# NOTE(review): reload() re-executes the `dlphys.models` package module only;
# whether already-imported submodules are refreshed depends on what the
# package's __init__ does -- confirm this actually picks up your edits.
reload(dlphys.models)

# Single source of truth for the dimensions shared by the model and the state.
width, mem_len = 16, 3

cfg = ExperimentConfig(
    project_name="toy",
    device="cpu",
    extra={
        "model_name": "toy_attention",
        "model_kwargs": dict(
            d_model=width,
            d_k=32,
            L=mem_len,
            num_heads=1,
            gamma=0.5,
            phi="identity",
        ),
    },
)

# Build through the registry and show which class it resolved to.
m = build_model(cfg)
print(type(m))

# State shape is (batch, L + 1, d_model) = (2, 4, 16).
s0 = torch.randn(2, mem_len + 1, width)
s1 = m(s0)
s1.shape

<class 'dlphys.models.toy_attention.ToyAttentionDynamics'>


torch.Size([2, 4, 16])

In [7]:
from dlphys.core.run_dynamics import rollout
import torch

# Dimensions shared by the model config and the initial state.
width, mem_len = 16, 5

cfg = ExperimentConfig(
    project_name="toy",
    device="cpu",
    extra={
        "model_name": "toy_attention",
        "model_kwargs": dict(
            d_model=width,
            d_k=32,
            L=mem_len,
            num_heads=1,
            gamma=0.5,
            phi="identity",
        ),
    },
)

# Initial state: (batch, L + 1, d_model) = (2, 6, 16).
s0 = torch.randn(2, mem_len + 1, width)

# Roll the dynamics forward for T steps; rollout() receives the config rather
# than a model instance.
n_steps = 20
out = rollout(cfg, s0=s0, T=n_steps)

# Display the number of stored states plus the shapes of the first and last.
states = out["states"]
len(states), states[0].shape, states[-1].shape

(21, torch.Size([2, 6, 16]), torch.Size([2, 6, 16]))

In [11]:
from dlphys.analysis.jvp import jvp_F
import torch

# NOTE: `m` is whatever model an earlier cell left bound in the kernel; this
# cell does not build its own. Both state dimensions are therefore read from
# the model's config, so the probe always matches the model currently in `m`.
B = 1
d_model = m.cfg.d_model
L = m.cfg.L

# The check below reads output slot 2, so the state needs at least 3 slots.
if L < 2:
    raise ValueError(f"shift check needs m.cfg.L >= 2, got L={L}")

s = torch.randn(B, L + 1, d_model)
v = torch.zeros_like(s)

# Perturb only memory block 1 (x_{t-1}); all other slots of the tangent stay 0.
v[:, 1, :] = torch.randn_like(v[:, 1, :])

# Jacobian-vector product of the one-step map at s in direction v.
jvp = jvp_F(lambda _s: m(_s), s, v)

# Companion-shift prediction: memory slot 2 of the output should equal memory
# slot 1 of the input (the state is ordered [x_t, x_{t-1}, x_{t-2}, ...]).
# The 1e-12 term keeps the ratio defined if the perturbation norm is zero.
err = (jvp[:, 2, :] - v[:, 1, :]).norm() / (v[:, 1, :].norm() + 1e-12)
print("relative shift error:", err.item())

relative shift error: 0.0


In [43]:
import torch
import dlphys.models
from dlphys.config.registry import build_model
from dlphys.analysis.lyapunov import lyapunov_max_benettin
from dlphys.config.base import ExperimentConfig

# Dimensions shared by the model config and the initial state.
width, mem_len = 16, 5

cfg = ExperimentConfig(
    project_name="toy",
    device="cpu",
    extra={
        "model_name": "toy_attention",
        "model_kwargs": dict(
            d_model=width,
            d_k=32,
            L=mem_len,
            num_heads=1,
            gamma=0.5,
            phi="identity",
        ),
    },
)

# Build through the registry and switch the model to eval mode.
m = build_model(cfg)
m = m.eval()

# Initial state: B = 2 trajectories, shape (batch, L + 1, d_model) = (2, 6, 16).
s0 = torch.randn(2, mem_len + 1, width)


def F(s):
    """One-step map handed to the Lyapunov estimator: F(s) = m(s)."""
    return m(s)


out = lyapunov_max_benettin(F, s0, burn_in=200, T=600, return_traj=True)

# Per-trajectory estimate, its mean, and the shape of the final state.
lambda_hat = out["lambda_hat"]
lambda_mean = out["lambda_mean"]
final_state = out["final_state"]
print("lambda_hat (per batch):", lambda_hat)
print("lambda_mean:", lambda_mean)
print("final_state shape:", final_state.shape)

lambda_hat (per batch): tensor([-0.1551, -0.1551])
lambda_mean: tensor(-0.1551)
final_state shape: torch.Size([2, 6, 16])
