In [1]:
import torch
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache

# Checkpoint identity: a Hugging Face repo id plus a pinned revision. Both the
# tokenizer and the weights below are requested with the same revision.
CKPT = "stabilityai/stablelm-3b-4e1t"
REV = "fa4a6a9"
DEVICE = "cpu"

# Directory that receives the artifacts written by this notebook; an already
# existing directory is reused instead of raising.
OUT = Path("dist")
OUT.mkdir(exist_ok=True)

tok = AutoTokenizer.from_pretrained(CKPT, revision=REV, use_fast=True)

# Load the weights, move the module to DEVICE, then switch it to eval mode.
model = AutoModelForCausalLM.from_pretrained(CKPT, revision=REV)
model = model.to(DEVICE)
model = model.eval()

In [None]:
# Text that is tokenized and fed through the model.
# NOTE(review): the run of "#" characters looks like a redacted placeholder —
# confirm the intended message is substituted before executing this cell.
msg = "AI2025{############}"

# Request PyTorch tensors and ask the tokenizer not to add special tokens,
# then move the token ids to DEVICE.
encoded = tok(msg, return_tensors="pt", add_special_tokens=False)
ids = encoded["input_ids"].to(DEVICE)

In [3]:
# One forward pass with gradient tracking disabled; the model is handed a
# freshly constructed DynamicCache to use as its key/value cache.
with torch.no_grad():
    cache = DynamicCache(config=model.config)
    out = model(
        input_ids=ids,
        past_key_values=cache,
        use_cache=True,
        return_dict=True,
    )

# Entry 0 of the returned cache unpacks into a (key, value) pair.
# NOTE(review): presumably entry 0 is the first decoder layer — confirm.
layer0_kv = out.past_key_values[0]
k0, v0 = layer0_kv
print(
    "k0 shape:", k0.shape,
    "v0 shape:", v0.shape,
)

# Size of the second axis of `ids`.
# NOTE(review): presumably the token count of `msg` — confirm.
T = ids.size(1)

# Drop the leading axis of the key tensor (size 1 in the recorded output, where
# k0 is [1, 32, 16, 80]) and make sure the result is laid out contiguously.
# NOTE(review): the name suggests keys with rotary position embedding already
# applied — confirm against the model implementation.
K_rot = torch.squeeze(k0, 0).contiguous()

k0 shape: torch.Size([1, 32, 16, 80]) v0 shape: torch.Size([1, 32, 16, 80])


In [4]:
# Bundle the key tensor with the metadata needed to interpret it: T, the sizes
# of K_rot's axes 0 and 2 (stored as "H" and "Dh"), and the checkpoint id and
# revision it was produced from. Only the key tensor is stored; v0 is not
# part of the payload.
payload = {
    "K_rot": K_rot,
    "T": T,
    "H": K_rot.size(0),
    "Dh": K_rot.size(2),
    "model": CKPT,
    "revision": REV,
}

artifact_path = OUT / "kv_cache.pt"
torch.save(payload, artifact_path)