In [1]:
import torch
import time
from transformers import AutoTokenizer
from model.modeling_llada import LLaDAModelLM
from generate import generate_with_dual_cache, generate_with_dual_cache_tokenskip

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the model: LLaDA-8B-Instruct weights in bfloat16, moved to `device`
# and put into evaluation mode. The tokenizer comes from the same checkpoint.
device = 'cuda'
MODEL_ID = 'GSAI-ML/LLaDA-8B-Instruct'
model = LLaDAModelLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
model = model.to(device)
model = model.eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

Loading checkpoint shards: 100%|██████████| 6/6 [00:00<00:00,  8.61it/s]


In [3]:
# Prepare the input: wrap the question as a single user turn, render it with
# the tokenizer's chat template (as text, with the generation prompt appended),
# then tokenize it and shape the ids into a batch of one on `device`.
prompt = "Who is Newton, physics?"
m = [dict(role="user", content=prompt)]
text = tokenizer.apply_chat_template(m, tokenize=False, add_generation_prompt=True)
token_ids = tokenizer(text)['input_ids']
input_ids = torch.tensor(token_ids).to(device)
input_ids = input_ids.unsqueeze(0)  # add the leading batch dimension

In [4]:
# Baseline: dual-cache generation without token skipping.
#
# Timing notes:
# - CUDA kernels are launched asynchronously, so the device is synchronized
#   right before starting and right before stopping the clock; otherwise the
#   measurement can miss GPU work that is still in flight.
# - time.perf_counter() is monotonic and higher resolution than time.time().
# NOTE(review): this is the first generation call in the notebook, so t1 may
# also absorb one-time CUDA warm-up cost — consider an untimed warm-up call.
if input_ids.is_cuda:
    torch.cuda.synchronize(input_ids.device)
start = time.perf_counter()
out1, nfe1 = generate_with_dual_cache(model, input_ids, steps=128, gen_length=128, block_length=32, threshold=0.9)
if input_ids.is_cuda:
    torch.cuda.synchronize(input_ids.device)
t1 = time.perf_counter() - start
# Decode only the tokens that follow the prompt.
ans1 = tokenizer.decode(out1[0, input_ids.shape[1]:], skip_special_tokens=True)
# nfe1 is the second value returned by the generator (presumably the number of
# forward evaluations — confirm in generate.py).
print(f"Baseline: {t1:.2f}s, NFE={nfe1}")
print(ans1)

Baseline: 4.96s, NFE=71
Isaac Newton was an English physicist and mathematician who made significant contributions to the development of classical mechanics and optics. He is best known for his laws of motion, which describe the motion of objects, and his law of universal gravitation, which explains the force of gravity between objects. Newton's work laid the foundation for modern physics and is considered one of the most influential figures in the history of science.


In [5]:
# TokenSkip run (hyper-parameters are tunable).
# In the recorded run this setting reproduced the baseline text and NFE;
# presumably a mean-similarity threshold of 1 is (almost) never met, so this
# cell acts as a sanity check — confirm against generate.py.
SKIP_LAYER_K = 18       # number of leading layers used for the skip decision
SKIP_THRESHOLD = 1      # threshold on the mean cosine similarity
SKIP_OUTLIER = 0.7      # if any layer falls below this value, force computation
GEN_LENGTH = 128        # single source of truth for the call and the check below

# CUDA kernels run asynchronously: synchronize before starting and before
# stopping the clock so the measurement covers all GPU work of this call.
if input_ids.is_cuda:
    torch.cuda.synchronize(input_ids.device)
start = time.perf_counter()
out2, nfe2 = generate_with_dual_cache_tokenskip(
    model, input_ids, steps=128, gen_length=GEN_LENGTH, block_length=32, threshold=0.9,
    skip_layer_k=SKIP_LAYER_K, skip_threshold=SKIP_THRESHOLD, skip_outlier=SKIP_OUTLIER
)
if input_ids.is_cuda:
    torch.cuda.synchronize(input_ids.device)
t2 = time.perf_counter() - start
# Decode only the tokens that follow the prompt.
ans2 = tokenizer.decode(out2[0, input_ids.shape[1]:], skip_special_tokens=True)
print(f"TokenSkip: {t2:.2f}s, NFE={nfe2}")
print(ans2)
# Check the output shapes: both runs should be prompt length + GEN_LENGTH wide.
print(f"out1.shape: {out1.shape}")  # baseline
print(f"out2.shape: {out2.shape}")  # tokenskip
print(f"input_ids.shape: {input_ids.shape}")
print(f"预期 gen_length: {GEN_LENGTH}")
print(f"实际生成长度: {out2.shape[1] - input_ids.shape[1]}")

TokenSkip: 4.73s, NFE=71
Isaac Newton was an English physicist and mathematician who made significant contributions to the development of classical mechanics and optics. He is best known for his laws of motion, which describe the motion of objects, and his law of universal gravitation, which explains the force of gravity between objects. Newton's work laid the foundation for modern physics and is considered one of the most influential figures in the history of science.
out1.shape: torch.Size([1, 147])
out2.shape: torch.Size([1, 147])
input_ids.shape: torch.Size([1, 19])
预期 gen_length: 128
实际生成长度: 128


In [6]:
# TokenSkip run with a lower similarity threshold (hyper-parameters are tunable).
# NOTE(review): in the recorded run these values produced corrupted text
# (stray "blockList" tokens) and a higher NFE than the baseline (76 vs 71), so
# any speed-up measured here must be judged together with output quality.
# This cell overwrites out2 / nfe2 / t2 / ans2 from the previous TokenSkip run.
SKIP_LAYER_K = 16       # number of leading layers used for the skip decision
SKIP_THRESHOLD = 0.95   # threshold on the mean cosine similarity
SKIP_OUTLIER = 0.8      # if any layer falls below this value, force computation
GEN_LENGTH = 128        # single source of truth for the call and the check below

# CUDA kernels run asynchronously: synchronize before starting and before
# stopping the clock so the measurement covers all GPU work of this call.
if input_ids.is_cuda:
    torch.cuda.synchronize(input_ids.device)
start = time.perf_counter()
out2, nfe2 = generate_with_dual_cache_tokenskip(
    model, input_ids, steps=128, gen_length=GEN_LENGTH, block_length=32, threshold=0.9,
    skip_layer_k=SKIP_LAYER_K, skip_threshold=SKIP_THRESHOLD, skip_outlier=SKIP_OUTLIER
)
if input_ids.is_cuda:
    torch.cuda.synchronize(input_ids.device)
t2 = time.perf_counter() - start
# Decode only the tokens that follow the prompt.
ans2 = tokenizer.decode(out2[0, input_ids.shape[1]:], skip_special_tokens=True)
print(f"TokenSkip: {t2:.2f}s, NFE={nfe2}")
print(ans2)
# Check the output shapes: both runs should be prompt length + GEN_LENGTH wide.
print(f"out1.shape: {out1.shape}")  # baseline
print(f"out2.shape: {out2.shape}")  # tokenskip
print(f"input_ids.shape: {input_ids.shape}")
print(f"预期 gen_length: {GEN_LENGTH}")
print(f"实际生成长度: {out2.shape[1] - input_ids.shape[1]}")

TokenSkip: 4.66s, NFE=76
Isaac Newton was an English physicist, mathematician, and astronomer who is widelyblockList as one of theblockList most influentialblockList�� in theblockList historyblockList of science. He is best known for his workblockList on theblockList laws of motionblockList and theblockList lawblockList ofblockList universal gravitationblockList (funnelsblockListawaitedblockList
out1.shape: torch.Size([1, 147])
out2.shape: torch.Size([1, 147])
input_ids.shape: torch.Size([1, 19])
预期 gen_length: 128
实际生成长度: 128


In [7]:
# Display the raw string (repr) of the latest TokenSkip answer so that any
# unusual or undecodable characters in it are visible.
ans2

'Isaac Newton was an English physicist, mathematician, and astronomer who is widelyblockList as one of theblockList most influentialblockList�� in theblockList historyblockList of science. He is best known for his workblockList on theblockList laws of motionblockList and theblockList lawblockList ofblockList universal gravitationblockList (funnelsblockListawaitedblockList'

In [8]:
# Comparison: wall-clock ratio (baseline time / TokenSkip time) and the NFE of
# each run. t2 and nfe2 come from whichever TokenSkip cell was executed last.
speedup = t1 / t2
print(f"Speedup: {speedup:.2f}x")
print(f"NFE: {nfe1} -> {nfe2}")

Speedup: 1.06x
NFE: 71 -> 76
