In [None]:
# 加载模型 & 创建 MaseGraph

from pathlib import Path
import torch
import chop.passes as passes
from chop import MaseGraph
from chop.tools import get_tokenized_dataset, get_trainer
from transformers import AutoModelForSequenceClassification

# 量化+训练时常用日志输出
torch.backends.cudnn.benchmark = True

# 1) 指定模型、数据、tokenizer
checkpoint = "prajjwal1/bert-tiny"
tokenizer_checkpoint = "bert-base-uncased"
dataset_name = "imdb"

# 2) 加载模型，并构建一个 MaseGraph
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
model.config.problem_type = "single_label_classification"

mg = MaseGraph(
    model,
    hf_input_names=["input_ids", "attention_mask", "labels"],
)

# 3) 运行 metadata passes
mg, _ = passes.init_metadata_analysis_pass(mg)
mg, _ = passes.add_common_metadata_analysis_pass(mg)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
`past_key_values` were not specified as input names, but model.config.use_cache = True. Setting model.config.use_cache = False.
[32mINFO    [0m [34mGetting dummy input for prajjwal1/bert-tiny.[0m


tensor([[ 101, 9932, 2089, 2202, 2058, 1996, 2088, 2028, 2154,  102],
        [ 101, 2023, 2003, 2339, 2017, 2323, 4553, 4748, 4877,  102]])
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
tensor([[ 101, 9932, 2089, 2202, 2058, 1996, 2088, 2028, 2154,  102],
        [ 101, 2023, 2003, 2339, 2017, 2323, 4553, 4748, 4877,  102]])
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
tensor([[[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]],


        [[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]]])
tensor([[[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       

In [2]:
# 准备数据 & 构建 Trainer

# 获取已经tokenize好的IMDB数据
dataset, tokenizer = get_tokenized_dataset(
    dataset=dataset_name,
    checkpoint=tokenizer_checkpoint,
    return_tokenizer=True,
)

# 构建Trainer
trainer = get_trainer(
    model=mg.model,
    tokenized_dataset=dataset,
    tokenizer=tokenizer,
    evaluate_metric="accuracy",  # 评估指标
)

# 先测试一下预训练模型在IMDB上的初始精度
eval_results = trainer.evaluate()
print(f"Initial eval_accuracy: {eval_results['eval_accuracy']}")


[32mINFO    [0m [34mTokenizing dataset imdb with AutoTokenizer for bert-base-uncased.[0m
  trainer = Trainer(


Initial eval_accuracy: 0.50896


In [3]:
# 配置并执行 QAT Pass

quantization_config = {
    "by": "type",
    "default": {
        "config": {
            "name": None,
        }
    },
    "linear": {
        "config": {
            "name": "integer",
            # data
            "data_in_width": 8,
            "data_in_frac_width": 4,
            # weight
            "weight_width": 8,
            "weight_frac_width": 4,
            # bias
            "bias_width": 8,
            "bias_frac_width": 4,
        }
    },
}

# 对MaseGraph执行 QAT pass
mg, _ = passes.quantize_transform_pass(
    mg,
    pass_args=quantization_config,
)

# 重新构建Trainer，让它拿到QAT后的模型
trainer = get_trainer(
    model=mg.model,
    tokenized_dataset=dataset,
    tokenizer=tokenizer,
    evaluate_metric="accuracy",
)

# 在IMDB数据上进行若干epoch的QAT训练（示例只跑1-2 epoch, 视硬件而定）
trainer.train()

# 查看QAT后模型精度
eval_results = trainer.evaluate()
print(f"Post-QAT eval_accuracy: {eval_results['eval_accuracy']}")


  trainer = Trainer(


Step,Training Loss
500,0.6373
1000,0.4828
1500,0.4328
2000,0.3862
2500,0.3717
3000,0.3693


Post-QAT eval_accuracy: 0.85908


In [13]:
# 导出并保存 QAT 模型

export_path = f"{Path.home()}/quant_test1_qat"
mg.export(export_path)
print(f"QAT model exported to: {export_path}")


[32mINFO    [0m [34mExporting MaseGraph to /root/quant_test1_qat.pt, /root/quant_test1_qat.mz[0m
[32mINFO    [0m [34mExporting GraphModule to /root/quant_test1_qat.pt[0m
[32mINFO    [0m [34mExporting MaseMetadata to /root/quant_test1_qat.mz[0m


QAT model exported to: /root/quant_test1_qat


In [14]:
# 重新加载导出的 QAT 模型
import torch
from transformers import AutoConfig, AutoModelForSequenceClassification
import torch_tensorrt
from torch_tensorrt.ts import ptq
from pathlib import Path
import dill

with open(f"{Path.home()}/quant_test1_qat", "rb") as f:
    qat_model = dill.load(f)

qat_model.eval()
qat_model.cuda()


FileNotFoundError: [Errno 2] No such file or directory: '/root/quant_test1_qat'

In [21]:
from pathlib import Path

# 1) 只保留 input_ids、attention_mask
mg_infer = MaseGraph(
    mg.model,
    hf_input_names=["input_ids", "attention_mask"]
)

# 2) 再做一次最基本的元数据分析
mg_infer, _ = passes.init_metadata_analysis_pass(mg_infer)
mg_infer, _ = passes.add_common_metadata_analysis_pass(mg_infer)

# 3) 导出到本地 (无labels版本)
export_path = f"{Path.home()}/bert_qat_infer"
mg_infer.export(export_path)

print(f"Inference model exported to: {export_path}")


<class 'torch.fx.graph_module.GraphModule.__new__.<locals>.GraphModuleImpl'>


ValueError: dummy_in must be provided for add_common_metadata_analysis_pass.

In [17]:
qat_model = mg.model
qat_model.eval()
qat_model.cuda()

# 假设 BERT forward: forward(input_ids, attention_mask)
# min_shape/opt_shape/max_shape 表示 [batch_size, seq_len]
inputs = [
    torch_tensorrt.Input(
        min_shape=[1, 16],
        opt_shape=[8, 32],
        max_shape=[16, 128],
        dtype=torch.int64,
    ),
    torch_tensorrt.Input(
        min_shape=[1, 16],
        opt_shape=[8, 32],
        max_shape=[16, 128],
        dtype=torch.int64,
    ),
]

# 编译为 TRT engine
# 如果 QAT 后的权重是INT8，可以开启 {torch.float, torch.int8} 
# 若要保底支持半精度，就加上 torch.half
import torch_tensorrt
from torch_tensorrt.ts import ptq

compiled_trt_mod = torch_tensorrt.compile(
    qat_model,
    inputs=inputs,
    enabled_precisions={torch.float, torch.half, torch.int8},
    device=torch_tensorrt.Device(
        device_type=torch_tensorrt.DeviceType.GPU,
        gpu_id=0,
        dla_core=0,
        allow_gpu_fallback=False,
        disable_tf32=False
    ),
    require_full_compilation=False,  # 允许不支持的算子fallback
    # debug=True,  # 若需要调试输出
)

print("Torch-TensorRT compile done!")


TypeError: missing a required argument: 'labels'

In [16]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

dummy_input = tokenizer(
    ["Torch-TensorRT is awesome!", "Mase QAT on BERT works!"],
    return_tensors="pt",
)

# 注意多输入时需要拆分成 (input_ids, attention_mask)
input_ids = dummy_input["input_ids"].cuda()
attention_mask = dummy_input["attention_mask"].cuda()

with torch.no_grad():
    trt_output = compiled_trt_mod(input_ids, attention_mask)
    print("TRT output shape:", trt_output.logits.shape)  # shape=[batch_size, num_labels]


ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

In [None]:
mg.model = trt_mod # 存储的模型替换为 TensorRT 优化模型

In [None]:
mg.fx_graph #显示转换后的 torch.fx 图

AttributeError: 'MaseGraph' object has no attribute 'onnx_model'

In [19]:
mg.fx_graph

'torch.fx.graph'