In [24]:
# 导入AutoModel类，该类允许自动从预训练模型库加载模型
from modelscope import AutoModel, AutoModelForCausalLM, AutoTokenizer, AutoConfig
import torch
import torch.nn as nn
# Checkpoint to load: the Qwen2.5-0.5B-Instruct model from the "Qwen" namespace on ModelScope.
check_point = "Qwen/Qwen2.5-0.5B-Instruct"
# To load from a local copy instead, point `check_point` at the local model directory.

# Load the bare backbone (AutoModel, i.e. no task-specific head) and move it to the GPU.
# - `dtype=torch.float16` loads the weights in half precision in one step; the previous
#   `dtype="auto"` followed by `.half()` first picked the checkpoint's own dtype and then
#   overrode it with a second full-model cast.
# - `trust_remote_code=True` was dropped: it permits executing unverified code shipped in
#   the model repository, and the architecture printed below is built from library
#   classes (Qwen2Model), so it should not be required here.
# - `.cuda()` requires a CUDA-capable GPU; this cell fails on a CPU-only machine.
model: nn.Module = AutoModel.from_pretrained(pretrained_model_name_or_path=check_point, dtype=torch.float16).cuda()

# Load the tokenizer that matches the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=check_point)

# Load the configuration object that matches the checkpoint.
conf = AutoConfig.from_pretrained(pretrained_model_name_or_path=check_point)

# Last expression of the cell: render the module tree.
model

Downloading Model from https://www.modelscope.cn to directory: /root/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2026-01-21 11:15:59,659 - modelscope - INFO - Target directory already exists, skipping creation.


Downloading Model from https://www.modelscope.cn to directory: /root/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2026-01-21 11:16:02,562 - modelscope - INFO - Target directory already exists, skipping creation.


Downloading Model from https://www.modelscope.cn to directory: /root/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2026-01-21 11:16:04,043 - modelscope - INFO - Target directory already exists, skipping creation.


Qwen2Model(
  (embed_tokens): Embedding(151936, 896)
  (layers): ModuleList(
    (0-23): 24 x Qwen2DecoderLayer(
      (self_attn): Qwen2Attention(
        (q_proj): Linear(in_features=896, out_features=896, bias=True)
        (k_proj): Linear(in_features=896, out_features=128, bias=True)
        (v_proj): Linear(in_features=896, out_features=128, bias=True)
        (o_proj): Linear(in_features=896, out_features=896, bias=False)
      )
      (mlp): Qwen2MLP(
        (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
        (up_proj): Linear(in_features=896, out_features=4864, bias=False)
        (down_proj): Linear(in_features=4864, out_features=896, bias=False)
        (act_fn): SiLUActivation()
      )
      (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
    )
  )
  (norm): Qwen2RMSNorm((896,), eps=1e-06)
  (rotary_emb): Qwen2RotaryEmbedding()
)

In [25]:
# Register a 2-class linear head on the loaded backbone as the submodule `classification_layer`.
# A freshly constructed nn.Linear lives on the CPU in float32, whereas the backbone was
# moved to the GPU in half precision, so the head is moved to the backbone's device/dtype
# before it is attached. The input width comes from the config instead of a literal 896.
# NOTE: add_module only registers the submodule (it shows up in the repr and in
# model.parameters()); it does not by itself change what model.forward computes.
backbone_param = next(model.parameters())
classification_layer = nn.Linear(conf.hidden_size, 2).to(device=backbone_param.device, dtype=backbone_param.dtype)
model.add_module(name="classification_layer", module=classification_layer)

# Build a second model from the configuration object alone. Only `conf` is passed, so no
# checkpoint path is involved; presumably the weights are freshly initialised rather than
# pretrained -- confirm against the AutoModel.from_config documentation.
model_from_conf = AutoModel.from_config(config=conf)
model_from_conf

Qwen2Model(
  (embed_tokens): Embedding(151936, 896)
  (layers): ModuleList(
    (0-23): 24 x Qwen2DecoderLayer(
      (self_attn): Qwen2Attention(
        (q_proj): Linear(in_features=896, out_features=896, bias=True)
        (k_proj): Linear(in_features=896, out_features=128, bias=True)
        (v_proj): Linear(in_features=896, out_features=128, bias=True)
        (o_proj): Linear(in_features=896, out_features=896, bias=False)
      )
      (mlp): Qwen2MLP(
        (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
        (up_proj): Linear(in_features=896, out_features=4864, bias=False)
        (down_proj): Linear(in_features=4864, out_features=896, bias=False)
        (act_fn): SiLUActivation()
      )
      (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
    )
  )
  (norm): Qwen2RMSNorm((896,), eps=1e-06)
  (rotary_emb): Qwen2RotaryEmbedding()
)

In [26]:
# Round-trip a short string through the tokenizer (text -> ids -> text).
# Only the last expression of a cell is rendered, so intermediate results are printed
# explicitly instead of being left as bare expressions whose values are discarded.
print(tokenizer.decode(tokenizer.encode("“你好”")))

# Encoding: tokenize a batch of three sentences into PyTorch tensors.
# padding=True pads the shorter sentences so the batch forms a rectangular tensor.
sentences = ["你好，你是谁？", "我谁也不是，我是你", "出口成章"]
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")

# Move the token ids to whichever device the model's parameters live on
# (previously hard-coded to "cuda").
inputs_ids = inputs['input_ids'].to(next(model.parameters()).device)
print(inputs_ids.shape)
print(inputs_ids)

# Decoding: turn the third row of ids back into text. This row is the shortest sentence,
# so it also contains the padding ids appended by the tokenizer.
print(tokenizer.decode(inputs_ids[2]))

# Last expression of the cell: render the full encoding (input_ids and attention_mask).
inputs

{'input_ids': tensor([[108386,   3837, 105043, 100165,  11319, 151643],
        [ 35946, 100165, 104993,   3837, 104198,  56568],
        [102048,  12857,  44928, 151643, 151643, 151643]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 0, 0, 0]])}

# 检视模型（Qwen/Qwen2.5-0.5B-Instruct）

## embed_tokens：
Embedding(151936, 896)，词嵌入层：词表大小 vocab_size=151936，将每个 token id 映射为一个 896 维向量
## layers（ModuleList）：
一个标准的 Decoder-only Transformer 架构模型，由 24 个结构相同的 Qwen2DecoderLayer 堆叠而成（0-23）。每个 Qwen2DecoderLayer 包含：
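1. 自注意力层（self_attn，Qwen2Attention；其所在的解码器层共重复 24 次）：
    1. query（q_proj）：Linear(896 → 896, bias=True)；
    2. key（k_proj）：Linear(896 → 128, bias=True)；
    3. value（v_proj）：Linear(896 → 128, bias=True)；
    4. output（o_proj）：Linear(896 → 896, bias=False)；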
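2. MLP（Qwen2MLP，多层感知机层）：
    1. gate_proj：Linear(896 → 4864, bias=False)；
    2. up_proj：Linear(896 → 4864, bias=False)；
    3. down_proj：Linear(4864 → 896, bias=False)；
    4. act_fn：SiLUActivation()，激活函数。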

In [27]:
# Inspect the model: render the module tree of the loaded backbone, which now also
# lists the `classification_layer` submodule registered in an earlier cell.
model

Qwen2Model(
  (embed_tokens): Embedding(151936, 896)
  (layers): ModuleList(
    (0-23): 24 x Qwen2DecoderLayer(
      (self_attn): Qwen2Attention(
        (q_proj): Linear(in_features=896, out_features=896, bias=True)
        (k_proj): Linear(in_features=896, out_features=128, bias=True)
        (v_proj): Linear(in_features=896, out_features=128, bias=True)
        (o_proj): Linear(in_features=896, out_features=896, bias=False)
      )
      (mlp): Qwen2MLP(
        (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
        (up_proj): Linear(in_features=896, out_features=4864, bias=False)
        (down_proj): Linear(in_features=4864, out_features=896, bias=False)
        (act_fn): SiLUActivation()
      )
      (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
    )
  )
  (norm): Qwen2RMSNorm((896,), eps=1e-06)
  (rotary_emb): Qwen2RotaryEmbedding()
  (classification_layer): Linear(in_features=896, out