In [1]:
## Point HF_ENDPOINT at the hf-mirror mirror site; it is used to download the Qwen/Qwen2.5-0.5B-Instruct model
# NOTE(review): presumably this has to run before transformers is imported so the
# endpoint is picked up -- confirm against the huggingface_hub documentation.
import os
os.environ['HF_ENDPOINT']='https://hf-mirror.com/'

In [2]:
from transformers import Qwen2ForCausalLM,AutoTokenizer
import torch

In [2]:
# Checkpoint to load and the device that should hold its weights.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
device='cuda:0'
# Load the causal-LM weights directly onto `device`, then the matching tokenizer.
model = Qwen2ForCausalLM.from_pretrained(model_name, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [8]:
# Display the configuration object attached to the loaded model.
model.config

Qwen2Config {
  "_attn_implementation_autoset": true,
  "_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
  "architectures": [
    "Qwen2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
  "hidden_act": "silu",
  "hidden_size": 896,
  "initializer_range": 0.02,
  "intermediate_size": 4864,
  "max_position_embeddings": 32768,
  "max_window_layers": 21,
  "model_type": "qwen2",
  "num_attention_heads": 14,
  "num_hidden_layers": 24,
  "num_key_value_heads": 2,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.1",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}

In [3]:
# Display the generation configuration attached to the loaded model.
model.generation_config

GenerationConfig {
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "pad_token_id": 151643,
  "repetition_penalty": 1.1,
  "temperature": 0.7,
  "top_k": 20,
  "top_p": 0.8
}

In [4]:
# Official Qwen example

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# Render the conversation as one string; add_generation_prompt=True appends
# "<|im_start|>assistant\n" so that the model continues as the assistant.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Internally this is presumably a for loop that generates one token at a time.
with torch.inference_mode():
    model_output_ids = model.generate(**model_inputs, max_new_tokens=512)

# Each output row starts with the prompt ids; slice them off to keep only the
# newly generated part.
prompt_len = model_inputs.input_ids.shape[1]
generated_ids = [row[prompt_len:] for row in model_output_ids]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)

Sure! A large language model (LLM) is an artificial intelligence system that can generate human-like text based on the input it receives. These models are typically trained using large amounts of textual data and can be used for a wide range of tasks such as natural language processing, machine translation, summarization, and more.

The development of LLMs has been driven by the increasing availability of text-based knowledge, the growing complexity of natural language, and the need for more efficient ways to process and generate information. With advancements in computing power and computational resources, LLMs have become increasingly powerful and capable of performing complex tasks with high accuracy and speed.


In [6]:
# The tokenizer's chat template rewrites `messages` into dialogue-formatted text.
# Having the LLM continue writing from this text is all that a chat model does.
text 

'<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nGive me a short introduction to large language model.<|im_end|>\n<|im_start|>assistant\n'

In [14]:
# The tokenizer splits the `text` above into tokens and converts them to integer ids,
# producing input_ids and attention_mask.
# The attention_mask here is all ones.
model_inputs

{'input_ids': tensor([[151644,   8948,    198,   2610,    525,   1207,  16948,     11,   3465,
            553,  54364,  14817,     13,   1446,    525,    264,  10950,  17847,
             13, 151645,    198, 151644,    872,    198,  35127,    752,    264,
           2805,  16800,    311,   3460,   4128,   1614,     13, 151645,    198,
         151644,  77091,    198]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [62]:
# The ids output by the decoder model.
# Note that the leading ids are identical to the input ids.
print(model_output_ids)
print(tokenizer.eos_token_id)

tensor([[151644,   8948,    198,   2610,    525,   1207,  16948,     11,   3465,
            553,  54364,  14817,     13,   1446,    525,    264,  10950,  17847,
             13, 151645,    198, 151644,    872,    198,  35127,    752,    264,
           2805,  16800,    311,   3460,   4128,   1614,     13, 151645,    198,
         151644,  77091,    198,     32,   3460,   4128,   1614,    374,    264,
            943,    315,  20443,  11229,    429,    646,   8193,   3738,  12681,
           1467,   3118,    389,  11127,   3897,    311,    432,     13,   4220,
           4119,    525,   6188,    311,  55359,    279,   5810,   4128,   8692,
          16928,    315,  12677,    323,    646,   6923,  55787,    323,   2266,
           1832,   8311,  14507,    304,   5257,  30476,   1741,    438,   1467,
           9471,     11,  28285,   2022,     11,   3405,  35764,     11,    323,
            803,     13,  20286,   4128,   4119,    614,   1012,  13570,   1483,
            304,   5043,   1

In [8]:
# Decode the model output back to text to inspect it
print(tokenizer.decode(model_output_ids[0]))
print(f'\neos_token:  {tokenizer.eos_token}')

<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
Give me a short introduction to large language model.<|im_end|>
<|im_start|>assistant
Large language models (LLMs) are artificial intelligence systems that can generate human-like text based on a pre-defined set of rules or patterns. These models are designed to mimic the complexity and creativity of human language, allowing them to understand context, learn from previous interactions, and produce coherent responses. LLMs have become increasingly popular in fields such as natural language processing, machine translation, chatbots, and virtual assistants due to their ability to process vast amounts of data quickly and generate meaningful outputs. They have also been used for tasks like image captioning, sentiment analysis, and question answering, demonstrating their potential to automate complex tasks and improve efficiency across various industries.<|im_end|>

eos_token:  

In [9]:
# Shape of the input id tensor
model_inputs['input_ids'].shape

torch.Size([1, 39])

In [10]:
# The forward pass returns logits of shape (batch_size, seq_len, vocab_size).
# The shape expression must be the cell's last *top-level* statement: Jupyter only
# echoes a bare expression in that position, so an expression nested inside the
# `with` body is evaluated and silently discarded.
# The model is also invoked via model(...) rather than model.forward(...), so the
# regular nn.Module call path (including hooks) is used.
with torch.inference_mode():
    logits = model(**model_inputs)[0]
logits.shape

In [12]:
# A minimal greedy prediction function: generates one token at a time until an eos
# token is produced, max_new_tokens tokens were added, or the length reaches max_len.
# Decoding parameters such as temperature, top-k and top-p are not used here.
@torch.inference_mode()
def my_predict(inputs:torch.Tensor,model:"Qwen2ForCausalLM",max_len:int=1024,max_new_tokens:int=512,eos_token_id:"int | list[int] | None"=None):
    """Greedily extend a single prompt one token at a time.

    Args:
        inputs: Token ids of shape (1, seq_len). Only batch size 1 is supported,
            because the next token is read from ``logits[0, -1]``.
        model: Callable that accepts ``input_ids=...`` and returns an object whose
            element ``[0]`` holds the logits of shape (batch, seq_len, vocab_size).
        max_len: Upper bound on the total sequence length (prompt + generated).
            A prompt that is already this long is returned unchanged.
        max_new_tokens: Upper bound on the number of tokens to append.
        eos_token_id: A single id or an iterable of ids that end generation.
            ``None`` (default) falls back to the notebook-level
            ``tokenizer.eos_token_id``, which matches the previous behaviour.

    Returns:
        Tensor of shape (1, seq_len + n_generated) holding the prompt followed by
        the generated ids (a terminating eos id, if produced, is included).

    Raises:
        ValueError: If ``inputs`` is not a 2-D tensor with batch size 1.
    """
    if inputs.dim() != 2 or inputs.shape[0] != 1:
        raise ValueError(
            f"my_predict expects inputs of shape (1, seq_len), got {tuple(inputs.shape)}"
        )

    # Resolve the set of stop ids once instead of touching the tokenizer every step.
    if eos_token_id is None:
        eos_token_id = tokenizer.eos_token_id
    if eos_token_id is None:
        stop_ids = set()
    elif isinstance(eos_token_id, int):
        stop_ids = {eos_token_id}
    else:
        stop_ids = set(eos_token_id)

    for _ in range(max_new_tokens):
        # Check the length budget *before* generating so max_len is never exceeded,
        # even when the prompt itself is already max_len tokens long.
        if inputs.shape[1] >= max_len:
            break
        # No KV cache: the whole sequence is re-encoded at every step.
        logits = model(input_ids=inputs)[0]
        # Greedy choice: the highest-scoring token at the last position.
        next_token = torch.argmax(logits[0, -1]).view(1, 1)
        inputs = torch.cat([inputs, next_token], dim=1)
        if next_token.item() in stop_ids:
            break
    return inputs
input_ids = model_inputs['input_ids']
prompt_len = input_ids.shape[1]
print(f'输入的sentence：\n{tokenizer.batch_decode(input_ids)[0]}')
# Run the hand-written greedy decoder, then keep only the ids after the prompt.
full_ids = my_predict(inputs=input_ids, model=model)
output_ids = full_ids[0, prompt_len:]
output_sentence = tokenizer.decode(output_ids)
print(f'输出的sentence：\n{output_sentence}')

输入的sentence：
<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
Give me a short introduction to large language model.<|im_end|>
<|im_start|>assistant

输出的sentence：
Certainly! A large language model, or LLM, is a type of artificial intelligence that can generate human-like text based on a given input. These models are designed to be highly accurate, creative, and adaptable, making them useful in a wide range of applications, including but not limited to:

1. **Chatbots**: Large language models are often used in chatbots to provide customer service, answer questions, and engage in conversations with users.

2. **Text Generation**: They can generate text that is coherent, creative, and even original, which is useful for tasks such as writing articles, creating stories, or even writing books.

3. **Sentiment Analysis**: Large language models can analyze and interpret the sentiment of text, which is useful in areas like social 

In [3]:
# Try the base model
from transformers import Qwen2ForCausalLM,AutoTokenizer
import torch
model_name = "Qwen/Qwen2.5-0.5B"
device='cuda:0'
# `model` and `tokenizer` are rebound here, so from this cell on they refer to
# the base checkpoint.
model = Qwen2ForCausalLM.from_pretrained(model_name, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [9]:
# What happens if the base model is used for the chat task

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# Render the conversation as one string; add_generation_prompt=True appends
# "<|im_start|>assistant\n" so that the model continues as the assistant.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Internally this is presumably a for loop that generates one token at a time.
with torch.inference_mode():
    model_output_ids = model.generate(**model_inputs, max_new_tokens=256)

# Each output row starts with the prompt ids; slice them off to keep only the
# newly generated part. Special tokens are kept when decoding.
prompt_len = model_inputs.input_ids.shape[1]
generated_ids = [row[prompt_len:] for row in model_output_ids]

response = tokenizer.batch_decode(generated_ids)[0]
print(response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Large language models (LLMs) are artificial intelligence systems that can generate human-like text. They are designed to understand and generate text based on a large amount of data, and they are often used for tasks such as text generation, summarization, and translation. LLMs are particularly useful for tasks that require high-level reasoning and creativity, such as writing, writing reviews, and writing articles. They are also used in natural language processing (NLP) tasks, such as sentiment analysis and question answering. Overall, LLMs are a powerful tool for automating tasks that require human-like reasoning and creativity.
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Come

In [4]:
# Plain continuation of raw text (no chat template)
inputs = tokenizer(["窗前明月"], return_tensors='pt').to(model.device)
model_output_ids = model.generate(**inputs, max_new_tokens=128)
# Observation: it seems to run to the specified maximum length every time,
# without ever generating an eos_token.
print(model_output_ids.shape)
decoded_batch = tokenizer.batch_decode(model_output_ids)
output_text = decoded_batch[0]
print(output_text)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


torch.Size([1, 132])
窗前明月光，____。
A. 不是花中偏爱菊
B. 举头望明月
C. 举杯邀明月
D. 举杯邀明月
答案:
B

下列关于“三会一课”制度说法正确的是____。
A. 党支部应当组织党员按期参加党员大会、党小组会和上党课，定期召开党支部委员会会议
B. 党支部应当组织党员按期参加民主评议
C. 党支部应当指定1名以上有表决权的党员作评议对象
D. 党支部应当每月开展
