### 首先我们回顾一下Chapter1的内容
### 如何使用 transformers 下载并部署一个大模型
### 如何加载这个大模型，如何在交互前预设

https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct

In [43]:
import os

import torch
import transformers
from transformers import AutoModelForCausalLM,AutoTokenizer,pipeline

### 1. 使用 model 和 tokenizer 的基础方法

In [None]:
# Hugging Face Hub repository id of the checkpoint used throughout this notebook.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# Never commit access tokens to a notebook. Read the token from the environment
# instead (e.g. `export HF_TOKEN=...` before starting Jupyter). The value is
# None when the variable is unset, which is fine for public checkpoints.
# NOTE(review): a real-looking token was previously hardcoded here; it should be
# revoked in the Hugging Face account settings.
HF_TOKEN = os.environ.get("HF_TOKEN")

In [None]:
# Load the causal language model identified by `model_name`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,  # allow external model code (models not built into transformers)
    torch_dtype="auto",
    device_map="cuda",       # run on the GPU
    # token=HF_TOKEN,        # needed the first time the model is downloaded
)

In [48]:
# Show the module tree of the loaded model (layers and their dimensions).
print(model)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=896, out_features=896, bias=True)
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): Linear(in_features=896, out_features=128, bias=True)
          (o_proj): Linear(in_features=896, out_features=896, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((896,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbe

In [76]:
# Load the tokenizer that belongs to the same checkpoint as the model.
# Pass `token=HF_TOKEN` as a second argument if the download requires authentication.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Inspect the special tokens (end-of-sequence, padding, chat markers, ...).
tokenizer.special_tokens_map

{'eos_token': '<|im_end|>',
 'pad_token': '<|endoftext|>',
 'additional_special_tokens': ['<|im_start|>',
  '<|im_end|>',
  '<|object_ref_start|>',
  '<|object_ref_end|>',
  '<|box_start|>',
  '<|box_end|>',
  '<|quad_start|>',
  '<|quad_end|>',
  '<|vision_start|>',
  '<|vision_end|>',
  '<|vision_pad|>',
  '<|image_pad|>',
  '<|video_pad|>']}

In [62]:
# The system prompt sets the assistant's persona; the user prompt is the question.
system_prompt = "你是一个历史学家，精通中国历史上下五千年"
user_prompt = "请问诗仙是谁？简单介绍一下他，并且给出一首他的诗"

# Conversation as a list of role/content dictionaries.
messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=user_prompt),
]

# Render the conversation to a plain string (no tokenization yet) and append
# the assistant header so that generation continues as the assistant's turn.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

print(text)


<|im_start|>system
你是一个历史学家，精通中国历史上下五千年<|im_end|>
<|im_start|>user
请问诗仙是谁？简单介绍一下他，并且给出一首他的诗<|im_end|>
<|im_start|>assistant



In [63]:
# Tokenize the rendered prompt as a batch of one, request PyTorch tensors,
# and move them to the device the model lives on.
model_inputs = tokenizer(
    [text],
    return_tensors="pt",
).to(model.device)
print(model_inputs)

{'input_ids': tensor([[151644,   8948,    198,  56568, 101909, 100022, 108026,   3837, 114806,
          58695, 100022, 102285,  75108, 106705, 151645,    198, 151644,    872,
            198, 109194, 100045, 100717, 105518,  11319, 100405, 109432,  42411,
          90395, 100136, 107485, 108462, 100648, 100045, 151645,    198, 151644,
          77091,    198]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}


In [65]:
# Generate up to 512 new tokens for the prompt. Each sequence is sliced below
# on the assumption that it starts with its prompt tokens.
output_ids_batch = model.generate(
    **model_inputs,
    max_new_tokens = 512,
)

# Drop the prompt prefix from every sequence so only the completion remains.
# A separate name is used for the trimmed result, so re-running this cell
# never slices an already-trimmed tensor.
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, output_ids_batch)
]

# Decode the first (and only) completion, hiding special tokens such as <|im_end|>.
response = tokenizer.batch_decode(generated_ids, skip_special_tokens = True)[0]
print(response)


当然可以，以下是一首出自唐朝诗人李白的《静夜思》：

床前明月光，疑是地上霜。
举头望明月，低头思故乡。


### 2. 使用transformers的pipeline来简化流程

In [67]:
# Load the causal language model for the pipeline section
# (same checkpoint and arguments as the earlier load in this notebook).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="cuda",
    torch_dtype="auto",
)

In [68]:
# Show the module tree of the loaded model.
print(model)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=896, out_features=896, bias=True)
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): Linear(in_features=896, out_features=128, bias=True)
          (o_proj): Linear(in_features=896, out_features=896, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((896,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbe

In [69]:
# Load the tokenizer for the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [70]:
# Wrap model + tokenizer in a text-generation pipeline so that templating,
# tokenization, generation and decoding happen in a single call.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,          # sampling is enabled, so the output can differ between runs
    return_full_text=False,  # return only the newly generated text, not the prompt
)

Device set to use cuda


In [73]:
# Feed the chat messages straight to the pipeline and print the text of the
# first returned result.
output = generator(messages)
print(output[0]["generated_text"])

李白（701年2月8日－762年），字太白，号青莲居士，唐代伟大的浪漫主义诗人，被后人称为“诗仙”。其作品以七言绝句和三首七言律诗最著名。代表作有《将进酒》、《庐山谣》等。
