In [1]:
import os
import sys
import warnings; warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import torch as th

from pprint import pp
from datasets import (load_dataset, load_from_disk, Dataset)
from transformers import (AutoTokenizer, 
                          BitsAndBytesConfig,
                          AutoModel, 
                          AutoModelForCausalLM, 
                          AutoModelForSequenceClassification,
                          DataCollatorWithPadding, 
                          DataCollatorForLanguageModeling,
                          DataCollatorForSeq2Seq, 
                          DataCollatorForTokenClassification,
                          TrainingArguments, Trainer)
from peft import (LoraConfig, get_peft_model, PeftModel, TaskType, get_peft_model_state_dict)
from trl import SFTTrainer
from vllm import (LLM, SamplingParams)



In [2]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = th.device("cuda") if th.cuda.is_available() else th.device("cpu")
# Number of CUDA devices visible to this process (variable name kept as originally spelled).
devive_cnt = th.cuda.device_count()
print(f"device = {device}; devive_cnt = {devive_cnt}")
# PyTorch version and the CUDA version reported by the build, one per line.
print(th.__version__, th.version.cuda, sep="\n")

device = cuda; devive_cnt = 1
2.5.1+cu121
12.1


In [3]:
# Filesystem locations used by the notebook.
# The defaults are machine-specific; set NOTEBOOK_PROJECT_DIR / NOTEBOOK_MODEL_DIR
# in the environment to run on another machine without editing this cell.
path_project = os.environ.get(
    "NOTEBOOK_PROJECT_DIR",
    "C:/my_project/MyGit/Machine-Learning-Column/hugging_face",
)
# Root folder under which local model checkpoints are looked up.
path_model = os.environ.get("NOTEBOOK_MODEL_DIR", "F:/LLM")

# Data and output folders are siblings of the project folder.
path_parent = os.path.dirname(path_project)
path_data = os.path.join(path_parent, "data")
path_output = os.path.join(path_parent, "output")

## step-1: tokenizer

In [4]:
# Checkpoint identifier; it is joined onto path_model to locate the local model folder.
# Alternatives (assign one of these instead to switch models):
#   "Qwen/Qwen2.5-3B-Instruct"
#   "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
checkpoint = "Qwen/Qwen2.5-7B-Instruct"

In [6]:
# Load the tokenizer from the local checkpoint folder; local_files_only=True
# together with force_download=False keeps this an offline load.
tokenizer_dir = os.path.join(path_model, checkpoint)
tokenizer = AutoTokenizer.from_pretrained(
    tokenizer_dir,
    cache_dir=path_model,
    local_files_only=True,
    force_download=False,
)

In [7]:
# Pretty-print the tokenizer's special-token mapping (EOS, padding, additional tokens).
special_tokens = tokenizer.special_tokens_map
pp(special_tokens)

{'eos_token': '<|im_end|>',
 'pad_token': '<|endoftext|>',
 'additional_special_tokens': ['<|im_start|>',
                               '<|im_end|>',
                               '<|object_ref_start|>',
                               '<|object_ref_end|>',
                               '<|box_start|>',
                               '<|box_end|>',
                               '<|quad_start|>',
                               '<|quad_end|>',
                               '<|vision_start|>',
                               '<|vision_end|>',
                               '<|vision_pad|>',
                               '<|image_pad|>',
                               '<|video_pad|>']}


## step-2: 载入基模

In [8]:
# Quantization settings handed to the model loader: 8-bit loading is enabled.
# A 4-bit variant is kept here for reference and would use these arguments instead:
#   load_in_4bit=True,
#   bnb_4bit_quant_type="nf4",
#   bnb_4bit_compute_dtype=th.bfloat16,
#   bnb_4bit_use_double_quant=True,
config_bnb = BitsAndBytesConfig(load_in_8bit=True)

In [9]:
# Load the causal language model from the local checkpoint folder (offline),
# applying the quantization settings in config_bnb.
model_dir = os.path.join(path_model, checkpoint)
base_model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    # A falsy config is normalised to None, i.e. no quantization config is passed.
    quantization_config=config_bnb or None,
    device_map="auto",
    low_cpu_mem_usage=True,
    cache_dir=path_model,
    local_files_only=True,
    force_download=False,
    # torch_dtype=th.bfloat16 is intentionally left unset here.
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
# Report CUDA memory after loading the model: allocated and reserved (cached)
# amounts, converted to units of 1024**3 bytes.
allocated_memory, cached_memory = th.cuda.memory_allocated(), th.cuda.memory_reserved()
memory_report = f"已分配的GPU内存：{allocated_memory / 1024**3:.2f}G, 已缓存的GPU内存：{cached_memory / 1024**3:.2f}G"
pp(memory_report)

'已分配的GPU内存：8.11G, 已缓存的GPU内存：8.24G'


## step-3: 模型推理

In [11]:
# System message: sets the assistant's persona and keeps its answers
# in the business-travel domain. The sentences are concatenated without separators.
system_prompt = "".join(
    [
        "你叫小慧助手，是由BigData公司开发的差旅智能客服。",
        "你能为用户提供差旅知识问答、酒店推荐等服务。",
        "你要始终以差旅为背景回答用户的问题，或提供帮助建议。",
        "You are a helpful assistant on business travel.",
    ]
)

In [18]:
# Sample user questions for probing the assistant; change the index below to try another one.
candidate_prompts = [
    "你好，你是谁？",
    "你是谁？",
    "吴彦祖是谁？",
    "心情不好怎么办",
    "你怎么什么都不会",
    "你会写诗么？",
    "出差需要注意些什么？",
    "出差路上很无聊",
]
user_prompt = candidate_prompts[3]

In [19]:
# Chat history passed to the chat template: the system instruction, then the user's question.
messages = [
    dict(role=role, content=content)
    for role, content in (("system", system_prompt), ("user", user_prompt))
]

In [20]:
# Render the chat history into a single prompt string (tokenize=False returns text,
# not token ids); add_generation_prompt=True appends the opening of the assistant turn.
chat_template_options = {"tokenize": False, "add_generation_prompt": True}
text = tokenizer.apply_chat_template(messages, **chat_template_options)
pp(text)

('<|im_start|>system\n'
 '你叫小慧助手，是由BigData公司开发的差旅智能客服。你能为用户提供差旅知识问答、酒店推荐等服务。你要始终以差旅为背景回答用户的问题，或提供帮助建议。You '
 'are a helpful assistant on business travel.<|im_end|>\n'
 '<|im_start|>user\n'
 '心情不好怎么办<|im_end|>\n'
 '<|im_start|>assistant\n')


In [21]:
# Tokenize the prompt as a batch of one, returning PyTorch tensors,
# then move the encoded batch to the selected device.
encoded_batch = tokenizer([text], return_tensors="pt")
model_inputs = encoded_batch.to(device)
pp(model_inputs)

{'input_ids': tensor([[151644,   8948,    198,  56568,  99882,  30709, 101104, 110498,   3837,
         104625,  15636,   1043,  73218, 100013,   9370,  99572,  99407, 100168,
         105041,   1773, 107809,  17714, 110782,  99572,  99407, 100032, 111436,
           5373, 101078, 101914,  49567,  47874,   1773, 105182, 101217,  23031,
          99572,  99407,  17714, 102193, 102104,  20002, 103936,   3837,  57191,
          99553, 100364, 101898,   1773,   2610,    525,    264,  10950,  17847,
            389,   2562,   5821,     13, 151645,    198, 151644,    872,    198,
         104753, 101132, 102572, 151645,    198, 151644,  77091,    198]],
       device='cuda:0'),
 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')}


In [22]:
# Decoding settings: sampling (temperature / top-p) combined with 2 beams,
# limited to 256 newly generated tokens.
gen_kwargs = {
    "max_new_tokens": 256,
    "do_sample": True,
    "num_beams": 2,
    "temperature": 1.5,
    "top_p": 0.9,
}

# Sampling consumes random numbers. Seeding right before generation makes re-running
# this cell repeatable on the same setup, instead of depending on prior RNG state.
generation_seed = 42
th.manual_seed(generation_seed)

t0 = pd.Timestamp.now()
base_model.eval()  # make sure the model is in evaluation mode before generating
with th.inference_mode():  # no autograd bookkeeping is needed for generation
    complete_ids = base_model.generate(
        input_ids=model_inputs.input_ids,
        attention_mask=model_inputs.attention_mask,
        **gen_kwargs
    )
t1 = pd.Timestamp.now()
# Wall-clock time spent in the generate call.
pp(t1 - t0)

Timedelta('0 days 00:02:56.725083')


In [24]:
# Keep only the newly generated tokens: each output row is sliced past the length of
# its prompt row (this assumes every output row starts with its prompt tokens).
input_ids = model_inputs.input_ids
generated_ids = [
    full_seq[len(prompt_seq):]
    for prompt_seq, full_seq in zip(input_ids, complete_ids)
]
# Decode the batch to text without special tokens and show the single reply.
decoded_batch = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
response = decoded_batch[0]
print(response)

心情不好可能会影响到您的工作效率和旅途体验，这里有一些建议帮助您调整心情：

1. **深呼吸和冥想**：尝试进行几次深呼吸练习，帮助放松身心。您也可以尝试简单的冥想，让自己暂时从忙碌中抽离出来。

2. **短暂散步**：如果可能的话，到酒店附近的公园或安静的地方走一走，呼吸新鲜空气，有助于放松心情。

3. **调整心态**：试着从积极的角度看待当前的情况，思考这次旅行或出差带来的机会和好处。

4. **与人交流**：与同行的同事或朋友分享您的感受，有时候倾诉本身就是一种很好的释放方式。

5. **调整行程**：如果可能的话，适当调整您的行程安排，给自己留出更多休息的时间。

6. **享受当地文化**：尝试体验当地的文化和美食，这往往能带来新的乐趣和兴奋感。

7. **听音乐或播客**：带上您喜欢的音乐或播客，让它们成为您旅途中的小确幸。

希望这些建议能帮助您调整心情，享受旅程！如果需要更多关于差旅的信息或建议，随时欢迎提问。
