## 加载LLM模型

In [None]:
# Load the Qwen1.5-4B-Chat causal LM and its tokenizer through ModelScope.
# `torch` is imported here so the cell runs on its own even when no earlier
# cell has imported it.
import torch
from modelscope import AutoModelForCausalLM, AutoTokenizer

# Single source of truth for the checkpoint id, shared by model and tokenizer.
MODEL_ID = "qwen/Qwen1.5-4B-Chat"

# `device` is kept as a module-level name because later cells may move
# tensors onto it; it reports whether CUDA is visible to torch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'llm using {device.type}')

# `device_map="auto"` already places the weights on the available device(s).
# Do not chain `.to(device)` afterwards: it is redundant, and moving a model
# that was dispatched with a device map is rejected when any module has been
# offloaded.
llm_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


## 调用LLM模型

In [None]:
# Build a chat-formatted prompt, run generation with the loaded LLM, and
# decode only the newly generated tokens into `response`.
prompt = "请根据以下背景来回答问题：上海有什么特色小吃？\n背景：上海的特色小吃有麻辣烫和糖葫芦。武汉的特色小吃有热干面，烤鸭"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]
# Render the messages with the tokenizer's chat template as plain text
# (tokenize=False), ending with the generation prompt for the assistant turn.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print('增加模板后的输入:', text)

# Place the encoded inputs on the same device as the model so that
# `generate` does not hit a device mismatch.
model_inputs = tokenizer([text], return_tensors="pt").to(llm_model.device)
print('tokenizer后的模型输入:', model_inputs)

# Run the model. The model object is bound to `llm_model` (not `model`).
# Unpacking the whole encoding also forwards the attention mask instead of
# passing `input_ids` alone.
generated_ids = llm_model.generate(
    **model_inputs,
    max_new_tokens=512,
)
# Each output sequence starts with its prompt tokens; slice them off so only
# the generated continuation is decoded.
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

# Decode the batch and take the single response.
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print('模型返回结果:', response)