In [None]:
!pip install -q transformers accelerate gradio torch

Model page: https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat

⚠️ If the generated code snippets do not work, please open an issue on the model repo and/or on huggingface.js 🙏

In [None]:
import torch
# Use the GPU with half precision when CUDA is available; otherwise fall back
# to the CPU with full precision.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32

In [None]:
import os
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Pick device and precision automatically: fp16 on GPU, fp32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Model identifier on the Hugging Face Hub.
model_name = "Qwen/Qwen1.5-1.8B-Chat"
hf_token = os.getenv("HF_TOKEN")  # Optional: only needed for gated/private repos.

# Load the tokenizer and the model.
# - `token` replaces the deprecated `use_auth_token` argument.
# - `trust_remote_code` is intentionally not set: Qwen1.5 is implemented in
#   transformers itself (>= 4.37), so no code from the repo needs to be executed.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto" if device == "cuda" else None,
    torch_dtype=dtype,
    token=hf_token,
)

# With device_map="auto" accelerate has already placed the weights; calling
# .to() on a dispatched model is redundant and can warn or fail when layers are
# sharded/offloaded. Only move the model explicitly on the CPU path.
if device != "cuda":
    model = model.to(device)

# Chat handler (multi-turn).
def chat(message, history):
    """Generate the assistant's reply to ``message`` given the earlier turns.

    Args:
        message: Text the user just submitted.
        history: List of ``(user_text, assistant_text)`` tuples for the
            previous turns. It is extended in place with the new turn.
            ``None`` is treated as an empty history.

    Returns:
        A 3-tuple ``("", history, history)``: an empty string followed by the
        updated history twice. A blank ``message`` returns the history
        unchanged without calling the model.

    Any exception raised while building the prompt or generating is printed
    and recorded in the history as the assistant's turn instead of propagating.
    """
    if history is None:
        history = []

    # Nothing to answer: do not spend a generation call on blank input.
    if not message or not message.strip():
        return "", history, history

    try:
        # Convert the tuple history into role/content message dicts.
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})

        # Append the current user input.
        messages.append({"role": "user", "content": message})

        # Render the conversation with the model's chat template.
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # Generate the reply.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7
        )

        # generate() returns prompt + completion; drop the prompt tokens so the
        # reply does not echo the whole conversation back to the user (and into
        # the history that is fed to the model on the next turn).
        prompt_len = inputs["input_ids"].shape[-1]
        new_tokens = outputs[0][prompt_len:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        # Record the turn (tuple format, as used for display).
        history.append((message, response))
        return "", history, history

    except Exception as e:
        # Best effort: surface the failure in the conversation instead of raising.
        print(f"❌ 错误信息：{e}")
        error_msg = f"发生错误：{str(e)}"
        history.append((message, error_msg))
        return "", history, history

# Gradio UI: a chat log, a text box, and a submit button sharing one handler.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Qwen1.5-1.8B 中文 Chatbot")
    chatbot = gr.Chatbot(label="对话记录", type="tuples")
    msg = gr.Textbox(
        label="请输入你的问题",
        placeholder="例如：你好，今天天气怎么样？",
        lines=2,
    )
    submit_btn = gr.Button("提交")
    state = gr.State([])

    # Clicking the button and pressing Enter in the text box both run `chat`
    # with the same inputs and outputs.
    chat_inputs = [msg, state]
    chat_outputs = [msg, chatbot, state]
    for register in (submit_btn.click, msg.submit):
        register(chat, chat_inputs, chat_outputs)

demo.launch(debug=True)