In [None]:
# 安装必要库
!pip install transformers accelerate bitsandbytes gradio

# 登录 Hugging Face（如果你还没登录）
from huggingface_hub import login
login(new_session=False)

# 加载模型和 tokenizer（使用 4-bit 量化以节省显存）
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Quantization settings: 4-bit NF4 weights with double quantization, computing
# in float16. The purpose is to reduce GPU memory use when loading the model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Load the tokenizer and the model. `token=True` authenticates with the stored
# Hugging Face login; it replaces the deprecated `use_auth_token` argument.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)
tokenizer.pad_token = tokenizer.eos_token  # avoid the missing pad_token warning

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    token=True,
)

# Build the text-generation pipeline used by the chat handler
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Chat handler (supports multi-turn conversations)
def chat_interface(user_input, history):
    """Generate the assistant's reply to ``user_input`` and extend the chat log.

    Args:
        user_input: Text the user just submitted.
        history: Earlier turns as a list of ``{"role": ..., "content": ...}``
            dicts, alternating ``"user"`` and ``"assistant"`` messages (the
            shape this function itself appends). ``None`` is treated as an
            empty conversation.

    Returns:
        A ``(history, history)`` tuple: the updated list twice, one entry per
        output the caller binds (chat display and stored state). The list
        passed in is extended in place.
    """
    if history is None:
        history = []

    # Blank submission: there is nothing to answer, so skip the model call and
    # leave the conversation untouched.
    if not user_input or not user_input.strip():
        return history, history

    # Pass the conversation as chat messages so the pipeline formats the prompt
    # with the model's own chat template. This replaces a hand-written
    # "<s>[INST] ... [/INST]" string, which duplicated the BOS token the
    # tokenizer already adds and never closed assistant turns with EOS.
    messages = [*history, {"role": "user", "content": user_input}]
    result = pipe(messages, max_new_tokens=256, do_sample=True, temperature=0.7)

    # For chat input, `generated_text` is the message list with the newly
    # generated assistant message appended last. Reading it directly avoids
    # splitting on "[/INST]", which truncated replies containing that marker.
    response = result[0]["generated_text"][-1]["content"].strip()

    # Record the new exchange
    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": response})
    return history, history

# 创建 Gradio 界面
import gradio as gr

def _reset_conversation():
    """Return empty values for the chat display and the stored history."""
    return [], []


# Assemble the Gradio UI: a chat window, an input box, and a clear button.
with gr.Blocks() as demo:
    gr.Markdown(value="## 🤖 Mistral-7B 对话机器人")
    chatbot = gr.Chatbot(label="对话窗口", type="messages")
    msg = gr.Textbox(label="请输入你的问题")
    clear = gr.Button(value="清除对话")

    # Per-session conversation history, starting empty
    state = gr.State(value=[])

    # Submitting the textbox runs the chat handler and refreshes both the
    # visible chat and the stored history.
    msg.submit(
        fn=chat_interface,
        inputs=[msg, state],
        outputs=[chatbot, state],
    )
    # The clear button empties both the visible chat and the stored history.
    clear.click(
        fn=_reset_conversation,
        inputs=None,
        outputs=[chatbot, state],
    )

# Launch the app with a public share link and debug output enabled
demo.launch(share=True, debug=True)
