# LIME 手動中文空格分詞版本

In [None]:
from lime.lime_text import LimeTextExplainer
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import numpy as np

# Load the base chat model, attach the ZhongJing adapter, and load the tokenizer.
peft_model_id = "CMLM/ZhongJing-2-1_8b"
base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
model.load_adapter(peft_model_id)
# The tokenizer is taken from the adapter repository; reuse peft_model_id
# instead of repeating the literal so the two ids cannot drift apart.
# NOTE(review): trust_remote_code=True allows code shipped in the hub
# repository to run locally -- keep it only if that repository is trusted.
# NOTE(review): pad_token is an empty string here; confirm this is intended.
# The visible code only tokenizes one sequence at a time, so padding is not
# exercised in this file.
tokenizer = AutoTokenizer.from_pretrained(
    peft_model_id, padding_side="right", trust_remote_code=True, pad_token=""
)


def predictor(texts):
    """Score each text as TCM-related vs. not, in the layout LIME expects.

    LIME needs ``classifier_fn`` to return an ``(n_samples, n_classes)`` array
    whose columns refer to the *same* classes for every sample. Taking the two
    most likely next tokens per sample does not satisfy that: the tokens differ
    between perturbed inputs and column 0 is always the larger value. Instead,
    the model is asked a yes/no question and the next-token logits of two
    fixed answer tokens are compared.

    Args:
        texts: Iterable of raw strings (the original text and LIME's
            perturbed variants of it).

    Returns:
        ``numpy.ndarray`` of shape ``(len(texts), 2)``. Column 0 is the
        probability of answering "是" (TCM-related) and column 1 of answering
        "否" (not TCM-related), renormalised over those two tokens so that
        each row sums to 1. This matches the order of the explainer's
        ``class_names``.
    """
    # Fixed answer tokens -> fixed class columns. Only the first sub-token of
    # each answer is used, in case the tokenizer splits the character.
    yes_id = tokenizer.encode("是", add_special_tokens=False)[0]
    no_id = tokenizer.encode("否", add_special_tokens=False)[0]
    answer_ids = [yes_id, no_id]

    results = []
    for text in texts:
        # Wrap the sample in a classification question so that the very next
        # generated token is the answer.
        question = f"请判断下面的文本是否与中医相关，只回答“是”或“否”。\n文本：{text}"
        messages = [
            {"role": "system", "content": "You are a helpful medical assistant."},
            {"role": "user", "content": question},
        ]

        # Render the chat prompt and tokenize it as a single sequence.
        formatted_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = tokenizer([formatted_text], return_tensors="pt").to(model.device)

        with torch.no_grad():
            # Logits for the token that would follow the prompt.
            next_token_logits = model(**inputs).logits[0, -1]

        # A softmax restricted to the two answer logits equals their
        # full-vocabulary probabilities renormalised to sum to 1. Cast to
        # float32 first so half-precision weights do not lose accuracy.
        answer_probs = torch.softmax(next_token_logits[answer_ids].float(), dim=-1)
        results.append(answer_probs.cpu().numpy().astype(np.float64))

    if not results:
        # Keep the (n_samples, n_classes) contract even for empty input.
        return np.empty((0, 2))
    return np.array(results)


# Build the explainer. The class order must match the predictor's columns.
class_names = ["中医相关", "非中医相关"]
explainer = LimeTextExplainer(
    class_names=class_names,
    split_expression=lambda x: x.split(),  # text is pre-segmented with spaces
    bow=True,
    random_state=42,
)

# Test text (Chinese, manually segmented with spaces).
test_text = "喉咙痛 的 中医 疗法"

# Generate the explanation. top_labels=1 means only the highest-scoring class
# gets feature weights.
explanation = explainer.explain_instance(
    text_instance=test_text,
    classifier_fn=predictor,
    num_features=10,
    num_samples=1000,
    top_labels=1,
)

# Only one label was explained and it is not guaranteed to be label 0; asking
# as_list() for a label without weights raises KeyError, so look it up.
explained_label = explanation.available_labels()[0]

# Print the per-word importances for the explained class.
print("\n=== 詞語重要性分析 ===")
print(f"解釋類別: {class_names[explained_label]}")
for word, importance in explanation.as_list(label=explained_label):
    print(f"詞語: {word:<20} 影響度: {importance:.4f}")

# Write the HTML visualisation to disk.
html = explanation.as_html()
with open("zhongjing_explanation.html", "w", encoding="utf-8") as f:
    f.write(html)