In [None]:
"""
请在下面的配置区设置你的API_KEY，
以及在RETRAIN_MODEL设置是否要重新训练整个模型，
RETRAIN_MODEL为True代表重新训练，为None代表不训练
"""
import os

# =========================
# Configuration
# =========================
# API key forwarded as `api_key` to the image-recognition, translation and
# analysis helpers used below. Replace the placeholder with your own key.
API_KEY = "..."

# When truthy, prepare_model() retrains and re-saves the model before loading
# it; when falsy (None/False), the existing file at MODEL_PATH is loaded as-is.
RETRAIN_MODEL = True

# CSV passed to train_and_save_model() as the training data.
DATASET_PATH = "data/deceptive-opinion.csv"
# Location the trained model is saved to and loaded from.
MODEL_PATH = "model/SVM.joblib"
# Root folder scanned for per-hotel sub-directories of review images.
GRAPH_DIR = "graph"
# =========================
# 模块导入
# =========================
from src.model_training import train_and_save_model
from src.llm_image_comment_recognition import extract_reviews_from_folder
from src.llm_translate import translate_text_to_english
from src.model_inference import load_svm_pipeline, svm_predict
from src.llm_analysis import analyze_hotels_and_choose


# =========================
# 工具函数
# =========================
def label_to_zh(label: str) -> str:
    """Map a classifier label to its Chinese display text.

    Only the exact string ``"truthful"`` yields "真实"; every other value
    yields "虚假".
    """
    if label != "truthful":
        return "虚假"
    return "真实"


def score_desc_zh(label: str, score: float) -> str:
    """Return the Chinese hint shown next to a score.

    The hint is selected from ``label`` alone; ``score`` is accepted for
    signature compatibility but is not read.
    """
    # NOTE(review): which sign means "more credible" depends on how the score
    # is produced by svm_predict, which is not visible here — confirm wording.
    return "越负代表越可信" if label == "truthful" else "越正代表越可信"


# =========================
# 模型准备
# =========================
def prepare_model():
    """Return the SVM pipeline loaded from ``MODEL_PATH``.

    When ``RETRAIN_MODEL`` is truthy the model is first retrained from
    ``DATASET_PATH`` and saved to ``MODEL_PATH``; otherwise the file already
    at ``MODEL_PATH`` is loaded unchanged.
    """
    if not RETRAIN_MODEL:
        print("[信息] 使用已有 SVM 模型")
    else:
        print("[信息] 重新训练 SVM 模型中...")
        train_and_save_model(csv_path=DATASET_PATH, model_save_path=MODEL_PATH)

    pipeline = load_svm_pipeline(MODEL_PATH)
    return pipeline


# =========================
# OCR：图片 → 中文
# =========================
def extract_hotel_reviews():
    """Collect the review texts recognised from each hotel's image folder.

    Every sub-directory of ``GRAPH_DIR`` is treated as one hotel and handed to
    ``extract_reviews_from_folder``. Sub-directories are visited in sorted
    name order so repeated runs process (and later report) hotels in the same
    sequence; ``os.listdir`` on its own returns entries in arbitrary order.

    Returns:
        dict: hotel directory name -> list of stripped, non-empty review
        strings. Strings starting with ``"[ERROR]"`` are dropped, and hotels
        left with no usable text are omitted from the result.
    """
    hotel_reviews = {}

    for hotel in sorted(os.listdir(GRAPH_DIR)):
        hotel_path = os.path.join(GRAPH_DIR, hotel)
        if not os.path.isdir(hotel_path):
            # Plain files directly under GRAPH_DIR are not hotels.
            continue

        print(f"[信息] 正在提取 {hotel} 的图片评论")

        results = extract_reviews_from_folder(
            api_key=API_KEY,
            folder_path=hotel_path,
            max_workers=4,
        )

        # Each result is a pair; only its second item (an iterable of text
        # strings) is used here.
        stripped = (raw.strip() for _, texts in results for raw in texts)
        zh_texts = [
            text
            for text in stripped
            if text and not text.startswith("[ERROR]")
        ]

        if zh_texts:
            hotel_reviews[hotel] = zh_texts

    return hotel_reviews


# =========================
# 主函数
# =========================
def _classify_reviews(pipe, zh_list):
    """Translate each Chinese review to English and classify it with ``pipe``.

    Returns a list of dicts with keys ``"zh"``, ``"label"`` and ``"score"``,
    one per input review, in input order.
    """
    results = []
    for zh in zh_list:
        en = translate_text_to_english(api_key=API_KEY, text=zh)
        pred = svm_predict([en], pipe)[0]
        results.append({
            "zh": zh,
            "label": pred["label"],
            "score": pred["score"],
        })
    return results


def main():
    """Run the full workflow: model, extraction, classification, LLM report.

    Steps:
        1. Load (optionally retrain) the SVM pipeline.
        2. Extract the Chinese review texts for every hotel folder.
        3. Translate and classify every review; print one sample per hotel.
        4. Pass all per-hotel results to the LLM and print its report.

    If no reviews were extracted at all, the function prints a warning and
    returns without running classification or calling the LLM analysis.
    """
    pipe = prepare_model()
    hotel_reviews = extract_hotel_reviews()

    if not hotel_reviews:
        # Nothing to classify: avoid an LLM call on empty input.
        print("[警告] 未提取到任何酒店评论，已跳过判别与综合分析")
        return

    print("\n================ 单酒店判别结果（展示样本） ================\n")

    # Full aggregated input for the LLM: hotel name -> all classified reviews.
    analysis_inputs = {}

    for hotel, zh_list in hotel_reviews.items():
        hotel_all_results = _classify_reviews(pipe, zh_list)
        if not hotel_all_results:
            # No review to show or analyse for this hotel.
            continue

        analysis_inputs[hotel] = hotel_all_results

        # The terminal shows only the first review as a representative sample.
        sample = hotel_all_results[0]

        print(f"[酒店] {hotel}")
        print(f"中文评论：{sample['zh']}")
        print(f"判定结果：{label_to_zh(sample['label'])}")
        print(
            f"置信度分数：{sample['score']:.2f}"
            f"（{score_desc_zh(sample['label'], sample['score'])}）"
        )
        print("-" * 60)

    # LLM analysis aggregated over all reviews of all hotels.
    print("\n================ 大模型综合分析与最终决策 ================\n")

    report = analyze_hotels_and_choose(
        api_key=API_KEY,
        hotel_reviews=analysis_inputs,
        model="qwen3-max",
    )

    print(report)


if __name__ == "__main__":
    main()
