In [None]:
# Cell 1: Shared Cache Bootstrap
import os, pathlib, torch
import sys
from datetime import datetime

# Shared cache configuration (copy this cell into every notebook).
# Every cache location below is derived from AI_CACHE_ROOT, which can be
# overridden through the environment variable of the same name.
AI_CACHE_ROOT = os.environ.get("AI_CACHE_ROOT", "../ai_warehouse/cache")

for env_name, cache_dir in (
    ("HF_HOME", f"{AI_CACHE_ROOT}/hf"),
    ("TRANSFORMERS_CACHE", f"{AI_CACHE_ROOT}/hf/transformers"),
    ("HF_DATASETS_CACHE", f"{AI_CACHE_ROOT}/hf/datasets"),
    ("HUGGINGFACE_HUB_CACHE", f"{AI_CACHE_ROOT}/hf/hub"),
    ("TORCH_HOME", f"{AI_CACHE_ROOT}/torch"),
):
    # Export the location, then make sure the directory exists.
    os.environ[env_name] = cache_dir
    pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)
print("[Cache]", AI_CACHE_ROOT, "| GPU:", torch.cuda.is_available())

In [None]:
# Cell 2: 導入與環境檢查
import json, yaml, re
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass, asdict
from pathlib import Path
import difflib
from collections import defaultdict

# Import our agent components
import sys

sys.path.append("../shared_utils")
from agents.orchestrator import Orchestrator
from agents.blackboard import Blackboard
from adapters.llm_adapter import LLMAdapter

print("✓ Agent consistency & style module ready")

In [None]:
# Cell 3: Style Dictionary 與術語表
@dataclass
class StyleGuide:
    """Writing-style profile: tone, glossary, banned phrases and format rules.

    Instances can be created directly or loaded with ``from_yaml`` from a YAML
    mapping whose keys are the field names below.
    """

    name: str  # identifier of the style
    tone: str  # e.g. formal, casual, technical
    terminology: Dict[str, str]  # English term -> Chinese rendering
    avoid_phrases: List[str]  # phrases the text should not contain
    format_rules: List[str]  # free-text formatting rules
    citation_format: str = "brackets"  # i.e. [1], [2]

    @classmethod
    def from_yaml(cls, yaml_path: str) -> "StyleGuide":
        """Build a style guide from the YAML file at ``yaml_path``.

        The file is opened as UTF-8, parsed with ``yaml.safe_load`` and the
        parsed mapping is forwarded to the constructor as keyword arguments.
        """
        with open(yaml_path, encoding="utf-8") as handle:
            fields = yaml.safe_load(handle)
        return cls(**fields)


@dataclass
class ConsistencyCheck:
    """Outcome of running one text through the consistency checks."""

    passed: bool  # overall verdict of the check
    issues: List[str]  # human-readable descriptions of the problems found
    suggestions: List[str]  # advice that accompanies the issues
    # Fix category -> corrected text. ConsistencyGate.full_check stores the
    # fully terminology-fixed text under the key "terminology".
    auto_fixes: Dict[str, str]


# Sample style guide used by the demo cells: formal technical tone with a
# small English -> Chinese glossary.
TECH_STYLE = StyleGuide(
    name="tech_writing",
    tone="formal-technical",
    # Glossary entries, in the order they should be reported.
    terminology=dict(
        [
            ("RAG", "檢索增強生成"),
            ("LLM", "大型語言模型"),
            ("embedding", "嵌入向量"),
            ("chunk", "文本片段"),
            ("groundedness", "有據可依性"),
        ]
    ),
    # Filler phrases that should not appear in generated text.
    avoid_phrases=["以下是", "作為一個AI", "總的來說"],
    format_rules=[
        "Use Traditional Chinese for content",
        "Technical terms in parentheses: 檢索增強生成（RAG）",
        "Citations as [1], [2] with Sources section",
        "No more than 7 bullet points per list",
    ],
)

print("Style guide loaded:", TECH_STYLE.name)
print("Terminology entries:", len(TECH_STYLE.terminology))

In [None]:
# Cell 4: 術語一致性檢查器
class TerminologyChecker:
    """Detect and annotate glossary terms that lack their Chinese explanation.

    For every ``EN -> ZH`` entry of the style guide's ``terminology`` an
    occurrence of the English term counts as explained when it is written as
    ``EN（ZH）`` or as ``ZH（EN）`` (full- or half-width parentheses, optional
    whitespace). Any other occurrence is reported and can be auto-annotated.
    Matching is case-insensitive.
    """

    def __init__(self, style_guide: "StyleGuide"):
        """Store the style guide and pre-compile one pattern per glossary term."""
        self.style = style_guide
        self.term_patterns = self._build_patterns()

    def _build_patterns(self) -> Dict[str, re.Pattern]:
        """Build the regex that finds each English term without ``（ZH）`` after it."""
        patterns = {}
        for en_term, zh_term in self.style.terminology.items():
            # Explicit ASCII lookarounds are used instead of ``\b``: in Python 3
            # ``\b`` is Unicode-aware and CJK characters count as word
            # characters, so "使用RAG技術" would contain no boundary around
            # "RAG" and the term would silently be missed.
            pattern = (
                rf"(?<![A-Za-z0-9_]){re.escape(en_term)}(?![A-Za-z0-9_])"
                rf"(?!\s*[（(]{re.escape(zh_term)}[）)])"
            )
            patterns[en_term] = re.compile(pattern, re.IGNORECASE)
        return patterns

    @staticmethod
    def _is_inverse_annotation(text: str, match: "re.Match", zh_term: str) -> bool:
        """Return True when ``match`` is the English half of ``ZH（EN）``."""
        before = text[: match.start()].rstrip()
        after = text[match.end():].lstrip()
        return (
            before[-1:] in ("（", "(")
            and before[:-1].rstrip().endswith(zh_term)
            and after[:1] in ("）", ")")
        )

    def check_terminology(self, text: str) -> List[Tuple[str, str, str]]:
        """Find unexplained glossary terms.

        Returns:
            One ``(term, position, suggestion)`` tuple per occurrence, grouped
            by glossary order; ``position`` is ``"位置 start-end"`` using
            character offsets into ``text``.
        """
        issues = []
        for en_term, pattern in self.term_patterns.items():
            zh_term = self.style.terminology[en_term]
            suggestion = f"{en_term}（{zh_term}）"
            for match in pattern.finditer(text):
                if self._is_inverse_annotation(text, match, zh_term):
                    continue  # already written as ZH（EN）
                start, end = match.span()
                issues.append((en_term, f"位置 {start}-{end}", suggestion))
        return issues

    def auto_fix_terminology(self, text: str) -> str:
        """Return ``text`` with every unexplained term rewritten as ``EN（ZH）``.

        The glossary spelling of the term replaces the matched text, so casing
        is normalised. Running the fix twice yields the same result.
        """
        fixed_text = text
        for en_term, pattern in self.term_patterns.items():
            zh_term = self.style.terminology[en_term]
            replacement = f"{en_term}（{zh_term}）"

            # A callable replacement is inserted verbatim (a string template
            # would have its backslashes interpreted) and lets occurrences that
            # are already annotated as ZH（EN） pass through untouched.
            def annotate(match, zh_term=zh_term, replacement=replacement):
                if self._is_inverse_annotation(match.string, match, zh_term):
                    return match.group(0)
                return replacement

            fixed_text = pattern.sub(annotate, fixed_text)
        return fixed_text


# Demo: run the terminology checker on one sentence that uses three glossary
# terms without their Chinese explanation.
checker = TerminologyChecker(TECH_STYLE)
test_text = "我們使用 RAG 和 embedding 技術來改善 LLM 的回答品質。"
print("原文:", test_text)

# Report the findings first ...
issues = checker.check_terminology(test_text)
print("術語問題:", issues)

# ... then show the automatically annotated sentence.
fixed = checker.auto_fix_terminology(test_text)
print("修正後:", fixed)

In [None]:
# Cell 5: 風格與格式檢查器
class StyleChecker:
    """Check forbidden phrases, citation numbering and bullet-list length."""

    # One bullet item: marker at the start of the line, whitespace, some text.
    _BULLET_LINE = re.compile(r"[-•*]\s+.+")

    def __init__(self, style_guide: "StyleGuide"):
        """Store the style guide and compile its forbidden phrases.

        Phrases are literal text, so they are escaped before compiling; a
        phrase such as "e.g." or "(AI" would otherwise over-match or fail to
        compile. Empty phrases are skipped because they would match everywhere.
        """
        self.style = style_guide
        self.avoid_patterns = [
            re.compile(re.escape(phrase), re.IGNORECASE)
            for phrase in style_guide.avoid_phrases
            if phrase
        ]

    def check_forbidden_phrases(self, text: str) -> List[str]:
        """Return every forbidden-phrase occurrence found in ``text``.

        Occurrences are grouped by phrase (in style-guide order) and keep the
        casing they have in ``text``.
        """
        return [
            hit for pattern in self.avoid_patterns for hit in pattern.findall(text)
        ]

    def check_citation_format(self, text: str) -> Tuple[bool, List[str]]:
        """Check ``[n]`` citations.

        Two rules are verified: a text with citations must mention a sources
        section ("來源" or "Sources"), and the distinct citation numbers must be
        exactly 1..k with no gaps.

        Returns:
            ``(ok, issues)`` where ``ok`` is True when ``issues`` is empty.
        """
        citations = re.findall(r"\[(\d+)\]", text)
        issues: List[str] = []

        if citations:
            has_sources_section = "來源" in text or "Sources" in text
            if not has_sources_section:
                issues.append("有引用但缺少來源章節")

            # Distinct numbers must form the sequence 1, 2, ..., k.
            distinct = sorted({int(c) for c in citations})
            if distinct != list(range(1, len(distinct) + 1)):
                issues.append(f"引用編號不連續: {distinct}")

        return len(issues) == 0, issues

    def check_list_format(self, text: str, max_items: int = 7) -> List[str]:
        """Report bullet lists with more than ``max_items`` items.

        A list is a run of consecutive lines that each start with ``-``, ``•``
        or ``*`` followed by whitespace and text. Items are counted per line,
        so a dash or asterisk inside an item's text is not mistaken for an
        additional bullet.
        """
        issues = []
        run_length = 0
        # The trailing "" is a sentinel that closes a list ending at EOF.
        for line in text.split("\n") + [""]:
            if self._BULLET_LINE.match(line):
                run_length += 1
                continue
            if run_length > max_items:
                issues.append(f"列表項目過多: {run_length} 項 (建議≤{max_items})")
            run_length = 0
        return issues


# Demo: a short text with forbidden phrases, a citation without a sources
# section and a three-item bullet list.
style_checker = StyleChecker(TECH_STYLE)
test_style_text = """以下是一些要點：
- RAG 技術很有用 [1]
- 以下是更多細節
- 第三點

作為一個AI，我認為這很重要。
"""

# One line of output per check, in the same order as the checker's methods.
print(
    f"檢查禁用詞組: {style_checker.check_forbidden_phrases(test_style_text)}",
    f"檢查引用格式: {style_checker.check_citation_format(test_style_text)}",
    f"檢查列表格式: {style_checker.check_list_format(test_style_text)}",
    sep="\n",
)

In [None]:
# Cell 6: 一致性閘門整合
class ConsistencyGate:
    """Run all consistency checks on a text and apply the available auto-fixes.

    Combines a ``TerminologyChecker`` and a ``StyleChecker`` built from the
    same style guide.
    """

    def __init__(self, style_guide: "StyleGuide"):
        """Create both checkers for ``style_guide``."""
        self.style = style_guide
        self.term_checker = TerminologyChecker(style_guide)
        self.style_checker = StyleChecker(style_guide)

    def full_check(self, text: str, role: str = "writer") -> "ConsistencyCheck":
        """Run terminology, forbidden-phrase, citation and list checks.

        Args:
            text: content to check.
            role: name of the role that produced the text. Accepted for
                interface compatibility; the checks do not use it.

        Returns:
            A ``ConsistencyCheck`` whose ``passed`` flag is True only when no
            check reported an issue. ``auto_fixes["terminology"]`` holds the
            terminology-fixed text when terminology issues were found.
        """
        issues: List[str] = []
        suggestions: List[str] = []
        auto_fixes: Dict[str, str] = {}

        # 1. Terminology check
        term_issues = self.term_checker.check_terminology(text)
        if term_issues:
            issues.extend([f"術語: {issue[0]} {issue[1]}" for issue in term_issues])
            suggestions.append("建議添加中文說明")
            auto_fixes["terminology"] = self.term_checker.auto_fix_terminology(text)

        # 2. Forbidden phrases
        # NOTE(review): apply_auto_fixes can remove these phrases, but no entry
        # is added to auto_fixes here, so callers that gate on auto_fixes only
        # fix texts that also have terminology issues — confirm this is intended.
        forbidden = self.style_checker.check_forbidden_phrases(text)
        if forbidden:
            issues.append(f"使用禁用詞組: {forbidden}")
            suggestions.append("避免使用AI口頭禪")

        # 3. Citation format
        citation_ok, citation_issues = self.style_checker.check_citation_format(text)
        if not citation_ok:
            issues.extend(citation_issues)
            suggestions.append("檢查引用格式與來源章節")

        # 4. List formatting
        list_issues = self.style_checker.check_list_format(text)
        if list_issues:
            issues.extend(list_issues)
            suggestions.append("簡化列表項目")

        passed = len(issues) == 0
        return ConsistencyCheck(passed, issues, suggestions, auto_fixes)

    def apply_auto_fixes(self, text: str) -> str:
        """Return ``text`` with terminology annotated and forbidden phrases removed.

        Whitespace is tidied afterwards without destroying the document
        structure: runs of spaces inside a line collapse to one space, each
        line is trimmed, and at most one blank line is kept between blocks.
        Line breaks are preserved so bullet lists, paragraphs and the sources
        section stay intact. Leading indentation is not preserved.
        """
        fixed = self.term_checker.auto_fix_terminology(text)

        # Forbidden phrases are literal text, not regular expressions.
        for phrase in self.style.avoid_phrases:
            fixed = re.sub(re.escape(phrase), "", fixed, flags=re.IGNORECASE)

        # Normalise whitespace line by line so newlines survive.
        fixed = "\n".join(" ".join(line.split()) for line in fixed.split("\n"))
        fixed = re.sub(r"\n{3,}", "\n\n", fixed)

        return fixed.strip()


# Demo: run the full gate on a short document that has terminology and
# forbidden-phrase problems but a valid citation / sources section.
gate = ConsistencyGate(TECH_STYLE)
test_content = """以下是關於 RAG 系統的分析：

作為一個AI，我認為 embedding 技術很重要。使用 LLM 可以改善結果。

參考資料顯示效果不錯 [1]。

來源:
[1] 某篇論文
"""

check_result = gate.full_check(test_content)
print(
    "一致性檢查結果:",
    f"通過: {check_result.passed}",
    f"問題: {check_result.issues}",
    f"建議: {check_result.suggestions}",
    sep="\n",
)

# Only show the corrected text when the check produced auto-fixes.
if check_result.auto_fixes:
    print("\n自動修正:")
    fixed = gate.apply_auto_fixes(test_content)
    print(fixed)

In [None]:
# Cell 7: 與多代理系統整合
class ConsistencyAwareOrchestrator(Orchestrator):
    """Orchestrator variant that runs role outputs through a ConsistencyGate."""

    def __init__(self, llm_adapter: LLMAdapter, style_guide: StyleGuide):
        """Initialise the base orchestrator and build the gate for ``style_guide``."""
        super().__init__(llm_adapter)
        self.style_guide = style_guide
        self.consistency_gate = ConsistencyGate(style_guide)

    def post_process_role_output(
        self, role: str, output: str, blackboard: Blackboard
    ) -> str:
        """Check ``output`` and return it, auto-fixed for writer/reviewer roles.

        Issues and applied fixes are recorded on ``blackboard`` through
        ``append_log``. Output of the "researcher" role is returned untouched.
        """
        # The fact-gathering phase is exempt from consistency checking.
        if role == "researcher":
            return output

        verdict = self.consistency_gate.full_check(output, role)

        # Record what was found before deciding whether to fix anything.
        if not verdict.passed:
            issue_report = {
                "issues": verdict.issues,
                "suggestions": verdict.suggestions,
            }
            blackboard.append_log(f"consistency_issues_{role}", issue_report)

        # Nothing to fix, or a role whose text must not be rewritten.
        if not verdict.auto_fixes or role not in ("writer", "reviewer"):
            return output

        repaired = self.consistency_gate.apply_auto_fixes(output)
        fix_report = {
            "original_length": len(output),
            "fixed_length": len(repaired),
            "fixes_applied": list(verdict.auto_fixes.keys()),
        }
        blackboard.append_log(f"auto_fixed_{role}", fix_report)
        return repaired

    def style_aware_prompt(self, role: str, base_prompt: str) -> str:
        """Prefix ``base_prompt`` with a short summary of the style guide.

        Only the first three glossary entries, the first two forbidden phrases
        and the first two format rules are included. ``role`` is not used.
        """
        guide = self.style_guide
        sample_terms = dict(list(guide.terminology.items())[:3])
        avoid_sample = ", ".join(guide.avoid_phrases[:2])
        rule_sample = "; ".join(guide.format_rules[:2])
        style_injection = f"""
Style Requirements:
- Tone: {guide.tone}
- Use these terms: {sample_terms}
- Avoid: {avoid_sample}
- Format: {rule_sample}

"""
        return style_injection + base_prompt


# Test with minimal LLM adapter (mock)
class MockLLMAdapter:
    """Stand-in LLM adapter that answers every request with one fixed sentence."""

    _CANNED_REPLY = (
        "這是一個關於 RAG 的測試回答，包含一些 embedding 概念。以下是要點：很有用。"
    )

    def generate(self, messages, **kwargs):
        """Ignore ``messages`` and any keyword options; return the canned reply."""
        return self._CANNED_REPLY


# Wire the consistency-aware orchestrator to the mock adapter.
mock_llm = MockLLMAdapter()
orchestrator = ConsistencyAwareOrchestrator(mock_llm, TECH_STYLE)

# Demo: post-process a writer output and inspect what was logged on the blackboard.
test_blackboard = Blackboard()
test_output = "作為一個AI，以下是關於 RAG 的分析。使用 embedding 很重要。"

processed = orchestrator.post_process_role_output(
    role="writer", output=test_output, blackboard=test_blackboard
)
print(
    f"處理前: {test_output}",
    f"處理後: {processed}",
    f"黑板日誌: {dict(test_blackboard.data)}",
    sep="\n",
)

In [None]:
# Cell 8: 風格切換與動態配置
class StyleManager:
    """Load style guides from a directory and switch between them by name."""

    def __init__(self, styles_dir: str = "../configs/styles"):
        """Load every ``*.yaml`` style found in ``styles_dir``.

        No style is selected initially; call ``set_style`` to pick one.
        """
        self.styles_dir = Path(styles_dir)
        self.loaded_styles = {}
        self.current_style = None
        self._load_all_styles()

    def _load_all_styles(self):
        """Populate ``loaded_styles`` from the YAML files in ``styles_dir``.

        Loading is best-effort: a missing directory or a file that fails to
        load is reported with ``print`` and skipped.
        """
        if not self.styles_dir.exists():
            print(f"Warning: Styles directory {self.styles_dir} not found")
            return

        # Path.glob yields entries in filesystem order. Sorting makes the load
        # order — and therefore which file wins when two files declare the
        # same style name — deterministic across machines.
        for style_file in sorted(self.styles_dir.glob("*.yaml")):
            try:
                style = StyleGuide.from_yaml(str(style_file))
                self.loaded_styles[style.name] = style
                print(f"Loaded style: {style.name}")
            except Exception as e:  # deliberate: one bad file must not stop the rest
                print(f"Failed to load {style_file}: {e}")

    def set_style(self, style_name: str) -> bool:
        """Make ``style_name`` the current style; return False if it is unknown."""
        if style_name in self.loaded_styles:
            self.current_style = self.loaded_styles[style_name]
            return True
        return False

    def get_available_styles(self) -> List[str]:
        """Return the names of all loaded styles."""
        return list(self.loaded_styles.keys())

    def create_consistency_gate(self) -> Optional["ConsistencyGate"]:
        """Return a gate for the current style, or None when no style is selected."""
        if self.current_style:
            return ConsistencyGate(self.current_style)
        return None


# Prepare a styles directory containing one sample file (simulation).
sample_styles_dir = Path("../configs/styles")
sample_styles_dir.mkdir(parents=True, exist_ok=True)

# Sample "education" style written to disk as YAML.
edu_style_data = dict(
    name="education",
    tone="friendly-instructive",
    terminology={"AI": "人工智慧", "ML": "機器學習", "model": "模型"},
    avoid_phrases=["很顯然", "毫無疑問"],
    format_rules=[
        "Use examples after concepts",
        "Keep paragraphs under 100 characters",
    ],
)

with (sample_styles_dir / "education.yaml").open("w", encoding="utf-8") as f:
    yaml.dump(edu_style_data, f, allow_unicode=True)

# Load the directory and try the freshly written style.
style_manager = StyleManager("../configs/styles")
print("可用風格:", style_manager.get_available_styles())

if style_manager.set_style("education"):
    print("切換到教育風格成功")
    edu_gate = style_manager.create_consistency_gate()
    if edu_gate:
        test_edu = "很顯然 AI 模型很有用。"
        edu_check = edu_gate.full_check(test_edu)
        print("教育風格檢查:", edu_check.issues)

In [None]:
# Cell 9: Smoke Test - 端到端一致性檢查
def smoke_test_consistency_system():
    """Exercise the whole consistency pipeline once and summarise the outcome.

    Returns:
        A dict of three booleans: ``consistency_check`` (issues were found in
        the deliberately flawed sample), ``auto_fix_available`` and
        ``style_manager_loaded``.
    """
    print("=== 一致性系統煙霧測試 ===")

    # Sample text that deliberately breaks several rules at once.
    test_content = """以下是關於 RAG 系統的研究報告：

作為一個AI助手，我分析了 embedding 和 LLM 的應用。總的來說，效果很好。

主要發現：
- RAG 技術有效 [1]
- embedding 改善檢索 [2]
- 更多要點1
- 更多要點2
- 更多要點3
- 更多要點4
- 更多要點5
- 更多要點6
- 更多要點7
- 更多要點8

參考文獻 [3] 顯示良好結果。
"""

    # Build the gate and run every check.
    gate = ConsistencyGate(TECH_STYLE)
    result = gate.full_check(test_content, "writer")

    print("✓ 一致性檢查完成")
    print(f"  通過: {result.passed}")
    print(f"  發現問題: {len(result.issues)}")
    print(f"  問題詳情: {result.issues[:3]}...")  # first three only

    # Apply the auto-fixes when the check offers any.
    if result.auto_fixes:
        fixed_content = gate.apply_auto_fixes(test_content)
        print("✓ 自動修正應用")
        print(f"  原文長度: {len(test_content)} 字元")
        print(f"  修正後: {len(fixed_content)} 字元")

        # Highlight the key change if the term was annotated.
        rag_annotated = "RAG" in test_content and "RAG（檢索增強生成）" in fixed_content
        changes = ["RAG → RAG（檢索增強生成）"] if rag_annotated else []
        if changes:
            print(f"  主要變更: {'; '.join(changes)}")

    # Verify the style manager can see at least one style.
    style_manager = StyleManager("../configs/styles")
    available = style_manager.get_available_styles()
    print(f"✓ 風格管理器就緒，可用風格: {len(available)}")

    summary = {}
    summary["consistency_check"] = not result.passed  # issues are expected
    summary["auto_fix_available"] = bool(result.auto_fixes)
    summary["style_manager_loaded"] = len(available) > 0
    return summary


# Run the smoke test and print a one-line verdict: success only when every
# entry of the result dict is truthy.
smoke_results = smoke_test_consistency_system()
print("\n煙霧測試結果:", smoke_results)
if all(smoke_results.values()):
    print("✅ 所有核心功能正常運作")
else:
    print("⚠️  部分功能需要檢查")

In [None]:
# Key Parameters (低顯存選項)
# Consistency-check parameters
CONSISTENCY_CONFIG = dict(
    terminology_strict=True,  # strict terminology checking
    auto_fix_enabled=True,  # enable automatic fixes
    max_issues_before_reject=10,  # issue-count threshold
    style_injection_mode="minimal",  # style injection: minimal/full
)

# Memory optimisation
MEMORY_CONFIG = dict(
    cache_compiled_patterns=True,  # cache compiled regular expressions
    batch_check_size=50,  # batch size for checks
    max_text_length=4000,  # maximum text length
)

In [None]:
# Smoke Test Summary
# Consistency-check parameters.
# NOTE(review): this cell re-binds CONSISTENCY_CONFIG and MEMORY_CONFIG to the
# same values as the preceding "Key Parameters" cell — confirm whether a
# separate summary was intended here.
CONSISTENCY_CONFIG = {
    # strict terminology checking
    "terminology_strict": True,
    # enable automatic fixes
    "auto_fix_enabled": True,
    # issue-count threshold
    "max_issues_before_reject": 10,
    # style injection: minimal/full
    "style_injection_mode": "minimal",
}

# Memory optimisation.
MEMORY_CONFIG = {
    # cache compiled regular expressions
    "cache_compiled_patterns": True,
    # batch size for checks
    "batch_check_size": 50,
    # maximum text length
    "max_text_length": 4000,
}