In [None]:
# Cell1:  Shared Cache Bootstrap
import os, pathlib, torch
import sys
from datetime import datetime

# Shared cache configuration (copy this cell into every notebook).
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "../ai_warehouse/cache")

# Environment variable -> cache directory, all rooted at AI_CACHE_ROOT.
cache_env = {
    "HF_HOME": f"{AI_CACHE_ROOT}/hf",
    "TRANSFORMERS_CACHE": f"{AI_CACHE_ROOT}/hf/transformers",
    "HF_DATASETS_CACHE": f"{AI_CACHE_ROOT}/hf/datasets",
    "HUGGINGFACE_HUB_CACHE": f"{AI_CACHE_ROOT}/hf/hub",
    "TORCH_HOME": f"{AI_CACHE_ROOT}/torch",
}

# Export each variable, then make sure its directory exists.
for k, v in cache_env.items():
    os.environ[k] = v
    pathlib.Path(v).mkdir(parents=True, exist_ok=True)

gpu_available = torch.cuda.is_available()
print("[Cache]", AI_CACHE_ROOT, "| GPU:", gpu_available)

In [None]:
# Cell 2: World KB Schema and Sample Data
import json
import re
from pathlib import Path
from typing import Dict, List, Optional
from pydantic import BaseModel, Field
from opencc import OpenCC


# Game world schema
class Character(BaseModel):
    """Schema for one character entry of the game world knowledge base."""

    # Required character name.
    name: str
    # Optional title; None when not given.
    title: Optional[str] = None
    # Required description text.
    description: str
    # Attribute name -> integer value; a fresh empty dict when omitted.
    attributes: Dict[str, int] = Field(default_factory=dict)
    # Required background text.
    background: str
    # Relationship entries as plain strings; a fresh empty list when omitted.
    relationships: List[str] = Field(default_factory=list)


class Location(BaseModel):
    """Schema for one location entry of the game world knowledge base."""

    # Required location name.
    name: str
    type: str  # city, dungeon, wilderness, etc.
    # Required description text.
    description: str
    # Names of connected locations; a fresh empty list when omitted.
    connections: List[str] = Field(default_factory=list)
    # Notable features of the location; a fresh empty list when omitted.
    features: List[str] = Field(default_factory=list)
    # Dangers present at the location; a fresh empty list when omitted.
    dangers: List[str] = Field(default_factory=list)


class Item(BaseModel):
    """Schema for one item entry of the game world knowledge base."""

    # Required item name.
    name: str
    type: str  # weapon, armor, consumable, quest, etc.
    # Required description text.
    description: str
    # Property name -> string value; a fresh empty dict when omitted.
    properties: Dict[str, str] = Field(default_factory=dict)
    # Rarity label; "common" when omitted.
    rarity: str = "common"


class Event(BaseModel):
    """Schema for one event entry of the game world knowledge base."""

    # Required event name.
    name: str
    type: str  # story, random, quest, etc.
    # Required description text.
    description: str
    # Trigger conditions as plain strings; a fresh empty list when omitted.
    triggers: List[str] = Field(default_factory=list)
    # Possible outcomes as plain strings; a fresh empty list when omitted.
    outcomes: List[str] = Field(default_factory=list)
    # Requirement name -> string value; a fresh empty dict when omitted.
    requirements: Dict[str, str] = Field(default_factory=dict)


# Create sample world data
def create_sample_world_data():
    """Create sample world KB data for testing.

    Returns:
        dict: Keys "characters", "locations", "items" and "events", each
        holding a list of plain dicts dumped from the schema model instances
        built below.
    """

    def _dump(model):
        # Pydantic v2 renamed ``.dict()`` to ``.model_dump()`` and deprecated
        # the old name; use the new method when it exists, else fall back.
        if hasattr(model, "model_dump"):
            return model.model_dump()
        return model.dict()

    # Characters
    characters = [
        Character(
            name="艾莉亞",
            title="星空法師",
            description="掌握古老星象魔法的年輕法師，擁有預見未來的能力。",
            attributes={"智力": 18, "魔力": 16, "體力": 12},
            background="出身於古老的星象法師家族，自幼習得觀星占卜之術。在一次意外中獲得了預見未來的能力，但代價是每次使用都會縮短壽命。",
            relationships=["導師：賢者萬德", "宿敵：暗影領主"],
        ),
        Character(
            name="雷克斯",
            title="鋼鐵騎士",
            description="身穿重甲的正義騎士，以保護無辜為己任。",
            attributes={"力量": 17, "體力": 19, "意志": 15},
            background="曾經是王國精銳騎士團的隊長，在一場政變中失去了國王的信任。現在作為獨行俠游走各地，尋找重新證明自己的機會。",
            relationships=["舊友：宮廷侍衛長", "仇敵：叛變公爵"],
        ),
    ]

    # Locations
    locations = [
        Location(
            name="翡翠森林",
            type="wilderness",
            description="古老而神秘的森林，據說是精靈族的故鄉。樹木高聳入云，陽光透過樹葉灑下斑駁的光影。",
            connections=["銀月城", "古墓迷宮", "水晶湖"],
            features=["精靈遺跡", "古老智慧樹", "魔法泉水"],
            dangers=["迷路風險", "野生魔獸", "精靈陷阱"],
        ),
        Location(
            name="銀月城",
            type="city",
            description="建立在高山上的雄偉城市，以其銀白色的城牆和尖塔聞名。城中心有一座古老的魔法塔。",
            connections=["翡翠森林", "荒野平原", "地下城"],
            features=["魔法學院", "冒險者公會", "皇家圖書館"],
            dangers=["政治陰謀", "盜賊公會", "魔法實驗意外"],
        ),
    ]

    # Items
    items = [
        Item(
            name="星辰法杖",
            type="weapon",
            description="鑲嵌著古老星石的法杖，能夠引導星空的力量。在夜晚使用時威力倍增。",
            properties={"魔法攻擊": "+15", "星象魔法": "+3", "夜晚加成": "雙倍威力"},
            rarity="legendary",
        ),
        Item(
            name="鋼鐵意志護符",
            type="accessory",
            description="古老騎士留下的護符，能夠增強佩戴者的意志力和勇氣。",
            properties={"意志": "+2", "恐懼抗性": "免疫", "正義感": "+1"},
            rarity="rare",
        ),
    ]

    # Events
    events = [
        Event(
            name="古老預言的實現",
            type="story",
            description="星空中出現了古老預言記載的星象，預示著重大變化即將來臨。",
            triggers=["進入翡翠森林", "與艾莉亞對話", "夜晚時刻"],
            outcomes=["獲得預言線索", "解鎖隱藏任務", "提升魔法能力"],
            requirements={"智力": "15+", "魔法親和": "中等以上"},
        )
    ]

    return {
        "characters": [_dump(character) for character in characters],
        "locations": [_dump(location) for location in locations],
        "items": [_dump(item) for item in items],
        "events": [_dump(event) for event in events],
    }


# Build the sample world once and persist it as pretty-printed UTF-8 JSON.
sample_data = create_sample_world_data()
world_kb_dir = Path("data/world_kb")
world_kb_dir.mkdir(parents=True, exist_ok=True)

with (world_kb_dir / "world_data.json").open("w", encoding="utf-8") as f:
    json.dump(sample_data, f, ensure_ascii=False, indent=2)

print("✓ Created sample world KB data")
# One summary line per category, in a fixed order.
for label, key in (
    ("Characters", "characters"),
    ("Locations", "locations"),
    ("Items", "items"),
    ("Events", "events"),
):
    print(f"{label}: {len(sample_data[key])}")

In [None]:
# Cell 3: World KB Document Processor
class WorldKBProcessor:
    """Process world KB documents into chunks for RAG indexing.

    Each ``process_*`` method takes one record as a plain dict and returns a
    list of chunks. A chunk is ``{"text": str, "meta": {...}}`` where ``meta``
    holds ``type``, ``name``, ``category`` and ``source`` (always
    ``"world_kb"``).
    """

    def __init__(self, language="zh-tw"):
        """Remember the language and create the matching OpenCC converter.

        Args:
            language: ``"zh-tw"`` selects the ``"s2t"`` converter; any other
                value selects ``"t2s"``.
        """
        self.language = language
        # NOTE(review): the converter is created here, but none of the
        # process_* methods in this class apply it to the chunk text.
        self.cc = OpenCC("s2t" if language == "zh-tw" else "t2s")

    @staticmethod
    def _make_chunk(text: str, kind: str, name: str, category: str) -> Dict:
        """Wrap chunk text in the metadata layout shared by every chunk type."""
        return {
            "text": text,
            "meta": {
                "type": kind,
                "name": name,
                "category": category,
                "source": "world_kb",
            },
        }

    def process_character(self, char_data: Dict) -> List[Dict]:
        """Convert character data to searchable chunks.

        Args:
            char_data: Character record; ``name`` and ``description`` are
                read unconditionally, the other fields only when truthy.

        Returns:
            A ``basic_info`` chunk, followed by ``background`` and
            ``relationships`` chunks when those fields are non-empty.

        Raises:
            KeyError: If ``name`` or ``description`` is missing.
        """
        name = char_data["name"]

        # Basic info: name, optional title, description, optional attributes.
        lines = [f"角色：{name}"]
        if char_data.get("title"):
            lines.append(f"稱號：{char_data['title']}")
        lines.append(f"描述：{char_data['description']}")
        if char_data.get("attributes"):
            lines.append(
                "屬性："
                + "、".join(f"{k}{v}" for k, v in char_data["attributes"].items())
            )

        chunks = [
            self._make_chunk("\n".join(lines).strip(), "character", name, "basic_info")
        ]

        if char_data.get("background"):
            chunks.append(
                self._make_chunk(
                    f"角色背景：{name}\n{char_data['background']}",
                    "character",
                    name,
                    "background",
                )
            )

        if char_data.get("relationships"):
            chunks.append(
                self._make_chunk(
                    f"人物關係：{name}\n" + "\n".join(char_data["relationships"]),
                    "character",
                    name,
                    "relationships",
                )
            )

        return chunks

    def process_location(self, loc_data: Dict) -> List[Dict]:
        """Convert location data to searchable chunks.

        Args:
            loc_data: Location record; ``name``, ``type`` and ``description``
                are read unconditionally.

        Returns:
            A ``basic_info`` chunk, a ``connections`` chunk when connections
            are non-empty, and a ``features`` chunk when features or dangers
            are non-empty.

        Raises:
            KeyError: If ``name``, ``type`` or ``description`` is missing.
        """
        name = loc_data["name"]

        basic_lines = [
            f"地點：{name}",
            f"類型：{loc_data['type']}",
            f"描述：{loc_data['description']}",
        ]
        chunks = [
            self._make_chunk(
                "\n".join(basic_lines).strip(), "location", name, "basic_info"
            )
        ]

        if loc_data.get("connections"):
            chunks.append(
                self._make_chunk(
                    f"地點連接：{name}\n可前往：" + "、".join(loc_data["connections"]),
                    "location",
                    name,
                    "connections",
                )
            )

        # Features and dangers share one chunk; it is emitted only when at
        # least one of the two lists has content (a bare header is useless).
        feature_lines = []
        if loc_data.get("features"):
            feature_lines.append("特色：" + "、".join(loc_data["features"]))
        if loc_data.get("dangers"):
            feature_lines.append("危險：" + "、".join(loc_data["dangers"]))
        if feature_lines:
            chunks.append(
                self._make_chunk(
                    "\n".join([f"地點特色：{name}", *feature_lines]).strip(),
                    "location",
                    name,
                    "features",
                )
            )

        return chunks

    def process_item(self, item_data: Dict) -> List[Dict]:
        """Convert item data to a single ``full_info`` chunk.

        Args:
            item_data: Item record; ``name``, ``type`` and ``description``
                are read unconditionally. A missing ``rarity`` key falls back
                to ``"common"`` instead of raising.

        Returns:
            A one-element list holding the item chunk.

        Raises:
            KeyError: If ``name``, ``type`` or ``description`` is missing.
        """
        lines = [
            f"物品：{item_data['name']}",
            f"類型：{item_data['type']}",
            f"稀有度：{item_data.get('rarity', 'common')}",
            f"描述：{item_data['description']}",
        ]
        if item_data.get("properties"):
            lines.append(
                "屬性："
                + "、".join(f"{k}:{v}" for k, v in item_data["properties"].items())
            )

        return [
            self._make_chunk(
                "\n".join(lines).strip(), "item", item_data["name"], "full_info"
            )
        ]

    def process_event(self, event_data: Dict) -> List[Dict]:
        """Convert event data to a single ``full_info`` chunk.

        Args:
            event_data: Event record; ``name``, ``type`` and ``description``
                are read unconditionally, the list/dict fields only when
                truthy.

        Returns:
            A one-element list holding the event chunk.

        Raises:
            KeyError: If ``name``, ``type`` or ``description`` is missing.
        """
        lines = [
            f"事件：{event_data['name']}",
            f"類型：{event_data['type']}",
            f"描述：{event_data['description']}",
        ]
        if event_data.get("triggers"):
            lines.append("觸發條件：" + "、".join(event_data["triggers"]))
        if event_data.get("outcomes"):
            lines.append("可能結果：" + "、".join(event_data["outcomes"]))
        if event_data.get("requirements"):
            lines.append(
                "需求："
                + "、".join(f"{k}:{v}" for k, v in event_data["requirements"].items())
            )

        return [
            self._make_chunk(
                "\n".join(lines).strip(), "event", event_data["name"], "full_info"
            )
        ]

    def process_world_data(self, world_data: Dict) -> List[Dict]:
        """Process complete world data into chunks.

        Args:
            world_data: Dict with optional ``characters``, ``locations``,
                ``items`` and ``events`` lists; missing or empty categories
                contribute nothing.

        Returns:
            All chunks, in category order: characters, locations, items,
            events.
        """
        handlers = (
            ("characters", self.process_character),
            ("locations", self.process_location),
            ("items", self.process_item),
            ("events", self.process_event),
        )

        all_chunks = []
        for key, handler in handlers:
            for record in world_data.get(key) or []:
                all_chunks.extend(handler(record))
        return all_chunks


# Run the processor over the sample data and preview the first three chunks.
processor = WorldKBProcessor()
chunks = processor.process_world_data(sample_data)

print(f"✓ Processed world data into {len(chunks)} chunks")
for i, chunk in enumerate(chunks[:3]):
    meta = chunk["meta"]
    text = chunk["text"]
    print(f"\nChunk {i+1} ({meta['type']} - {meta['category']}):")
    # Long chunks are cut to 100 characters and marked with an ellipsis.
    if len(text) > 100:
        print(text[:100] + "...")
    else:
        print(text)

In [None]:
# Cell 4: Build FAISS Index for World KB
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np


class WorldKBIndex:
    """FAISS index specifically for the game world knowledge base.

    Holds a SentenceTransformer embedding model, a flat inner-product FAISS
    index, and the chunk list whose positions match the index row ids.
    """

    def __init__(self, model_name="BAAI/bge-m3"):
        """Load the embedding model; the index stays empty until built or loaded.

        Args:
            model_name: Model identifier passed to ``SentenceTransformer``.
        """
        print(f"Loading embedding model: {model_name}")
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.chunks = []

    def build_index(self, chunks: List[Dict]):
        """Build the FAISS index from world KB chunks.

        Args:
            chunks: Chunk dicts; each must have a ``"text"`` entry.

        Returns:
            The newly built FAISS index (also stored on ``self.index``).

        Raises:
            ValueError: If ``chunks`` is empty (there would be no embedding
                dimension to size the index with).
        """
        if not chunks:
            raise ValueError("Cannot build an index from an empty chunk list.")

        print(f"Building index for {len(chunks)} chunks...")

        # Extract texts and compute embeddings
        texts = [chunk["text"] for chunk in chunks]
        print("Computing embeddings...")
        embeddings = self.model.encode(
            texts, normalize_embeddings=True, batch_size=16, show_progress_bar=True
        ).astype("float32")

        # Inner product on normalized vectors
        dimension = embeddings.shape[1]
        self.index = faiss.IndexFlatIP(dimension)
        self.index.add(embeddings)

        # Keep the chunks so search hits can be mapped back to text/meta.
        self.chunks = chunks

        print(
            f"✓ Built index with {self.index.ntotal} vectors (dimension: {dimension})"
        )
        return self.index

    def search(
        self, query: str, k: int = 5, filter_type: Optional[str] = None
    ) -> List[Dict]:
        """Search the world KB with optional type filtering.

        Args:
            query: Query text to embed.
            k: Maximum number of results; values <= 0 yield an empty list.
            filter_type: When set, only chunks whose ``meta["type"]`` equals
                this value are returned.

        Returns:
            Up to ``k`` dicts with ``text``, ``meta`` and ``score``, in the
            order the index returned them.

        Raises:
            ValueError: If no index has been built or loaded.
        """
        if self.index is None:
            raise ValueError("Index not built yet. Call build_index() first.")

        total = self.index.ntotal
        if k <= 0 or total == 0:
            return []

        # Compute query embedding
        query_embedding = self.model.encode([query], normalize_embeddings=True).astype(
            "float32"
        )

        # With a type filter, any fixed oversampling factor can leave the
        # result short when matching chunks rank low overall, so every vector
        # is considered. Without a filter the first k hits are the answer.
        fetch = total if filter_type else min(k, total)
        scores, indices = self.index.search(query_embedding, fetch)

        results = []
        for score, idx in zip(scores[0], indices[0]):
            # Skip the -1 "no result" marker and ids with no matching chunk.
            if idx < 0 or idx >= len(self.chunks):
                continue

            chunk = self.chunks[idx]
            if filter_type and chunk["meta"]["type"] != filter_type:
                continue

            results.append(
                {"text": chunk["text"], "meta": chunk["meta"], "score": float(score)}
            )
            if len(results) >= k:
                break

        return results

    def save_index(self, index_path: str, chunks_path: str):
        """Save the index and its chunks to disk.

        Args:
            index_path: Destination of the FAISS index file.
            chunks_path: Destination of the chunks file (JSON Lines, UTF-8).

        Raises:
            ValueError: If there is no index to save.
        """
        if self.index is None:
            raise ValueError("No index to save")

        # Create missing parent directories so neither write fails on a
        # fresh checkout.
        for target in (index_path, chunks_path):
            Path(target).parent.mkdir(parents=True, exist_ok=True)

        # Save FAISS index
        faiss.write_index(self.index, index_path)

        # Save chunks as JSONL
        with open(chunks_path, "w", encoding="utf-8") as f:
            for chunk in self.chunks:
                f.write(json.dumps(chunk, ensure_ascii=False) + "\n")

        print(f"✓ Saved index to {index_path}")
        print(f"✓ Saved chunks to {chunks_path}")

    def load_index(self, index_path: str, chunks_path: str):
        """Load the index and its chunks from disk.

        Args:
            index_path: FAISS index file to read.
            chunks_path: JSON Lines chunk file to read; blank lines are
                ignored.

        Raises:
            ValueError: If the number of vectors and chunks differ, which
                would make search hits point at the wrong (or no) chunk.
        """
        index = faiss.read_index(index_path)

        chunks = []
        with open(chunks_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                chunks.append(json.loads(line))

        if index.ntotal != len(chunks):
            raise ValueError(
                f"Index/chunk mismatch: {index.ntotal} vectors in {index_path} "
                f"but {len(chunks)} chunks in {chunks_path}"
            )

        # Assign only after validation so a failed load leaves state untouched.
        self.index = index
        self.chunks = chunks

        print(f"✓ Loaded index with {self.index.ntotal} vectors")
        print(f"✓ Loaded {len(self.chunks)} chunks")


# Embed the processed chunks into a fresh world KB index.
kb_index = WorldKBIndex()
kb_index.build_index(chunks)

# Persist the index and its chunk file under the indices folder.
INDEX_PATH = "indices/world_kb.faiss"
CHUNKS_PATH = "indices/world_kb_chunks.jsonl"
Path("indices").mkdir(exist_ok=True)
kb_index.save_index(INDEX_PATH, CHUNKS_PATH)

print("\n=== World KB Index Ready ===")

In [None]:
# Cell 5: World KB Retrieval System
class WorldKBRetriever:
    """High-level retrieval system for game world knowledge."""

    def __init__(self, index_path: str, chunks_path: str):
        """Create a WorldKBIndex and load a saved index into it.

        Args:
            index_path: Path of the saved FAISS index.
            chunks_path: Path of the saved chunk file.
        """
        self.kb_index = WorldKBIndex()
        self.kb_index.load_index(index_path, chunks_path)

    def _lookup_entity(
        self, label: str, name: str, entity_type: str, categories: List[str]
    ) -> Dict:
        """Search for one named entity and group its hits by chunk category.

        The search is semantic and filtered by type only, so it also returns
        chunks belonging to other entities of that type. When any hit carries
        exactly the requested name in its metadata, only those hits are kept;
        otherwise the semantic hits are used as-is so fuzzy lookups still
        return something.
        """
        hits = self.kb_index.search(f"{label} {name}", k=10, filter_type=entity_type)

        exact = [hit for hit in hits if hit["meta"].get("name") == name]
        if exact:
            hits = exact

        grouped = {category: [] for category in categories}
        for hit in hits:
            bucket = grouped.get(hit["meta"]["category"])
            if bucket is not None:
                bucket.append(hit)
        return grouped

    def get_character_info(self, character_name: str) -> Dict:
        """Get comprehensive character information.

        Args:
            character_name: Character to look up.

        Returns:
            Dict with ``basic_info``, ``background`` and ``relationships``
            keys, each a list of search hits (possibly empty).
        """
        return self._lookup_entity(
            "角色",
            character_name,
            "character",
            ["basic_info", "background", "relationships"],
        )

    def get_location_info(self, location_name: str) -> Dict:
        """Get comprehensive location information.

        Args:
            location_name: Location to look up.

        Returns:
            Dict with ``basic_info``, ``connections`` and ``features`` keys,
            each a list of search hits (possibly empty).
        """
        return self._lookup_entity(
            "地點",
            location_name,
            "location",
            ["basic_info", "connections", "features"],
        )

    def search_items_by_type(self, item_type: str) -> List[Dict]:
        """Search items by type; returns up to 10 item hits."""
        return self.kb_index.search(f"物品 類型 {item_type}", k=10, filter_type="item")

    def find_related_events(self, context: str) -> List[Dict]:
        """Find events related to the given context; returns up to 5 event hits."""
        return self.kb_index.search(f"事件 {context}", k=5, filter_type="event")

    def contextual_search(self, query: str, k: int = 5) -> List[Dict]:
        """General contextual search across all world knowledge (no type filter)."""
        return self.kb_index.search(query, k=k)


# Load the saved index through the retriever and run a few sample lookups.
retriever = WorldKBRetriever("indices/world_kb.faiss", "indices/world_kb_chunks.jsonl")

print("=== Testing World KB Retrieval ===\n")

# Character lookup: report each non-empty category with a preview of its top hit.
char_info = retriever.get_character_info("艾莉亞")
print("Character Info for 艾莉亞:")
for category, results in char_info.items():
    if not results:
        continue
    print(f"  {category}: {len(results)} results")
    print(f"    {results[0]['text'][:80]}...")

print()

# Location lookup: report the hit count of each non-empty category.
loc_info = retriever.get_location_info("翡翠森林")
print("Location Info for 翡翠森林:")
for category, results in loc_info.items():
    if not results:
        continue
    print(f"  {category}: {len(results)} results")

print()

# Unfiltered contextual search: show the two best hits with their scores.
context_results = retriever.contextual_search("魔法 法師")
print("Contextual search for '魔法 法師':")
for i, result in enumerate(context_results[:2]):
    kind = result["meta"]["type"]
    snippet = result["text"][:60]
    print(f"  {i+1}. [{kind}] {snippet}... (score: {result['score']:.3f})")

In [None]:
# Cell 6: Game Event Generator with RAG
class GameEventGenerator:
    """Build event and encounter prompts from world KB retrieval results."""

    def __init__(self, retriever: WorldKBRetriever):
        """Keep the retriever used for every KB lookup."""
        self.retriever = retriever

    @staticmethod
    def _collect_texts(grouped: Dict) -> List[str]:
        """Flatten a category -> hits mapping into hit texts, category by category."""
        return [hit["text"] for hits in grouped.values() for hit in hits]

    def generate_location_event(self, location_name: str, player_context: Dict) -> Dict:
        """Generate an event prompt for a specific location.

        Args:
            location_name: Location to look up in the KB.
            player_context: Player state; ``level`` (default 1) and ``class``
                (default "冒險者") are read from it.

        Returns:
            Dict with ``location``, ``context_used`` (counts of retrieved
            location texts and related events) and ``event_prompt``.
        """
        texts = self._collect_texts(self.retriever.get_location_info(location_name))
        # Only the first three texts go into the prompt to bound its size.
        location_context = "\n".join(texts[:3])

        # The best related event, if any, is added as extra context.
        related_events = self.retriever.find_related_events(location_name)
        event_context = related_events[0]["text"] if related_events else ""

        level = player_context.get("level", 1)
        player_class = player_context.get("class", "冒險者")

        # Simple prompt assembly (in a real game this would be sent to an LLM).
        prompt = f"""
基於以下世界知識生成事件：

地點信息：
{location_context}

相關事件：
{event_context}

玩家狀態：
- 等級：{level}
- 職業：{player_class}
- 當前位置：{location_name}

請生成一個適合此情境的遊戲事件。
""".strip()

        return {
            "location": location_name,
            "context_used": {
                "location_info": len(texts),
                "related_events": len(related_events),
            },
            "event_prompt": prompt,
        }

    def generate_character_encounter(self, character_name: str) -> Dict:
        """Generate an encounter prompt for a specific character.

        Args:
            character_name: Character to look up in the KB.

        Returns:
            Dict with ``character``, ``context_used`` (count of retrieved
            character texts) and ``encounter_prompt``.
        """
        texts = self._collect_texts(self.retriever.get_character_info(character_name))
        # Only the first two texts go into the prompt.
        character_context = "\n".join(texts[:2])

        prompt = f"""
基於以下角色信息生成遭遇事件：

角色信息：
{character_context}

生成與此角色的互動事件，包括可能的對話選項和結果。
""".strip()

        return {
            "character": character_name,
            "context_used": {"character_info": len(texts)},
            "encounter_prompt": prompt,
        }


# Drive the event generator with the shared retriever.
event_gen = GameEventGenerator(retriever)

print("=== Testing Event Generation ===\n")

# Location event for a level-3 mage.
player_context = {"level": 3, "class": "法師"}
location_event = event_gen.generate_location_event("翡翠森林", player_context)
location_prompt_len = len(location_event["event_prompt"])
print("Location Event for 翡翠森林:")
print(f"Context used: {location_event['context_used']}")
print(f"Prompt length: {location_prompt_len} characters")

print()

# Character encounter.
char_encounter = event_gen.generate_character_encounter("艾莉亞")
encounter_prompt_len = len(char_encounter["encounter_prompt"])
print("Character Encounter for 艾莉亞:")
print(f"Context used: {char_encounter['context_used']}")
print(f"Prompt length: {encounter_prompt_len} characters")

In [None]:
# Cell 7: Smoke Test - Complete World KB Pipeline
def smoke_test_world_kb():
    """Comprehensive smoke test for the world KB system.

    Runs six stages in order: sample data creation, chunking, index building,
    search, save/load through WorldKBRetriever, and event prompt generation.
    The index files of the save/load stage live in a temporary directory
    that is removed even when an assertion fails.

    Returns:
        bool: True when every assertion passed.

    Raises:
        AssertionError: If any stage produces an unexpected result.
    """
    import os
    import tempfile

    print("🧪 World KB Smoke Test")
    print("=" * 50)

    # Test 1: Data creation
    print("\n1. Testing sample data creation...")
    data = create_sample_world_data()
    assert len(data["characters"]) >= 2, "Should have at least 2 characters"
    assert len(data["locations"]) >= 2, "Should have at least 2 locations"
    print("✓ Sample data created successfully")

    # Test 2: Document processing
    print("\n2. Testing document processing...")
    processor = WorldKBProcessor()
    chunks = processor.process_world_data(data)
    assert len(chunks) > 0, "Should generate chunks"
    assert all(
        "text" in chunk and "meta" in chunk for chunk in chunks
    ), "Chunks should have text and meta"
    print(f"✓ Generated {len(chunks)} chunks")

    # Test 3: Index building
    print("\n3. Testing index building...")
    kb_index = WorldKBIndex()
    index = kb_index.build_index(chunks)
    assert index.ntotal == len(chunks), "Index should contain all chunks"
    print(f"✓ Built index with {index.ntotal} vectors")

    # Test 4: Search functionality
    print("\n4. Testing search functionality...")
    results = kb_index.search("艾莉亞", k=3)
    assert len(results) > 0, "Should find results for character search"
    assert all("score" in r for r in results), "Results should have scores"
    print(f"✓ Found {len(results)} results for character search")

    # Tests 5 and 6 run inside the temporary directory's lifetime; the
    # context manager cleans up on success and on failure alike, and nothing
    # is written to the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Test 5: Retrieval system
        print("\n5. Testing retrieval system...")
        index_path = os.path.join(tmp_dir, "test_world.faiss")
        chunks_path = os.path.join(tmp_dir, "test_world_chunks.jsonl")
        kb_index.save_index(index_path, chunks_path)
        retriever = WorldKBRetriever(index_path, chunks_path)

        char_info = retriever.get_character_info("艾莉亞")
        assert any(
            len(info) > 0 for info in char_info.values()
        ), "Should find character info"
        print("✓ Character retrieval working")

        # Test 6: Event generation
        print("\n6. Testing event generation...")
        event_gen = GameEventGenerator(retriever)
        location_event = event_gen.generate_location_event("翡翠森林", {"level": 1})
        assert "event_prompt" in location_event, "Should generate event prompt"
        assert (
            len(location_event["event_prompt"]) > 100
        ), "Event prompt should be substantial"
        print("✓ Event generation working")

    print("\n🎉 All tests passed! World KB system is ready.")
    return True


# Run the smoke test; it raises AssertionError on the first failing stage and
# returns True otherwise (as the cell's last expression, that value is displayed).
smoke_test_world_kb()

In [None]:
# Cell 8: Integration Example - World Query System
class WorldQuerySystem:
    """Complete query system for game world information."""

    # The annotation is quoted so it is resolved lazily: defining this class
    # does not require WorldKBRetriever to exist yet.
    def __init__(self, retriever: "WorldKBRetriever"):
        """Keep the retriever used for every lookup."""
        self.retriever = retriever

    def answer_world_question(self, question: str) -> Dict:
        """Build a RAG answer prompt for a question about the game world.

        Args:
            question: The question text; also used as the search query.

        Returns:
            Dict with ``question``, ``context`` (numbered hit texts),
            ``sources`` (one metadata record per hit), ``answer_prompt`` and
            ``source_count``.
        """
        # Search for relevant information
        results = self.retriever.contextual_search(question, k=5)

        # Number hits from 1 so the prompt can cite them as [1], [2], ...
        context_parts = []
        sources = []
        for number, result in enumerate(results, start=1):
            meta = result["meta"]
            context_parts.append(f"[{number}] {result['text']}")
            sources.append(
                {
                    "index": number,
                    "type": meta["type"],
                    "name": meta["name"],
                    "category": meta["category"],
                    "score": result["score"],
                }
            )

        context = "\n\n".join(context_parts)

        # Generate answer prompt (in a real game, this would go to an LLM)
        answer_prompt = f"""
問題：{question}

相關世界知識：
{context}

請根據上述世界知識回答問題，並在回答中標註引用來源 [1]、[2] 等。
如果知識不足以回答問題，請說明需要更多哪方面的信息。
"""

        return {
            "question": question,
            "context": context,
            "sources": sources,
            "answer_prompt": answer_prompt,
            "source_count": len(sources),
        }

    def get_location_summary(self, location: str) -> str:
        """Get a summary of a location.

        Args:
            location: Location name to look up.

        Returns:
            One ``"<category>: <text>"`` line per non-empty category, using
            the first hit of each, joined by newlines.
        """
        loc_info = self.retriever.get_location_info(location)
        return "\n".join(
            f"{category}: {results[0]['text']}"
            for category, results in loc_info.items()
            if results
        )

    def _search_events(self, query: str, k: int) -> List[Dict]:
        """Return up to ``k`` event hits for ``query``.

        The type filter is applied at retrieval time so events are found even
        when chunks of other types outrank them. Retrievers without a
        ``kb_index`` attribute fall back to filtering the unfiltered hits.
        """
        kb_index = getattr(self.retriever, "kb_index", None)
        if kb_index is not None:
            return kb_index.search(query, k=k, filter_type="event")
        return [
            hit
            for hit in self.retriever.contextual_search(query, k=k)
            if hit["meta"]["type"] == "event"
        ]

    def find_quest_hooks(self, player_interests: List[str]) -> List[Dict]:
        """Find potential quest hooks based on player interests.

        Args:
            player_interests: Interest keywords; each is searched separately.

        Returns:
            Up to five dicts with ``hook`` (event text), ``interest`` and
            ``score``, sorted by descending score and de-duplicated on the
            first 50 characters of the hook text.
        """
        quest_hooks = []
        for interest in player_interests:
            for result in self._search_events(f"任務 {interest}", k=3):
                quest_hooks.append(
                    {
                        "hook": result["text"],
                        "interest": interest,
                        "score": result["score"],
                    }
                )

        # Sort by score, then keep the best-scoring entry per hook prefix.
        quest_hooks.sort(key=lambda hook: hook["score"], reverse=True)
        seen = set()
        unique_hooks = []
        for hook in quest_hooks:
            identifier = hook["hook"][:50]  # First 50 chars as identifier
            if identifier in seen:
                continue
            seen.add(identifier)
            unique_hooks.append(hook)

        return unique_hooks[:5]  # Return top 5


# Exercise the query system with the shared retriever.
query_system = WorldQuerySystem(retriever)

print("=== Testing World Query System ===\n")

# World question answering: show the two best sources.
question = "艾莉亞有什麼特殊能力？"
answer_data = query_system.answer_world_question(question)
print(f"Question: {question}")
print(f"Found {answer_data['source_count']} relevant sources")
print("Sources:")
top_sources = answer_data["sources"][:2]
for source in top_sources:
    print(
        f"  [{source['index']}] {source['type']} - {source['name']} ({source['score']:.3f})"
    )

print()

# Location summary, cut to 200 characters when longer.
location = "銀月城"
summary = query_system.get_location_summary(location)
print(f"Location Summary for {location}:")
if len(summary) > 200:
    print(summary[:200] + "...")
else:
    print(summary)

print()

# Quest hook finding: show the two best hooks.
player_interests = ["魔法", "冒險"]
quest_hooks = query_system.find_quest_hooks(player_interests)
print(f"Quest hooks for interests {player_interests}:")
for i, hook in enumerate(quest_hooks[:2]):
    print(
        f"  {i+1}. {hook['hook'][:80]}... (interest: {hook['interest']}, score: {hook['score']:.3f})"
    )

print("\n✅ World KB system fully operational!")

In [None]:
# Cell 9: Performance and Usage Tips
# Section banner for the performance output of this cell.
print("=== World KB Performance Metrics ===")

import time
import psutil
import os


def measure_performance():
    """Measure key performance metrics of the loaded world KB.

    Reads the module-level ``retriever`` and the saved index files under
    ``indices/``, prints each metric, and returns them.

    Returns:
        dict: ``memory_mb`` (resident memory of this process),
        ``index_size_mb`` and ``chunks_size_mb`` (file sizes, 0.0 when a file
        cannot be read), ``search_latency_ms`` (one search call) and
        ``chunk_count``.
    """

    def _file_size_mb(path):
        # A missing or unreadable file is reported as 0.0 instead of failing
        # the whole measurement; only OS-level errors are swallowed.
        try:
            return os.path.getsize(path) / 1024 / 1024
        except OSError:
            return 0.0

    # Memory usage
    process = psutil.Process(os.getpid())
    memory_mb = process.memory_info().rss / 1024 / 1024

    # Index size
    index_size_mb = _file_size_mb("indices/world_kb.faiss")
    chunks_size_mb = _file_size_mb("indices/world_kb_chunks.jsonl")

    # Search latency: perf_counter is monotonic and high-resolution, unlike
    # time.time(), which can jump when the wall clock is adjusted.
    start_time = time.perf_counter()
    retriever.contextual_search("測試查詢", k=5)
    search_latency_ms = (time.perf_counter() - start_time) * 1000

    chunk_count = len(retriever.kb_index.chunks)

    print(f"Memory usage: {memory_mb:.1f} MB")
    print(f"Index size: {index_size_mb:.2f} MB")
    print(f"Chunks size: {chunks_size_mb:.2f} MB")
    print(f"Search latency: {search_latency_ms:.1f} ms")
    print(f"Chunks in index: {chunk_count}")

    return {
        "memory_mb": memory_mb,
        "index_size_mb": index_size_mb,
        "chunks_size_mb": chunks_size_mb,
        "search_latency_ms": search_latency_ms,
        "chunk_count": chunk_count,
    }


# Collect the metrics once; the tips text below reuses the memory figure.
metrics = measure_performance()

print("\n=== Usage Tips ===")
# The template holds a single "{:.1f}" placeholder, filled by .format() with
# metrics["memory_mb"].
print(
    """
1. **擴展世界內容**：
   - 在 data/world_kb/ 添加更多 JSON 檔案
   - 使用 WorldKBProcessor 處理新內容
   - 重建索引以包含新內容

2. **優化檢索效果**：
   - 使用具體的查詢詞彙
   - 利用類型過濾 (filter_type) 縮小搜尋範圍
   - 調整 k 值平衡召回率和精確度

3. **記憶體優化**：
   - 目前記憶體使用：{:.1f} MB
   - 可使用較小的嵌入模型（如 bge-small-zh）
   - 考慮使用 FAISS IVF 索引處理大型世界

4. **遊戲整合**：
   - 將 retriever 整合到遊戲主迴圈
   - 快取常用查詢結果
   - 使用 filter_type 提高查詢效率

5. **內容管理**：
   - 定期備份索引檔案
   - 版本控制世界內容變更
   - 監控索引大小和查詢延遲
""".format(
        metrics["memory_mb"]
    )
)

print("\n🎮 Ready to power your text adventure game!")

In [None]:
# Cell 10: What We Built / Pitfalls / Next Steps
# This cell only prints static summary text; it computes nothing.

# Section 1: what the notebook built.
print("=== What We Built ===")
print(
    """
✅ 完整的遊戲世界知識庫系統：
   • Pydantic schema 定義角色、地點、物品、事件
   • 世界內容處理器，將結構化數據轉為可檢索片段
   • FAISS 向量索引，支援語義搜尋
   • 專門的檢索系統，支援類型過濾和分類查詢
   • 事件生成器，基於世界知識產生動態內容
   • 完整的查詢系統，支援問答和任務線索發現

✅ 核心功能：
   • 多類型內容索引（角色、地點、物品、事件）
   • 語義搜尋和精確檢索
   • 上下文感知的事件生成
   • 引用追蹤和來源標註
   • 效能監控和優化建議
"""
)

# Section 2: pitfalls and performance caveats.
print("\n=== Pitfalls 避坑指南 ===")
print(
    """
⚠️  常見問題：
   • 索引檔案可能很大，確保有足夠儲存空間
   • 嵌入計算需要時間，大型世界建議分批處理
   • 查詢結果品質取決於原始內容的結構化程度
   • 中文分詞可能影響檢索效果，考慮使用專業中文嵌入模型

⚠️  效能注意事項：
   • 避免頻繁重建索引，使用增量更新
   • 快取常用查詢結果
   • 監控記憶體使用，必要時使用較小模型
   • 大型世界考慮使用 FAISS GPU 版本
"""
)

# Section 3: suggested next steps.
print("\n=== Next Steps 後續發展 ===")
print(
    """
🚀 立即可行：
   • 整合到 nb41 狀態機核心
   • 添加更豐富的世界內容
   • 實現增量索引更新機制

🚀 進階功能：
   • 多語言世界內容支援
   • 時間軸相關的動態世界狀態
   • 玩家行為對世界知識的影響
   • 更智慧的事件生成（整合 LLM）

🚀 系統整合：
   • 與 nb42 事件生成系統深度整合
   • 支援 nb45 存檔系統的世界狀態
   • 為 nb46 敘事風格提供一致性檢查
"""
)

# Section 4: reproduction steps and the files this notebook writes.
print("\n=== Reproducibility 重現步驟 ===")
print(
    """
1. 確保環境變數 AI_CACHE_ROOT 已設置
2. 安裝依賴：sentence-transformers, faiss-cpu, opencc, pydantic
3. 運行所有 cells 按順序執行
4. 檢查 indices/ 資料夾下的索引檔案
5. 驗證 smoke test 全部通過

檔案輸出：
• indices/world_kb.faiss (向量索引)
• indices/world_kb_chunks.jsonl (文檔片段)
• data/world_kb/world_data.json (範例世界數據)
"""
)

# Closing banner.
print(
    "\n🎯 本 Notebook 為 Stage 5 的基石，為後續文字冒險遊戲提供了強大的世界知識檢索能力！"
)