In [1]:
from setting.db import SessionLocal
from llm_inference.base import LLMInterface
from graph.graph_knowledge_base import GraphKnowledgeBase, SearchAction

# LLM client handed to the knowledge base constructed just below.
llm_client = LLMInterface("ollama", "deepseek-qwen-32b-q5")

# Positional names passed to GraphKnowledgeBase after the client
# (presumably the entity, relationship and chunk tables -- confirm against the class).
graph_tables = ("entities_150001", "relationships_150001", "chunks_150001")
gkb = GraphKnowledgeBase(llm_client, *graph_tables)

# Database session passed to the gkb.retrieve_* calls in this notebook.
session = SessionLocal()

In [2]:
# The end-user question (in Chinese) that the rest of the notebook tries to answer.
query = "在 TiDB 中, 如果某个节点发生故障 (down机), 并且该节点的实例一直存在, 那么在故障节点的实例副本全部迁移完成后, down-peer 的数量会减少吗？请详细说明 TiDB 的副本迁移机制和 down-peer 数量变化的过程。"

# Generation options, nested under the "options" key.
# NOTE(review): these look like Ollama runtime options -- confirm how LLMInterface consumes them.
# NOTE(review): num_ctx=8092 is unusual; 8192 may have been intended -- confirm before changing.
# NOTE(review): model_kwargs is not referenced by any other visible cell.
model_kwargs = dict(
    options=dict(
        num_ctx=8092,
        num_gpu=80,
        num_predict=10000,
        temperature=0.1,
    )
)

In [3]:
# Ad-hoc check of document retrieval with a fixed English query.
# The recorded run of this cell ended in a KeyboardInterrupt (see the output below).
probe_query = "TiDB fault tolerance behavior during node failure"
gkb.retrieve_documents(session, probe_query)

Invalid result format {'chunk_id': '19f4559cffba473bb38f0fd8f696a7ed', 'is_levant': True, 'confidence': 0.85, 'reasoning': 'Discusses troubleshooting node failures, including checking if services are running and logs which relate directly to fault tolerance during node failure.'}, 'is_relevant'
Invalid result format {'chunk_id': '1d01cd924a7042ce8afdc2a168e5d786', 'is_levant': False, 'confidence': 0.3, 'reasoning': 'Focuses on performance metrics and resource utilization, not directly related to fault tolerance during node failure.'}, 'is_relevant'
Invalid result format {'chunk_id': '182bdc9c3b974bf9bc773284712e891d', 'is_levant': False, 'confidence': 0.2, 'reasoning': 'Talks about metadata locks and DDL operations, unrelated to node failure handling.'}, 'is_relevant'
Invalid result format {'chunk_id': '8d791b565b3443f58c70e7817d43e9de', 'is_levant': True, 'confidence': 0.7, 'reasoning': 'Provides information on handling node failures during live migrations by cordoning nodes and evacu

Filter response: [{'chunk_id': '19f4559cffba473bb38f0fd8f696a7ed', 'is_levant': True, 'confidence': 0.85, 'reasoning': 'Discusses troubleshooting node failures, including checking if services are running and logs which relate directly to fault tolerance during node failure.'}, {'chunk_id': '1d01cd924a7042ce8afdc2a168e5d786', 'is_levant': False, 'confidence': 0.3, 'reasoning': 'Focuses on performance metrics and resource utilization, not directly related to fault tolerance during node failure.'}, {'chunk_id': '182bdc9c3b974bf9bc773284712e891d', 'is_levant': False, 'confidence': 0.2, 'reasoning': 'Talks about metadata locks and DDL operations, unrelated to node failure handling.'}, {'chunk_id': '8d791b565b3443f58c70e7817d43e9de', 'is_levant': True, 'confidence': 0.7, 'reasoning': 'Provides information on handling node failures during live migrations by cordoning nodes and evacuating leaders.'}, {'chunk_id': '620d42b3182740818072a97c8c80c5b0', 'is_levant': False, 'confidence': 0.1, 'reaso

Invalid result format {'chunk_id': '7b3f69aaaa3f46a686b42937c5b159b3', 'is_levant': False, 'confidence': 0.15, 'reasoning': "The chunk discusses PD election and scheduling, which is not directly related to TiDB's fault tolerance behavior during node failure."}, 'is_relevant'
Invalid result format {'chunk_id': 'a9363ee0816844de8ae6d82d67cd5429', 'is_levant': False, 'confidence': 0.1, 'reasoning': 'The chunk contains release notes about TiDB and TiKV fixes, which do not specifically address fault tolerance during node failure.'}, 'is_relevant'


Filter response: [{'chunk_id': 'b4f5fbf21c974947abbf11d021023350', 'is_relevant': True, 'confidence': 0.95, 'reasoning': "The chunk discusses TiDB's fault tolerance during node failure, specifically in the context of pessimistic transactions and pipelined locking process. It mentions scenarios where network partition or TiKV node failures affect lock acquisition and transaction commits."}, {'chunk_id': '7d9ec6766af442c4a9caf13707ad492a', 'is_relevant': False, 'confidence': 0.25, 'reasoning': 'The chunk is about migrating data between TiDB clusters, which does not directly address fault tolerance during node failure.'}, {'chunk_id': '6feea30baa6e438abf97f0900d59687a', 'is_relevant': False, 'confidence': 0.2, 'reasoning': 'The chunk focuses on DDL issues in TiDB, which are unrelated to fault tolerance during node failure.'}, {'chunk_id': '7b3f69aaaa3f46a686b42937c5b159b3', 'is_levant': False, 'confidence': 0.15, 'reasoning': "The chunk discusses PD election and scheduling, which is not d

Error parsing filter response as input
</think>

Here's a step-by-step analysis of each chunk based on the query "TiDB fault tolerance behavior during node failure":

1. **Chunk 010eb781918d41628d55129509854051**:
   - This chunk discusses features in TiDB v7.1.0, including a checkpoint mechanism for Fast Online DDL that improves fault tolerance and recovery.
   - The feature directly addresses fault tolerance during node failures by enabling recovery from checkpoints.
   - **Relevance**: High.

2. **Chunk 77ff5fd173524bd1900639c41dc31426**:
   - It describes a bug fix in TiKV related to handling PD leader switches or restarts, which caused SQL execution errors.
   - The issue was resolved by fixing TiKV's heartbeat mechanism, improving stability during node failures.
   - **Relevance**: High.

3. **Chunk 5c73504ed11748b9bcc87c462986ffd2**:
   - Lists bug fixes in TiDB v6.5.4, including issues like heartbeat storms and PITR getting stuck due to network interruptions.
   - These improve

KeyboardInterrupt: 

In [None]:
from graph.query_analyzer import DeepUnderstandingAnalyzer

# Run the query analyzer on the user question using the shared LLM client.
analyzer = DeepUnderstandingAnalyzer(llm_client)
# Later cells read `analysis_res.initial_queries` and `analysis_res.reasoning`,
# so this cell must be executed before them on a fresh kernel.
analysis_res = analyzer.perform(query)
# Print the analysis result for inspection.
print(analysis_res)

In [4]:
# Fresh bookkeeping containers for the retrieval loop.
action_history, current_findings = [], []
docs = {}

# Pull the analyzer's outputs into plain names used by later cells.
queries = analysis_res.initial_queries
reasoning = analysis_res.reasoning

# Seed the action queue: one document-retrieval action per initial query.
next_actions = [
    SearchAction(tool="retrieve_documents", query=initial_query)
    for initial_query in queries
]

In [None]:

def _rel_get(rel, field):
    """Return ``field`` from a relationship held either as a dict or as an object."""
    if isinstance(rel, dict):
        return rel[field]
    return getattr(rel, field)


def _rel_set(rel, field, value):
    """Assign ``field`` on a relationship held either as a dict or as an object."""
    if isinstance(rel, dict):
        rel[field] = value
    else:
        setattr(rel, field, value)


def _merge_documents(store, documents):
    """Merge ``documents`` into ``store`` in place.

    Args:
        store: dict mapping doc_id -> document accumulated so far; mutated.
        documents: dict mapping doc_id -> document from one retrieval call.
            Each document exposes ``chunks`` (dict of chunk_id -> chunk) and
            each chunk exposes ``relationships`` (a list).

    New documents and new chunks are stored as-is. For a chunk that is already
    known, relationships are de-duplicated by id and the higher
    ``similarity_score`` is kept.
    """
    for doc_id, doc in documents.items():
        if doc_id not in store:
            # First sighting: nothing to merge against, keep the document whole.
            store[doc_id] = doc
            continue

        known_chunks = store[doc_id].chunks
        for chunk_id, chunk in doc.chunks.items():
            if chunk_id not in known_chunks:
                known_chunks[chunk_id] = chunk
                continue

            existing_chunk = known_chunks[chunk_id]
            # Stored relationships may be raw objects (first insertion) or dicts
            # (added by an earlier merge), so read them through _rel_get.
            rel_by_id = {_rel_get(r, 'id'): r for r in existing_chunk.relationships}
            for relationship in chunk.relationships:
                rel_id = _rel_get(relationship, 'id')
                if rel_id in rel_by_id:
                    best_score = max(
                        _rel_get(rel_by_id[rel_id], 'similarity_score'),
                        _rel_get(relationship, 'similarity_score'),
                    )
                    _rel_set(rel_by_id[rel_id], 'similarity_score', best_score)
                elif isinstance(relationship, dict):
                    rel_by_id[rel_id] = relationship
                else:
                    rel_by_id[rel_id] = relationship.to_dict()

            existing_chunk.relationships = list(rel_by_id.values())


# Execute every queued action and accumulate the retrieved documents by id.
knowledge_retrieved = {}
for action in next_actions:
    print(action)
    if action.tool == 'retrieve_knowledge':
        data = gkb.retrieve_graph_data(session, action.query)
    elif action.tool == 'retrieve_neighbors':
        data = gkb.retrieve_neighbors(session, action.entity_ids, action.query)
    elif action.tool == 'retrieve_documents':
        data = gkb.retrieve_documents(session, action.query, 10)
    else:
        raise ValueError(f"Invalid tool: {action.tool}")

    _merge_documents(knowledge_retrieved, data.documents)

    # Record each action once it has been executed. Previously this ran after
    # the loop, so only the last action was recorded and an empty queue raised
    # NameError.
    action_history.append(action)

knowledge_retrieved

In [None]:
from graph.knowledge_synthesizer import KnowledgeSynthesizer

synthesizer = KnowledgeSynthesizer(llm_client)
result = synthesizer.iterative_answer_synthesis(
    query=query,
    documents=knowledge_retrieved,
    reasoning=reasoning
)

# Access the results
final_answer = result["final_answer"]
evolution = result["evolution_history"]

In [None]:
final_answer