In [4]:
from setting.db import SessionLocal

from graph.graph_knowledge_base import GraphKnowledgeBase

# Build the graph knowledge base handle from three storage names
# (presumably the entity / relationship / chunk table names for dataset
# 150001 — TODO confirm against GraphKnowledgeBase's constructor).
gkb = GraphKnowledgeBase("entities_150001", "relationships_150001", "chunks_150001")
# One DB session shared by every retrieval call in the cells below.
# NOTE(review): no visible cell closes this session — consider adding a final
# cleanup cell if SessionLocal() hands out a pooled connection.
session = SessionLocal()

In [5]:
# The user question that drives the whole notebook.
query = "what is the latest version of tidb"
# Generation options forwarded as **kwargs to analyzer.perform() and
# llm_client.generate() (names look like Ollama runtime options — TODO confirm
# how LLMInterface maps them).
model_kwargs = {
    "num_ctx": 8092,  # NOTE(review): possibly a typo for 8192 — confirm intended context size
    "num_gpu": 40,
    "num_predict": 10000,  # NOTE(review): larger than num_ctx; verify this combination is intended
    "temperature": 0.1,
}

In [6]:
from llm_inference.base import LLMInterface
from graph.query_analyzer import DeepUnderstandingAnalyzer

# LLM client shared by the analyzer here and the synthesizer later on;
# constructed with the "ollama" provider name and the deepseek-r1:14b model tag.
llm_client = LLMInterface("ollama", "deepseek-r1:14b")
analyzer = DeepUnderstandingAnalyzer(llm_client)
# Analyse the raw query. Later cells read `.reasoning` and `.initial_queries`
# from the result.
analysis_res = analyzer.perform(query, **model_kwargs)
# print() is used so the result's string form is shown.
print(analysis_res)

Analysis Result:
Reasoning: The user is seeking knowledge about the latest version of TiDB to stay updated with advancements, improvements, and compatibility in their technical context. They likely aim to optimize their workflow by using the most recent features or resolving issues tied to prior versions.
Intent:
    Action: Seek Information
    Target: Latest Version Details of TiDB
    Context: Database Technology
Initial Queries:
    latest version of tidb release notes
    what's new in latest tiDB version
    tiDB latest version features


In [7]:
from graph.knowledge_synthesizer import KnowledgeSynthesizer, SearchAction

# Synthesizer used below for extract_findings() and evaluate_findings();
# it shares the LLM client created for the query analyzer.
synthesizer = KnowledgeSynthesizer(llm_client)

In [8]:
# Mutable state carried across the retrieve -> extract -> evaluate cells.
action_history, current_findings, docs = [], [], {}

# Outputs of the query analysis that every later step is conditioned on.
reasoning = analysis_res.reasoning
queries = analysis_res.initial_queries

# First round of work: one plain knowledge retrieval per analyzer-suggested query.
next_actions = [
    SearchAction(tool="retrieve_knowledge", query=initial_query)
    for initial_query in analysis_res.initial_queries
]

In [9]:
import datetime

# Baseline: ask the model directly, with no retrieved knowledge, so its answer
# can be compared against the graph-backed findings produced further down.
current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
prompt = "\n\n".join(
    [
        f"Current Time: {current_time}.",
        f"The question: {query}.",
        f"The Reasoning: {reasoning}.",
        "So the answer is:",
    ]
)

answer_1 = llm_client.generate(prompt, **model_kwargs)
answer_1

"<think>\nAlright, so I need to find out what the latest version of TiDB is as of now, which is January 2025. I'm not exactly sure about all the details, but I know TiDB is a distributed database that's designed for both analytics and transactional workloads. It's an open-source project, so updates are pretty frequent.\n\nFirst, I remember that TiDB releases versions periodically, usually major or minor updates. Major versions might change how things work fundamentally, while minor versions add features or fix bugs without changing the core too much. So, the latest version as of 2025 would probably be something like v7.x or higher.\n\nI think the last time I checked, TiDB was around version 6.x. But since it's 2025, there might have been a couple more releases. Maybe they've released version 7 already. I should check if TiDB has any official release notes or a changelog on their website.\n\nAlternatively, I can think about the timeline. If each major version comes out every year or so,

In [10]:

# Run every pending search action against the graph knowledge base.
# Results are keyed by the action's string form, and each executed action is
# appended to `action_history` (which is later passed to evaluate_findings).
knowledge_retrieved = {}
for action in next_actions:
    if action.tool == 'retrieve_knowledge':
        data = gkb.retrieve_graph_data(session, action.query)
    elif action.tool == 'retrieve_neighbors':
        data = gkb.retrieve_neighbors(session, action.entity_ids, action.query)
    else:
        # Without this branch an unknown tool either hits a NameError on
        # `data` (first iteration) or silently stores the previous action's
        # result under this action's key.
        raise ValueError(f"Unsupported tool {action.tool!r} in action: {action}")
    knowledge_retrieved[str(action)] = data
    action_history.append(action)

knowledge_retrieved

query relationships use 3.0731258392333984
query relationships use 0.7830181121826172
query relationships use 0.7322230339050293


{'SearchAction(tool=retrieve_knowledge, query=latest version of tidb release notes, entity_ids=None)': {'chunks': [{'id': 'c989d0092a964f1a861ba640ef6c79a6',
    'text': '---\ntitle: Release Notes\nsummary: TiDB has released multiple versions, including 8.1.0, 8.0.0-DMR, 7.6.0-DMR, 7.5.1, 7.5.0, 7.4.0-DMR, 7.3.0-DMR, 7.2.0-DMR, 7.1.4, 7.1.3, 7.1.2, 7.1.1, 7.1.0, 7.0.0-DMR, 6.6.0-DMR, 6.5.9, 6.5.8, 6.5.7, 6.5.6, 6.5.5, 6.5.4, 6.5.3, 6.5.2, 6.5.1, 6.5.0, 6.4.0-DMR, 6.3.0-DMR, 6.2.0-DMR, 6.1.7, 6.1.6, 6.1.5, 6.1.4, 6.1.3, 6.1.2, 6.1.1, 6.1.0, 6.0.0-DMR, 5.4.3, 5.4.2, 5.4.1, 5.4.0, 5.3.4, 5.3.3, 5.3.2, 5.3.1, 5.3.0, 5.2.4, 5.2.3, 5.2.2, 5.2.1, 5.2.0, 5.1.5, 5.1.4, 5.1.3, 5.1.2, 5.1.1, 5.1.0, 5.0.6, 5.0.5, 5.0.4, 5.0.3, 5.0.2, 5.0.1, 5.0.0, 5.0.0-rc, 4.0.16, 4.0.15, 4.0.14, 4.0.13, 4.0.12, 4.0.11, 4.0.10, 4.0.9, 4.0.8, 4.0.7, 4.0.6, 4.0.5, 4.0.4, 4.0.3, 4.0.2, 4.0.1, 4.0.0, 4.0.0-rc.2, 4.0.0-rc.1, 4.0.0-rc, 4.0.0-beta.2, 4.0.0-beta.1, 4.0.0-beta, 3.1.2, 3.1.1, 3.1.0, 3.1.0-rc, 3.1.0-beta.2,

In [11]:
# Ask the synthesizer to turn the raw retrieval results into findings.
# The previous findings list is passed in and the name is rebound to the
# result, so re-running this cell feeds its own output back in.
# NOTE(review): not idempotent on re-run — confirm whether extract_findings
# de-duplicates against the list it is given.
current_findings = synthesizer.extract_findings(reasoning, query, knowledge_retrieved, current_findings)
current_findings

[Finding(content='TiDB is a distributed database that supports horizontal scalability and is built for cloud-native applications.', confidence_score=0.95, is_novel=False, is_supplementary=True, reasoning="Both documents explicitly mention TiDB's support for horizontal scalability and its cloud-native architecture. This consistent information across two sources increases confidence in the finding.", source_quotes=[{'doc_link': 'doc_link_1', 'chunk_ids': ['chunk_id_1'], 'relationship_ids': ['relationship_id_1']}, {'doc_link': 'doc_link_2', 'chunk_ids': ['chunk_id_1'], 'relationship_ids': ['relationship_id_1']}]),
 Finding(content='The `KILL TIDB` command can be used to terminate TiDB server processes, and this feature is explicitly documented in both search results.', confidence_score=0.98, is_novel=False, is_supplementary=True, reasoning='Both documents confirm the existence and purpose of the `KILL TIDB` command, making this a highly reliable finding.', source_quotes=[{'doc_link': 'doc

In [None]:
def _index_by_id(retrieved, kind):
    """Map item id -> item for every `kind` entry across all retrieval results.

    `retrieved` is the `knowledge_retrieved` dict (action string -> result
    dict) and `kind` is the result key to index ('chunks' or 'relationships').
    The first occurrence of an id wins, which matches a first-match scan over
    the results in insertion order.
    """
    index = {}
    for data in retrieved.values():
        for item in data.get(kind, []):
            index.setdefault(item.get('id'), item)
    return index


# Build the lookups once instead of rescanning every result for every cited id.
chunks_by_id = _index_by_id(knowledge_retrieved, 'chunks')
relationships_by_id = _index_by_id(knowledge_retrieved, 'relationships')

# Group the chunks / relationships cited by each finding under their doc_link.
for finding in current_findings:
    for quote in finding.source_quotes:
        doc_link = quote.get('doc_link')
        if doc_link is None:
            print(f"doc_link is None for quote: {quote}")
            # Skip the quote: storing it would create a meaningless docs[None] entry.
            continue

        # `or []` also tolerates a key that is present but set to None.
        chunk_ids = quote.get('chunk_ids') or []
        relationship_ids = quote.get('relationship_ids') or []

        refer_chunks = {
            chunk_id: chunks_by_id[chunk_id]
            for chunk_id in chunk_ids
            if chunk_id in chunks_by_id
        }
        refer_relationships = {
            relationship_id: relationships_by_id[relationship_id]
            for relationship_id in relationship_ids
            if relationship_id in relationships_by_id
        }

        # Cited ids that match nothing retrieved (e.g. placeholder ids emitted
        # by the model) are reported instead of being dropped silently.
        unresolved = [cid for cid in chunk_ids if cid not in chunks_by_id]
        unresolved += [rid for rid in relationship_ids if rid not in relationships_by_id]
        if unresolved:
            print(f"unresolved reference ids for doc_link {doc_link}: {unresolved}")

        doc_entry = docs.setdefault(
            doc_link, {'refer_chunks': {}, 'refer_relationships': {}}
        )
        doc_entry['refer_chunks'].update(refer_chunks)
        doc_entry['refer_relationships'].update(refer_relationships)

docs

In [None]:
# Have the synthesizer judge the findings gathered so far, given the cited
# documents and the actions already executed. The next cell reads
# `.is_sufficient`, `.reasoning`, `.missing_aspects` and `.next_actions`
# from the result.
eval_res = synthesizer.evaluate_findings(query, reasoning, current_findings, docs, action_history)
eval_res

In [None]:
# Queue the follow-up actions proposed by the evaluation, but only when it
# explicitly reports the findings as insufficient (is_sufficient is exactly
# False); otherwise nothing is scheduled.
needs_more_evidence = eval_res.is_sufficient is False
if needs_more_evidence:
    print(f"Not sufficient, re-synthesizing...\n    Reasoning: {eval_res.reasoning}\n    missing_aspects: {eval_res.missing_aspects}")
next_actions = list(eval_res.next_actions) if needs_more_evidence else []
next_actions