## Prepare Data

In [None]:
import logging
import os

from llm.factory import LLMInterface
from llm.embedding import get_text_embedding
from setting.db import db_manager
from knowledge_graph.knowledge import KnowledgeBuilder
from knowledge_graph.graph_builder import KnowledgeGraphBuilder

# LLM client used by both builders (arguments presumably: provider, model name).
llm_client = LLMInterface("ollama", "qwen3:32b-fp16")

# Session factory for the database named by the GRAPH_DATABASE_URI environment
# variable; None is passed through when the variable is not set.
session_factory = db_manager.get_session_factory(os.environ.get("GRAPH_DATABASE_URI"))

# Both builders are constructed from the same three collaborators.
_builder_args = (llm_client, get_text_embedding, session_factory)
kb_builder = KnowledgeBuilder(*_builder_args)
graph_builder = KnowledgeGraphBuilder(*_builder_args)

# Console logging at INFO level, tagged with timestamp, level, file and line.
logging.basicConfig(
    format='[%(asctime)s] %(levelname)s - %(filename)s:%(lineno)d: %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

In [None]:
import json
import os
import hashlib

# Categories (matched exactly against each document's 'category' field) that
# should become upload topics.
categories = [
    'tidbcloud/API/API Overview',
    'tidbcloud/About TiDB Cloud',
]

# Path to the JSON file that lists the documents available for upload.
config_file_path = '/Users/ian/Work/docs/toc_files_for_tidb_cloud.json'


def load_toc_docs(path):
    """Load the list of document entries from the JSON file at *path*.

    Args:
        path: Location of the JSON file; its top level must be an array.

    Returns:
        The parsed list. An empty list is returned (and the problem is
        printed) when the file is missing, unreadable, not valid JSON, or
        does not contain a JSON array, so the notebook can keep running.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            docs = json.load(f)
    except FileNotFoundError:
        print(f"Error: Configuration file not found at '{path}'")
        return []
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from file '{path}'. Check file format.")
        return []
    except (OSError, ValueError) as e:
        # Permission problems, bad encodings, etc. Programming errors are
        # deliberately not caught here.
        print(f"An unexpected error occurred while reading the file: {e}")
        return []

    if not isinstance(docs, list):
        # The rest of the notebook indexes and iterates the result as a list
        # of dicts; fail early with a readable message instead.
        print(f"Error: Expected a JSON array in '{path}', got {type(docs).__name__}.")
        return []

    print(f"Successfully loaded configuration from: {path}")
    return docs


def group_docs_by_topic(docs, wanted_categories, version="2025-07-07"):
    """Group document entries into one upload topic per category.

    Args:
        docs: Iterable of dicts with the keys 'category', 'path',
            'web_view_link', 'modified_time' and 'mime_type'.
        wanted_categories: Category names; a document belongs to a category
            only on an exact match of its 'category' value.
        version: Value stored under 'version' on every produced entry.

    Returns:
        A dict mapping "TiDBCloud Product Documentation - <category>" to the
        list of entries for that category. Within a topic, only the first
        document for each 'web_view_link' is kept. Categories without any
        matching document map to an empty list.

    Raises:
        KeyError: If a matching document lacks one of the required keys.
    """
    grouped = {}
    for category in wanted_categories:
        topic = "TiDBCloud Product Documentation - " + category
        seen_links = set()
        entries = []
        for doc in docs:
            if doc['category'] != category:
                continue
            link = doc['web_view_link']
            if link in seen_links:
                continue
            seen_links.add(link)
            entries.append({
                'topic_name': topic,
                'path': doc['path'],  # required
                'doc_link': link,  # required
                'category': category,
                'updated_at': doc['modified_time'],
                'mime_type': doc['mime_type'],
                'version': version,
            })
        grouped[topic] = entries
    return grouped


loaded_docs = load_toc_docs(config_file_path)

if loaded_docs:
    print("\nExample: Accessing first document data:")
    print(loaded_docs[0])
else:
    # Covers both an empty file and a failed load (reported above).
    print("\nNo documents were loaded from the configuration file.")


tidb_product_docs = group_docs_by_topic(loaded_docs, categories)
for topic_name, docs_in_topic in tidb_product_docs.items():
    print(f"Topic: {topic_name}, Number of documents: {len(docs_in_topic)}")

## RESTful API Example

In [None]:
# The topic must be one of the keys of `tidb_product_docs`, which have the form
# "TiDBCloud Product Documentation - <category>" for each configured category.
# NOTE(review): the previous value ("TiDB Product Documentation - tidb/Get Started")
# is never produced by the preparation cell, so the lookup always failed;
# confirm this is the topic you intend to upload.
topic_name = "TiDBCloud Product Documentation - tidbcloud/API/API Overview"
if topic_name not in tidb_product_docs:
    raise KeyError(
        f"Unknown topic {topic_name!r}; available topics: {sorted(tidb_product_docs)}"
    )
topic_docs = tidb_product_docs[topic_name]


### Upload Documents by Topic

The same document can be uploaded to different topics repeatedly, and the backend will automatically handle deduplication.

In [None]:
from contextlib import ExitStack

import requests

url = "http://192.168.206.252:23333/api/v1/knowledge/upload"

# One link per uploaded file, in the same order as the files.
links = [doc["doc_link"] for doc in topic_docs]

data = {
    'links': links,
    'topic_name': topic_name,
    'database_uri': os.getenv("GRAPH_DATABASE_URI")
}

# ExitStack closes every opened file once the request has finished, including
# when opening a later file or the request itself fails.
with ExitStack() as stack:
    files = [
        (
            'files',
            (
                os.path.basename(doc["path"]),
                stack.enter_context(open(doc["path"], 'rb')),
                'application/pdf',
            ),
        )
        for doc in topic_docs
    ]
    response = requests.post(url, files=files, data=data)

print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body is not JSON (e.g. an HTML error page); show it as-is rather
    # than hiding the server's reply behind a decoding traceback.
    print(response.text)


### Build Graph

Once all documents for a topic have been uploaded, you can trigger a build of the corresponding graph.

In [None]:
import requests

# Ask the backend to start processing all uploaded documents of the topic.
url = "http://192.168.206.252:23333/api/v1/knowledge/trigger-processing"

database_uri = os.environ.get("GRAPH_DATABASE_URI")
data = dict(topic_name=topic_name, database_uri=database_uri)

response = requests.post(url, data=data)

# Status code first, then the decoded JSON body.
print(response.status_code)
print(response.json())

## Query Graph

### Vector Similarity-Based Search

In [None]:
from knowledge_graph.query import search_relationships_by_vector_similarity, query_topic_graph

query = "Where are li ming now?"
res = search_relationships_by_vector_similarity(query, similarity_threshold=0.2, top_k=20)

# Dict keys drop duplicate entity lines while keeping first-seen order, so the
# context text is the same on every run (a set would iterate in arbitrary order).
entity_lines = {}
relationships = []

for _, row in res.iterrows():
    entity_lines.setdefault(f"{row['source_entity']} {row['source_entity_description']}")
    entity_lines.setdefault(f"{row['target_entity']} {row['target_entity_description']}")
    relationships.append(f"{row['source_entity']} {row['relationship_desc']} {row['target_entity']}")

entities = list(entity_lines)

# Plain-text context: unique entities first, then one line per relationship row.
context = "Entities:\n" + "\n".join(entities) + "\n\nRelationships:\n" + "\n".join(relationships)

print(context)

In [None]:
from llm.factory import LLMInterface

# Switch to a different LLM client for answering the question.
llm_client = LLMInterface("bedrock", "us.anthropic.claude-3-7-sonnet-20250219-v1:0")

# Prompt: the retrieved graph context wrapped in <context> tags, followed by the question.
prompt = f"""Given the following context
<context>
{context}
</context>
answer the question: {query}
"""

response = llm_client.generate(prompt)
print(response)