In [1]:
import dotenv
import os
import json
import numpy as np
from openai import OpenAI
from typing import List, Tuple, Dict

from backend.db import VectorStore, Neo4jGraphDB

# Load variables from a .env file into the process environment; the
# os.getenv() lookups for credentials in later cells rely on this.
dotenv.load_dotenv()

True

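Each input chunk is assumed to have the following shape (`chunk_id` is overwritten below with the ID returned by the vector store):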

```
{
    "chunk_id": "xxx",
    "text": "...",
    "summary": "...",
}
```

In [2]:
# Sample chunk used for the rest of this walkthrough. The chunk_id here is
# only a placeholder; it is replaced once the chunk is added to the vector store.
doc_texts = dict(
    chunk_id=0,
    text="Gradient Descent is used to optimize neural networks...",
    summary="Gradient Descent and Neural Networks",
)

In [3]:
# Create the vector store and the Neo4j graph database handles. In the real
# application these would already exist before any chunk is processed.
vs = VectorStore()
graphdb = Neo4jGraphDB(
    uri=os.environ.get("NEO4J_URI"),
    user=os.environ.get("NEO4J_USERNAME"),
    password=os.environ.get("NEO4J_PASSWORD"),
    db_name=os.environ.get("NEO4J_DATABASE"),
)

[VectorStore] Loaded existing store with 1 chunks.


In [4]:
# Add the chunk's text and summary to the vector store, then record the
# value it returns as the chunk's ID on the chunk record itself.
chunk_id = vs.add(doc_texts["text"], doc_texts["summary"])
doc_texts.update(chunk_id=chunk_id)

In [5]:
# OpenAI-compatible client pointed at the OpenRouter endpoint; the API key is
# read from the environment rather than written into the notebook.
client = OpenAI(
    api_key=os.environ.get("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1",
)

In [6]:
def call_llm(prompt, text, model="openai/gpt-oss-20b:free"):
    """Send ``prompt`` followed by ``text`` to the chat model and parse the reply as JSON.

    Args:
        prompt: Instruction text placed at the start of the single user message.
        text: Payload appended after the prompt (separated by a blank line).
        model: Model identifier passed to the chat-completions endpoint.

    Returns:
        The Python object decoded from the model's JSON reply.

    Raises:
        json.JSONDecodeError: If the reply is empty, or is not valid JSON once
            any surrounding Markdown code fence has been removed.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": f"{prompt}\n\n {text}"}],
    )

    content = completion.choices[0].message.content
    # Echo the raw reply so parsing problems are easy to diagnose in the notebook.
    print(content)

    payload = (content or "").strip()
    if not payload:
        # A missing reply would otherwise surface as an opaque TypeError from json.loads(None).
        raise json.JSONDecodeError("LLM returned an empty response", payload, 0)

    # Models often wrap JSON in a ```json ... ``` fence even when asked not to;
    # drop the opening fence line (with its optional language tag) and the closing fence.
    if payload.startswith("```"):
        first_newline = payload.find("\n")
        payload = payload[first_newline + 1:] if first_newline != -1 else payload[3:]
        payload = payload.rstrip()
        if payload.endswith("```"):
            payload = payload[:-3]
        payload = payload.strip()

    return json.loads(payload)

In [7]:
# Instruction for the entity-extraction call. It asks for a bare JSON array
# of strings because the reply is parsed with json.loads.
prompt = (
    "\n"
    "Extract key entities (concepts, methods, objects)\n"
    "from the text below. Output JSON array of strings only.\n"
)

In [8]:
# Ask the model which entities appear in the chunk text, then display them.
entities = call_llm(prompt, doc_texts["text"])
entities

["Gradient Descent", "neural networks"]


['Gradient Descent', 'neural networks']

In [9]:
# Instruction for the relation-extraction call. The reply is parsed with
# json.loads and its "source"/"target" values are looked up by exact name, so
# the prompt asks for JSON only and for entity names to be reused verbatim.
relation_prompt = """
Given the entities and the text, extract directed relations.
Use only the listed entities, spelled exactly as given, for "source" and "target".
Output a JSON array only, where each element has the format:
{ "source": "...", "relation": "...", "target": "..." }
"""

In [10]:
# Give the model both the extracted entities and the original text so it can
# propose directed relations between them.
relations = call_llm(
    text="Entities: {}\n\n Text: {}".format(entities, doc_texts["text"]),
    prompt=relation_prompt,
)

[{"source":"Gradient Descent","relation":"used to optimize","target":"neural networks"}]


In [11]:
# Map each extracted entity to a canonical name: the first match returned by
# the graph search when there is one, otherwise the entity text itself.
mapping = {}
for ent in entities:
    matches = graphdb.search_nodes(ent)
    # Truthiness check also covers a None result, which len() would crash on.
    mapping[ent] = matches[0] if matches else ent

# De-duplicate while keeping first-seen order. A set would give an order that
# can change between kernel restarts, making the notebook output unstable.
canon_entities = list(dict.fromkeys(mapping.values()))

In [12]:
# Display the de-duplicated canonical entity names for inspection.
canon_entities

['Gradient Descent', 'neural networks']

In [13]:
# Make sure every canonical entity has a graph node that references this chunk:
# nodes already in the graph get the chunk ID added, missing ones are created.
for node in canon_entities:
    if graphdb.node_exists(node):
        print("OLD NODE")
        graphdb.add_chunks_to_node(name=node, chunk_ids=[doc_texts["chunk_id"]])
        continue

    print("NEW NODE")
    graphdb.create_node(name=node, node_type="concept", chunk_ids=[doc_texts["chunk_id"]])

OLD NODE
OLD NODE


In [14]:
# Create each extracted relation between the canonical forms of its endpoints
# and print whatever create_relation_safe returns for it.
for rel in relations:
    source, relation, target = rel.get("source"), rel.get("relation"), rel.get("target")

    # The relations come from an LLM, so an endpoint may be missing or may not
    # be one of the extracted entities. Skip and report it rather than letting
    # a KeyError abort the whole loop.
    if not relation or source not in mapping or target not in mapping:
        print(f"Skipping relation with unknown entity or missing field: {rel}")
        continue

    print(graphdb.create_relation_safe(mapping[source], relation, mapping[target]))

False


In [15]:
# Hand the chunk's canonical entity names to the vector store (presumably
# attaching them to the stored chunk; confirm against VectorStore.update).
vs.update(doc_texts["chunk_id"], canon_entities)

'New nodes assigned'