In [None]:
import nest_asyncio
nest_asyncio.apply()

from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex, StorageContext, PropertyGraphIndex
from llama_index.core.graph_stores import SimpleGraphStore

from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from IPython.display import Markdown, display

from llama_index.readers.docling import DoclingReader
from llama_index.core.node_parser import MarkdownNodeParser
from llama_index.core.node_parser import SentenceWindowNodeParser

import dotenv
dotenv.load_dotenv()

from llama_index.core.schema import TextNode

from llama_index.core.indices.property_graph import (
    SimpleLLMPathExtractor,
    SchemaLLMPathExtractor,
    DynamicLLMPathExtractor,
)

from llama_index.core.graph_stores import SimplePropertyGraphStore
from llama_index.core.graph_stores.types import LabelledPropertyGraph, EntityNode, Relation, ChunkNode


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# sentence1 = TextNode(text="the company Applied Materials (AMAT) has 100 apples")
# sentence2 = TextNode(text="the company Applied Materials (AMAT) has 10 pears")
# nodes = [sentence1, sentence2]

sentence1 = EntityNode(label="TEXT", name="the company Applied Materials (AMAT) has 100 apples")
sentence2 = EntityNode(label="TEXT", name="the company Applied Materials (AMAT) has 10 pears")

company = EntityNode(label="COMPANY", name="Applied Materials")
apple = EntityNode(label="VALUE", name="number of apples")
pear = EntityNode(label="VALUE", name="number of pears")
banana = EntityNode(label="VALUE", name="number of bananas")
apple_pear = EntityNode(label="VALUE", name="number of apples - number of pears")
entities = [company, apple, pear, banana, apple_pear, sentence1, sentence2]

relations_kpi = [
    Relation(label="HAS_PROPERTY", source_id=company.id, target_id=apple.id), 
    Relation(label="HAS_PROPERTY", source_id=company.id, target_id=pear.id), 
    Relation(label="HAS_PROPERTY", source_id=company.id, target_id=banana.id), 
    Relation(label="EQUALS", source_id=banana.id, target_id=apple_pear.id), 
    Relation(label="INCREASE_WITH", source_id=apple_pear.id, target_id=apple.id), 
    Relation(label="DECREASE_WITH", source_id=apple_pear.id, target_id=pear.id),
    Relation(label="MENTIONED_IN", source_id=apple.id, target_id=sentence1.id),
    Relation(label="MENTIONED_IN", source_id=pear.id, target_id=sentence2.id),
]

# relations_text=[
#     Relation(label="MENTIONS", source_id=sentence1.node_id, target_id=apple.id), 
#     Relation(label="MENTIONS", source_id=sentence2.node_id, target_id=pear.id),
# ]

kgraph = SimplePropertyGraphStore()
kgraph.upsert_nodes(entities)
kgraph.upsert_relations(relations_kpi)
# kgraph.upsert_llama_nodes(nodes)
# kgraph.upsert_relations(relations_text)

index = PropertyGraphIndex.from_existing(property_graph_store=kgraph)

In [3]:
retriever = index.as_retriever(
    include_text=True,  # include source chunk with matching paths
    similarity_top_k=2,  # top k for vector kg node retrieval
    path_depth=5,
    limit=4,
)

nodes = retriever.retrieve("number of bananas?")

for node in nodes:
    print(node)


Node ID: a095fa88-4ce4-4c3c-a512-56d602d41ef4
Text: number of apples -> MENTIONED_IN -> the company Applied
Materials (AMAT) has 100 apples
Score:  0.000

Node ID: f5e55f88-5c22-4a18-9d42-15b518a2af7a
Text: Applied Materials -> HAS_PROPERTY -> number of pears
Score:  0.000

Node ID: 9b5397c1-1299-4ced-be03-1b1545926788
Text: Applied Materials -> HAS_PROPERTY -> number of bananas
Score:  0.000

Node ID: 71c2eab9-786f-45e5-a35d-66d02bcdad33
Text: Applied Materials -> HAS_PROPERTY -> number of apples
Score:  0.000



In [4]:
query_engine = index.as_query_engine(
    llm=OpenAI(model="gpt-4o-mini", temperature=0),
    include_text=False, 
    response_mode="tree_summarize",
    path_depth=3,
)

response = query_engine.query(
    "Using only the information in your graph, suppose the company receives 4 pears, by is the stock of bananas affected?"
)

print(response.get_formatted_sources())
print(response.response)


> Source (Doc id: ba7a9024-b821-44d7-a7f7-741c59724f2d): number of apples -> MENTIONED_IN -> the company Applied Materials (AMAT) has 100 apples

> Source (Doc id: 39ee1dcc-e752-4764-879c-ef22732717a8): Applied Materials -> HAS_PROPERTY -> number of pears

> Source (Doc id: b1d6c42d-d07a-4ef4-afba-8d53369a0de0): Applied Materials -> HAS_PROPERTY -> number of bananas

> Source (Doc id: 9bad6f51-6146-44bf-bac9-cf36f0750c94): Applied Materials -> HAS_PROPERTY -> number of apples

> Source (Doc id: b3afbfe1-4815-42bd-8183-9d6b19402b0c): number of bananas -> EQUALS -> number of apples - number of pears

> Source (Doc id: f3e659f1-5ed6-447b-90c0-dd598122251a): number of pears -> MENTIONED_IN -> the company Applied Materials (AMAT) has 10 pears

> Source (Doc id: 9de57155-dc9e-4e8a-9ec0-37a29964489b): number of apples - number of pears -> INCREASE_WITH -> number of apples

> Source (Doc id: a2d51a43-73c6-48d5-ba33-cad922707ed1): number of apples - number of pears -> DECREASE_WITH -> number of

In [3]:
import nest_asyncio
nest_asyncio.apply()

from llama_index.core import PropertyGraphIndex
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from typing import Literal
from llama_index.core.schema import TextNode
from llama_index.core.prompts import PromptTemplate

# best practice to use upper-case
entities = Literal["NUMBER", "SENTENCE"]
relations = Literal[
    "ALIAS_OF",
    "EQUALS", "INCREASES_WITH", "DECREASES_WITH",
    "MENTIONED_IN", "MENTIONS"
]

# define which entities can have which relations
validation_schema = {
    "NUMBER": [
        "ALIAS_OF", "MENTIONED_IN",
        "EQUALS", "INCREASES_WITH", "DECREASES_WITH",
    ],
    "SENTENCE": [],
}

num_apples = 12
num_pears = 4.3
kpi_relations=f"""\
(number of bananas) equals (number of apples minus number of pears)
(number of apples minus number of pears) increases with (number of apples)
(number of apples minus number of pears) decreases with (number of pears)
(number of apricots) is alias of (number of bananas)
(number of apples) is alias of (apples) 
(number of pears) is alias of (pears)
"""

parsed_relations = [rel.strip() for rel in kpi_relations.split('\n') if rel.strip()]

nodes = [TextNode(text=rel) for rel in parsed_relations]

extraction_prompt= PromptTemplate(
    "Extract the knowledge graph from the following text according to the provided schema. "
    "The text is structured as: (node 1) relation (node 2). "
    "Return at most {max_triplets_per_chunk} extracted paths.\n"
    "-------\n"
    "{text}\n"
    "-------\n"
)


# Set up the LLM and parser
llm = OpenAI(model="gpt-4o-mini", temperature=0)

kg_extractor = SchemaLLMPathExtractor(
    llm=llm,
    extract_prompt=extraction_prompt,
    possible_entities=entities,
    possible_relations=relations,
    kg_validation_schema=validation_schema,
    max_triplets_per_chunk=1,
    num_workers=10,
    # if false, allows for values outside of the schema
    # useful for using the schema as a suggestion
    strict=True,
)

# Build the Knowledge Graph Index
kg_index = PropertyGraphIndex(
    nodes=nodes,
    kg_extractors=[kg_extractor],
    embed_kg_nodes=True,
    show_progress=True,
)

Extracting paths from text with schema: 100%|██████████| 6/6 [00:01<00:00,  3.98it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.20it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


In [4]:
kg_index.property_graph_store.show_jupyter_graph()

GraphWidget(layout=Layout(height='630px', width='100%'))

In [3]:
query_engine = kg_index.as_query_engine(
    include_text=False,
    path_depth=5,

)
response = query_engine.query("how many apricots does the company have?")

print(response)

[print(node) for node in response.source_nodes]

The company has the same number of apricots as the number of bananas.
Node ID: 28cda70d-96bb-4dd3-b384-800bd05228a2
Text: number of apricots -> ALIAS_OF -> number of bananas
Score:  0.853

Node ID: 72de6a96-0000-4ef3-bf9c-c93cea3f5023
Text: number of apples minus number of pears -> INCREASES_WITH ->
number of apples
Score:  0.800

Node ID: 1b74e4e1-3f7a-4ed3-b0e9-b07bbdc767f3
Text: number of apples -> ALIAS_OF -> apples
Score:  0.800

Node ID: 2c83b21e-cbae-4775-a131-6783dfeb3a3f
Text: number of apples minus number of pears -> DECREASES_WITH ->
number of pears
Score:  0.789

Node ID: 5b703ab2-4cc0-4d42-bf9c-09fab37ae2c4
Text: number of bananas -> EQUALS -> number of apples minus number of
pears
Score:  0.789

Node ID: 1e610846-806c-40c8-beb5-dd52a70fd934
Text: number of pears -> ALIAS_OF -> pears
Score:  0.781



[None, None, None, None, None, None]

In [9]:
retriever = kg_index.as_retriever(
    include_text=False,
    path_depth=2,
)
nodes = retriever.retrieve("if I have 12 apples and 5.5 pears, how many apricots do I have?")

[print(node.text+"\n") for node in nodes]

number of bananas -> EQUALS -> number of apples minus number of pears

number of apples minus number of pears -> DECREASES_WITH -> number of pears

number of apples minus number of pears -> INCREASES_WITH -> number of apples

number of apricots -> ALIAS_OF -> number of bananas

number of apples -> ALIAS_OF -> apples

number of pears -> ALIAS_OF -> pears



[None, None, None, None, None, None]

In [8]:
# for item in kg_index.property_graph_store.graph:
#     print(item)
nodes,_,_ = kg_index.property_graph_store.graph

for node in nodes:
    print(nodes)

('nodes', {'3ee1436b-0cdc-41a5-9fb9-f4a78e3d4251': ChunkNode(label='text_chunk', embedding=[0.019477687776088715, -0.012532304041087627, 0.01931186579167843, 0.0030661094933748245, 0.009853643365204334, -0.0003958198940381408, -0.02523042820394039, -0.01996239647269249, 0.004132790025323629, -0.02699069119989872, -0.007461982313543558, 0.02130172774195671, -0.030179573222994804, 0.003149020252749324, -0.009687821380794048, 0.020995594561100006, 0.008329357951879501, -0.005724042188376188, 0.0018639011541381478, -0.011039907112717628, -0.00897351186722517, 0.02385283261537552, 0.012806547805666924, -0.013992811553180218, -0.014235165901482105, -0.005912186112254858, 0.02090630680322647, -0.016531160101294518, 0.01802355796098709, 0.01571480743587017, 0.03020508401095867, -0.010427641682326794, -0.002911448711529374, -0.01511529739946127, -0.022832389920949936, -0.006036552134901285, 0.003925513010472059, -0.0048311552964150906, 0.040307458490133286, -0.020804261788725853, 0.013712190091

In [28]:
text = TextNode(id="text_node", text="Supper comes after dinner.")

chunk = ChunkNode(
    label='text_chunk', 
    embedding=None,
    id_="chunk_node", 
    text=text.text,
    properties={
        'header_path': '/', 
        '_node_content': '{"id_": "chunk_node", "embedding": null, "metadata": {"header_path": "/"}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "text_node", "node_type": "4", "metadata": {}, "hash": "777c87bce9b25ceadef48f1501c6ab52103f15f96ba24cbf5c0f2fb2ecb2ae40", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\\n", "text": "", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 25, "metadata_seperator": "\\n", "text_template": "{metadata_str}\\n\\n{content}", "class_name": "TextNode"}', 
        '_node_type': 'TextNode', 
        'document_id': 'text_node', 
        'doc_id': 'text_node', 
        'ref_doc_id': 'text_node'
    },
)

supper = EntityNode(label='entity', embedding=None, properties={'header_path': '/', 'triplet_source_id': 'chunk_node'}, name='supper')
dinner = EntityNode(label='entity', embedding=None, properties={'header_path': '/', 'triplet_source_id': 'chunk_node'}, name='dinner')

sentence = EntityNode(label='entity', embedding=None, properties={}, name='text_node')

relation1 = Relation(
    label='Comes after', source_id='supper', target_id='dinner', 
    properties={'header_path': '/', 'triplet_source_id': 'chunk_node'}
)
relation2 = Relation(
    label='SOURCE', source_id='chunk_node', target_id='text_node', 
    properties={'header_path': '/', 'triplet_source_id': 'chunk_node'}
)


g = SimplePropertyGraphStore()
g.upsert_nodes([chunk, supper, dinner, sentence])
g.upsert_relations([relation1, relation2])

In [29]:

index = PropertyGraphIndex.from_existing(property_graph_store=g)

query_engine = index.as_query_engine()
response = query_engine.query("What comes after supper?")

response.source_nodes

[NodeWithScore(node=TextNode(id_='chunk_node', embedding=None, metadata={'header_path': '/'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='text_node', node_type='4', metadata={}, hash='777c87bce9b25ceadef48f1501c6ab52103f15f96ba24cbf5c0f2fb2ecb2ae40')}, metadata_template='{key}: {value}', metadata_separator='\n', text='Here are some facts extracted from the provided text:\n\nsupper -> Comes after -> dinner\n\nSupper comes after dinner.', mimetype='text/plain', start_char_idx=0, end_char_idx=25, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.0)]