In [6]:
import os

os.environ['OPENAI_API_KEY']=''

In [1]:
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [2]:
import os
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_URL = os.getenv('NEO4J_URL')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE')

In [2]:
!pip install openai langchain tiktoken pdfplumber llama_index pypdf



# Knowledge graph query engine

https://siwei.io/graph-enabled-llama-index/knowledge_graph_query_engine.html



In [3]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) ###<----this will display a LOT of stuff, put INFO if want less
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

## Setting Up the llama_index service context

In [4]:
from llama_index import (
    KnowledgeGraphIndex,
    LLMPredictor,
    ServiceContext,
    SimpleDirectoryReader,
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

from langchain.chat_models import ChatOpenAI
from IPython.display import Markdown, display

llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name="gpt-4"))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size=512)

INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
NumExpr defaulting to 8 threads.


## Prepare for Neo4j

Please set up a free AuraDB instance

Then set up environment variable with username, password, url and database name  
e.g.  
  
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "retractor-knot-thermocouples"
NEO4J_URL = "bolt://44.211.44.239:7687"
NEO4J_DATABASE = "neo4j"


In [5]:
%pip install neo4j


Collecting neo4j
  Downloading neo4j-5.13.0.tar.gz (192 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.3/192.3 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: neo4j
  Building wheel for neo4j (pyproject.toml) ... [?25ldone
[?25h  Created wheel for neo4j: filename=neo4j-5.13.0-py3-none-any.whl size=265313 sha256=b8912cc7bcf134dafa317a5120abfb5071a75aa17c6258e9ae68a030a07a987f
  Stored in directory: /home/vscode/.cache/pip/wheels/8e/4a/31/7e0a1339965ec771b0e0b476445e00e2ed1d2f30d4bc52616f
Successfully built neo4j
Installing collected packages: neo4j
Successfully installed neo4j-5.13.0
Note: you may need to restart the kernel to use updated packages.


In [5]:
#----------------------------------------
# VERIFY CONNECTIVITY TO NEO4J INSTANCE
#----------------------------------------

from neo4j import GraphDatabase

# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = "neo4j+s://6ba0e3e4.databases.neo4j.io"
AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

DEBUG:neo4j:[#0000]  _: <POOL> created, routing address IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))
[#0000]  _: <POOL> created, routing address IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))
DEBUG:neo4j:[#0000]  _: <WORKSPACE> resolve home database
[#0000]  _: <WORKSPACE> resolve home database
DEBUG:neo4j:[#0000]  _: <POOL> attempting to update routing table from IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))
[#0000]  _: <POOL> attempting to update routing table from IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))
DEBUG:neo4j:[#0000]  _: <RESOLVE> in: 6ba0e3e4.databases.neo4j.io:7687
[#0000]  _: <RESOLVE> in: 6ba0e3e4.databases.neo4j.io:7687
DEBUG:neo4j:[#0000]  _: <RESOLVE> dns resolver out: 34.126.114.186:7687
[#0000]  _: <RESOLVE> dns resolver out: 34.126.114.186:7687
DEBUG:neo4j:[#0000]  _: <POOL> _acquire router connection, database=None, address=ResolvedIPv4Address(('34.126.114.186', 7687))
[#0000]  _: <POOL> _acquire router connection, database=None, address=Re

Prepare for StorageContext with graph_store as Neo4j

In [6]:
from llama_index.graph_stores import Neo4jGraphStore
from llama_index.storage.storage_context import StorageContext

graph_store = Neo4jGraphStore(
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    url="neo4j+s://6ba0e3e4.databases.neo4j.io"
)

storage_context = StorageContext.from_defaults(graph_store=graph_store)

DEBUG:neo4j:[#0000]  _: <POOL> created, routing address IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))
[#0000]  _: <POOL> created, routing address IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))
DEBUG:neo4j:[#0000]  _: <WORKSPACE> resolve home database
[#0000]  _: <WORKSPACE> resolve home database
DEBUG:neo4j:[#0000]  _: <POOL> attempting to update routing table from IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))
[#0000]  _: <POOL> attempting to update routing table from IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))
DEBUG:neo4j:[#0000]  _: <RESOLVE> in: 6ba0e3e4.databases.neo4j.io:7687
[#0000]  _: <RESOLVE> in: 6ba0e3e4.databases.neo4j.io:7687
DEBUG:neo4j:[#0000]  _: <RESOLVE> dns resolver out: 34.126.114.186:7687
[#0000]  _: <RESOLVE> dns resolver out: 34.126.114.186:7687
DEBUG:neo4j:[#0000]  _: <POOL> _acquire router connection, database=None, address=ResolvedIPv4Address(('34.126.114.186', 7687))
[#0000]  _: <POOL> _acquire router connection, database=None, address=Re

## Build the Knowledge Graph with LlamaIndex  
With the help of Llama Index and LLM defined, we could build Knowledge Graph from given documents.


In [8]:
!pip install arxiv

Collecting arxiv
  Obtaining dependency information for arxiv from https://files.pythonhosted.org/packages/f0/06/9b9d553d93e25ae27ec5ba794216afb1af248e43d85a35e922a85cbb396a/arxiv-1.4.8-py3-none-any.whl.metadata
  Downloading arxiv-1.4.8-py3-none-any.whl.metadata (8.1 kB)
Collecting feedparser (from arxiv)
  Downloading feedparser-6.0.10-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting sgmllib3k (from feedparser->arxiv)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hDownloading arxiv-1.4.8-py3-none-any.whl (12 kB)
Building wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25ldone
[?25h  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6048 sha256=e5d88fdf7812753a9d236eda9a8223ff75086ecd920251b36b1e194bc73799a4
  Stored in directory: /home/

In [9]:
import arxiv

paper = next(arxiv.Search(id_list=['2303.11366']).results())
paper.download_pdf(filename='agent-reflexion.pdf')

INFO:arxiv.arxiv:Requesting 100 results at offset 0
Requesting 100 results at offset 0
INFO:arxiv.arxiv:Requesting page of results
Requesting page of results
INFO:arxiv.arxiv:Got first page; 1 of inf results available
Got first page; 1 of inf results available


'./agent-reflexion.pdf'

In [7]:
from llama_index import SimpleDirectoryReader

documents = SimpleDirectoryReader("../../datatest2").load_data()  #on local

DEBUG:llama_index.readers.file.base:> [SimpleDirectoryReader] Total files added: 1
> [SimpleDirectoryReader] Total files added: 1
LLAMINDEX Input file: ../../datatest2/agent-reflexion.pdf
PDF reader opening file  ../../datatest2/agent-reflexion.pdf


### Step 2, Generate a KnowledgeGraphIndex with nEO4J as graph_store

Then, we will create a KnowledgeGraphIndex to enable Graph based RAG.  
Apart from that, we have a Knowledge Graph up and running for other purposes, too!  

See here also: https://gpt-index.readthedocs.io/en/latest/examples/index_structs/knowledge_graph/KnowledgeGraphIndex_vs_VectorStoreIndex_vs_CustomIndex_combined.html



In [8]:
from llama_index import KnowledgeGraphIndex

kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=10,
    service_context=service_context,
    include_embeddings=True
)

DEBUG:llama_index.node_parser.node_utils:> Adding chunk: Reflexion: Language Agents with
Verbal Reinforc...
> Adding chunk: Reflexion: Language Agents with
Verbal Reinforc...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: 1 Introduction
Recent works such as ReAct [ 32]...
> Adding chunk: 1 Introduction
Recent works such as ReAct [ 32]...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: environment into verbal feedback in the form of...
> Adding chunk: environment into verbal feedback in the form of...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: However, as LLM capabilities improve, we only e...
> Adding chunk: However, as LLM capabilities improve, we only e...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: Paul et al. [20] fine-tune critic models to pro...
> Adding chunk: Paul et al. [20] fine-tune critic models to pro...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: <RXDUHLQWKHPLGGOHRIDURRP>@7DVNFOH...
> Adding chunk: <

#### Create graph from index  

In [9]:
## create graph
from pyvis.network import Network

g = kg_index.get_networkx_graph(200)
net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(g)
# net.show("example.html")

DEBUG:neo4j:[#0000]  _: <POOL> acquire routing connection, access_mode='WRITE', database='neo4j'
[#0000]  _: <POOL> acquire routing connection, access_mode='WRITE', database='neo4j'
DEBUG:neo4j:[#0000]  _: <POOL> routing aged?, database=neo4j
[#0000]  _: <POOL> routing aged?, database=neo4j
DEBUG:neo4j:[#0000]  _: <ROUTING> purge check: last_updated_time=107941.2039628, ttl=10, perf_time=164719.3893178 => True
[#0000]  _: <ROUTING> purge check: last_updated_time=107941.2039628, ttl=10, perf_time=164719.3893178 => True
DEBUG:neo4j:[#0000]  _: <POOL> dropping routing table for database=neo4j
[#0000]  _: <POOL> dropping routing table for database=neo4j
DEBUG:neo4j:[#0000]  _: <ROUTING> checking table freshness (readonly=False): table expired=True, has_server_for_mode=False, table routers={IPv4Address(('6ba0e3e4.databases.neo4j.io', 7687))} => False
[#0000]  _: <ROUTING> checking table freshness (readonly=False): table expired=True, has_server_for_mode=False, table routers={IPv4Address(('6

## Asking the Knowledge Graph   
Finally, let's demo how to Query Knowledge Graph with Natural language!   

Here, we will leverage the KnowledgeGraphQueryEngine, with NebulaGraphStore as the storage_context.graph_store   

In [10]:
from llama_index.query_engine import KnowledgeGraphQueryEngine

# from llama_index.storage.storage_context import StorageContext
# from llama_index.graph_stores import NebulaGraphStore

llm = ChatOpenAI(model='gpt-4', temperature=0)

query_engine = KnowledgeGraphQueryEngine(
    storage_context=storage_context,
    service_context=service_context,
    llm=llm,
    verbose=True,
)

In [13]:
response = query_engine.query(
    "Tell me what are agents doing in this paper?",
)
display(Markdown(f"<b>{response}</b>"))

DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/chat/completions
message='Request to OpenAI API' method=post path=https://api.openai.com/v1/chat/completions
DEBUG:openai:api_version=None data='{"messages": [{"role": "user", "content": "Task:Generate Cypher statement to query a graph database.\\nInstructions:\\nUse only the provided relationship types and properties in the schema.\\nDo not use any other relationship types or properties that are not provided.\\nSchema:\\n\\n        Node properties are the following:\\n        []\\n        Relationship properties are the following:\\n        []\\n        The relationships are the following:\\n        []\\n        \\nNote: Do not include any explanations or apologies in your responses.\\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement. \\nDo not include any text except the generated Cypher statement.\\n\\nThe question is:\\nTell me what are ag

CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'The': expected
  "ALTER"
  "CALL"
  "CREATE"
  "DEALLOCATE"
  "DELETE"
  "DENY"
  "DETACH"
  "DROP"
  "DRYRUN"
  "ENABLE"
  "FOREACH"
  "GRANT"
  "LOAD"
  "MATCH"
  "MERGE"
  "OPTIONAL"
  "REALLOCATE"
  "REMOVE"
  "RENAME"
  "RETURN"
  "REVOKE"
  "SET"
  "SHOW"
  "START"
  "STOP"
  "TERMINATE"
  "UNWIND"
  "USE"
  "USING"
  "WITH" (line 1, column 1 (offset: 0))
"The schema provided does not contain any information about nodes, relationship properties, or relationships. Therefore, it is impossible to generate a Cypher statement to answer the question about what agents are doing in the paper."
 ^}

### Translate natural language into queries

In [None]:
graph_query = query_engine.generate_query(
    "Tell me about Peter Quill?",
)

display(Markdown(f"""
```cypher
{graph_query}
```
"""))

DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/completions
message='Request to OpenAI API' method=post path=https://api.openai.com/v1/completions
message='Request to OpenAI API' method=post path=https://api.openai.com/v1/completions
DEBUG:openai:api_version=None data='{"prompt": ["\\nGenerate NebulaGraph query from natural language.\\nUse only the provided relationship types and properties in the schema.\\nDo not use any other relationship types or properties that are not provided.\\nSchema:\\n---\\nNode properties: [{\'tag\': \'entity\', \'properties\': [(\'name\', \'string\')]}]\\nEdge properties: [{\'edge\': \'relationship\', \'properties\': [(\'relationship\', \'string\')]}]\\nRelationships: [\'(:entity)-[:relationship]->(:entity)\']\\n\\n---\\nNote: NebulaGraph speaks a dialect of Cypher, comparing to standard Cypher:\\n\\n1. it uses double equals sign for comparison: `==` rather than `=`\\n2. it needs explicit label specification when refe


```cypher
```
MATCH (p:`entity`)-[:relationship]->(m:`entity`) WHERE p.`entity`.`name` == 'Peter Quill'
RETURN p.`entity`.`name`;
```
```


In [None]:
%%ngql
MATCH (p:`entity`)-[e:relationship]->(m:`entity`)
  WHERE p.`entity`.`name` == 'Peter Quill'
RETURN p.`entity`.`name`, e.relationship, m.`entity`.`name`;

INFO:nebula3.logger:Get connection to ('192.168.1.198', 9669)
Get connection to ('192.168.1.198', 9669)
Get connection to ('192.168.1.198', 9669)


Unnamed: 0,p.entity.name,e.relationship,m.entity.name
0,Peter Quill,would return to the MCU,May 2021


# Knowledge graph and semantic search index

https://gpt-index.readthedocs.io/en/latest/examples/index_structs/knowledge_graph/KnowledgeGraphIndex_vs_VectorStoreIndex_vs_CustomIndex_combined.html



In [None]:
import logging
import sys
logging.basicConfig(
    stream=sys.stdout, level=logging.DEBUG
)  # logging.DEBUG for more verbose output

from llama_index import (
    KnowledgeGraphIndex,
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore


from llama_index.llms import OpenAI
from IPython.display import Markdown, display

## Prepare Nebula graph

In [None]:
import os
os.environ["NEBULA_USER"] = "root"
os.environ["NEBULA_PASSWORD"] = "nebula"
os.environ[
    "NEBULA_ADDRESS"
] = "172.16.0.245:9669"  # assumed we have NebulaGraph 3.5.0 or newer installed locally

# Assume that the graph has already been created
# Create a NebulaGraph cluster with:
# Option 0: `curl -fsSL nebula-up.siwei.io/install.sh | bash`
# Option 1: NebulaGraph Docker Extension https://hub.docker.com/extensions/weygu/nebulagraph-dd-ext
# and that the graph space is called "llamaindex"
# If not, create it with the following commands from NebulaGraph's console:
# CREATE SPACE llamaindex(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);
# :sleep 10;
# USE llamaindex;
# CREATE TAG entity(name string);
# CREATE EDGE relationship(relationship string);
# CREATE TAG INDEX entity_index ON entity(name(256));

space_name = "llamaindex_2"
edge_types, rel_prop_names = ["relationship"], [
    "relationship"
]  # default, could be omit if create from an empty kg
tags = ["entity"]  # default, could be omit if create from an empty kg

## Load data from Wikipedia

In [None]:
from llama_index import download_loader

WikipediaReader = download_loader("WikipediaReader")

loader = WikipediaReader()

documents_science= loader.load_data(pages=["2023 in science"], auto_suggest=False)

In [None]:
len(documents_science[0].text)

54082

## Service context for loading documents and creating knowledge graph

In [None]:

from llama_index.embeddings import OpenAIEmbedding
from llama_index.node_parser import SimpleNodeParser
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index.llms import OpenAI
from llama_index.text_splitter import TokenTextSplitter

# define LLM
# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors
llm = OpenAI(temperature=0, model="text-davinci-002")

embed_model = OpenAIEmbedding()
node_parser = SimpleNodeParser.from_defaults(
  text_splitter=TokenTextSplitter(chunk_size=512, chunk_overlap=64))

service_context = ServiceContext.from_defaults(
  llm=llm,
  embed_model=embed_model,
  node_parser=node_parser
)

## Create knowledge graph index

In [None]:
graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
    max_triplets_per_chunk=10,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
)

(National Disability Insurance Scheme, Pricing Arrangements and Price Limits, 2023-24)
(Pricing Arrangements, valid from, 1 July 2023)
(Version, 1.0, Released 16 June 2023)
(NDIS Pricing Arrangements and Price Limits 2023-24, is, document)
(NDIS Pricing Arrangements and Price Limits 2023-24, published, 16/06/2023)
(NDIS Pricing Arrangements and Price Limits 2023-24, has date of effect, 1 July 2023)
(NDIS Pricing Arrangements and Price Limits, are, price regulation documents)
(NDIS Pricing Arrangements and Price Limits, contain, support items)
(NDIS Pricing Arrangements and Price Limits, contain, support purposes)
(NDIS Pricing Arrangements and Price Limits, contain, support categories)
(NDIS Pricing Arrangements and Price Limits, contain, registration groups)
(NDIS Pricing Arrangements and Price Limits, contain, units of measure)
(NDIS Pricing Arrangements and Price Limits, contain, general claiming rules)
(NDIS Pricing Arrangements and Price Limits, contain, service agreements)
(NDIS 

In [None]:
G = kg_index.get_networkx_graph(limit = 10000)

In [None]:
from helpers import pickleSave
pickleSave(G, 'NDISPriceGuideKG_nx', folder='.', silent=False)

Saving object NDISPriceGuideKG_nx to pickle file ./NDISPriceGuideKG_nx.pkl


## Checking the KG

In [None]:
# Service context for querying the KG


from llama_index.embeddings import OpenAIEmbedding
from llama_index.node_parser import SimpleNodeParser
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index.llms import OpenAI
from llama_index.text_splitter import TokenTextSplitter

# define LLM
# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors
llm = OpenAI(temperature=0, model="gpt-4")

embed_model = OpenAIEmbedding()
# node_parser = SimpleNodeParser.from_defaults(
#   text_splitter=TokenTextSplitter(chunk_size=512, chunk_overlap=64))

service_context_querying_kg = ServiceContext.from_defaults(
  llm=llm,
  embed_model=embed_model,
#   node_parser=node_parser
)

In [None]:
storage_context.graph_store.persist('NdisPriceGuide_512_64_graph.txt')

In [None]:
from llama_index.query_engine import KnowledgeGraphQueryEngine

# from llama_index.storage.storage_context import StorageContext
# from llama_index.graph_stores import NebulaGraphStore

query_engine = KnowledgeGraphQueryEngine(
    storage_context=storage_context,
    service_context=service_context_querying_kg,
    llm=llm,
    verbose=True,
)

In [None]:
graph_query = query_engine.generate_query(
    "What is the support item code for invoicing gardening for a participant?",
)

display(Markdown(f"""
```cypher
{graph_query}
```
"""))


```cypher
The provided schema does not contain any information related to "support item code", "invoicing", "gardening", or "participant". Therefore, it's not possible to generate a NebulaGraph query based on the provided schema and the given question.
```


## Create vector store index

In [None]:
documents[1].text

' \nNDIS Pricing Arrangements and Price Limits 202 3-24 Version 1.0  (published 16/06/2023 )  Page 2 of 101 Copyright  \n© National Disability Insurance Agency 202 3 \nUse of National Disability Insurance Agency copyright material  \nThe material in this document  with the exception of logos, trademarks, third party material and \nother content as specified is licensed under Creative Commons CC NC licence, version 4.0. With \nthe exception of logos, trademarks, third party material and other content as specified, you may \nreproduce the material in this document , provided you a cknowledge the National Disability \nInsurance Agency as the owner of all intellectual property rights in the reproduced material by \nusing ‘© National Disability Insurance Agency 2023’ and do not use the material for commercial \npurposes.  \nReproduction of any Cr eative Commons material in this document is subject to the CC NC licence \nconditions available on the Creative Commons site, as is the full legal

In [None]:
vector_index = VectorStoreIndex.from_documents(documents)

## Define a CustomRetriever  


The purpose of this demo was to test the effectiveness of using Knowledge Graph queries for retrieving information that is distributed across multiple nodes in small pieces. To achieve this, we adopted a simple approach: performing retrieval on both sources and then combining them into a single context to be sent to LLM.

Thanks to the flexible abstraction provided by Llama Index Retriever, implementing this approach was relatively straightforward. We created a new class called CustomRetriever which retrieves data from both VectorIndexRetriever and KGTableRetriever

In [None]:
# import QueryBundle
from llama_index import QueryBundle

# import NodeWithScore
from llama_index.schema import NodeWithScore

# Retrievers
from llama_index.retrievers import BaseRetriever, VectorIndexRetriever, KGTableRetriever

from typing import List


class CustomRetriever(BaseRetriever):
    """Custom retriever that performs both Vector search and Knowledge Graph search"""

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        kg_retriever: KGTableRetriever,
        mode: str = "OR",
    ) -> None:
        """Init params."""

        self._vector_retriever = vector_retriever
        self._kg_retriever = kg_retriever
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")
        self._mode = mode

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes given query."""

        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        kg_nodes = self._kg_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        kg_ids = {n.node.node_id for n in kg_nodes}

        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in kg_nodes})

        if self._mode == "AND":
            retrieve_ids = vector_ids.intersection(kg_ids)
        else:
            retrieve_ids = vector_ids.union(kg_ids)

        retrieve_nodes = [combined_dict[rid] for rid in retrieve_ids]
        return retrieve_nodes

In [None]:

from llama_index.query_engine import RetrieverQueryEngine

# create custom retriever
vector_retriever = VectorIndexRetriever(index=vector_index)
kg_retriever = KGTableRetriever(
    index=kg_index, retriever_mode="keyword", include_text=False
)
custom_retriever = CustomRetriever(vector_retriever, kg_retriever)



## Create Query Engines  

To enable comparsion, we also create vector_query_engine, kg_keyword_query_engine together with our custom_query_engine.

In [None]:
llm = OpenAI(temperature=0, model="gpt-4")
embed_model = OpenAIEmbedding()

service_context_querying_engine = ServiceContext.from_defaults(  # useful?
  llm=llm,
  embed_model=embed_model,
  system_prompt=prompt_llamaindex_retriever)


In [None]:
from llama_index import get_response_synthesizer
# create response synthesizer
response_synthesizer = get_response_synthesizer(
    service_context=service_context_querying_engine,
    response_mode="tree_summarize",
)
custom_query_engine = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)

vector_query_engine = vector_index.as_query_engine(service_context=service_context_querying_engine)

kg_keyword_query_engine = kg_index.as_query_engine(
    # setting to false uses the raw triplets instead of adding the text from the corresponding nodes
    include_text=False,
    retriever_mode="keyword",
    response_mode="tree_summarize",
    service_context=service_context_querying_engine # useful?
)

## Query with different retrievers

With the above query engines created for corresponding retrievers, let’s see how they perform.

First, we go with the pure knowledge graph.

In [None]:
query = "I have mowed the lawn of 1 participant on Saturday 9 to 11 am last week, how should I invoice NDIS?"

In [None]:
response = kg_keyword_query_engine.query(query)
display(Markdown(f"<b>{response}</b>"))

<b>To invoice NDIS for the lawn mowing service you provided, you need to follow the NDIS Pricing Arrangements and Price Limits. However, I need more information to provide the correct support item code and maximum price. Specifically, I need to know the location where the service was provided and whether it falls under a specific support category. 

Once I have this information, I can provide the appropriate support item code, the maximum price you can charge for the service, and any specific rules related to claiming for this service.</b>

Then the vector store approach.

In [None]:
response = vector_query_engine.query(query)
display(Markdown(f"<b>{response}</b>"))

<b>To invoice NDIS for the lawn mowing service you provided to a participant on a Saturday, you need to follow these steps:

### Support Item Code
First, you need to identify the appropriate support item code for the lawn mowing service. This code will depend on the specific category of support that lawn mowing falls under in the NDIS Price Guide. If you're unsure, you may need to consult the NDIS Price Guide or ask for assistance from an NDIS representative.

### Maximum Price
The maximum price you can charge will depend on the support item code and the location where the service was provided. The NDIS Price Guide provides a list of maximum prices for each support item, which varies by location. 

### Specific Rules
As the service was provided on a Saturday, it falls under the "Saturday Support" category as per the NDIS Pricing Arrangements and Price Limits. Therefore, you should invoice it as a "Saturday Support". 

Remember, if the service crosses a shift boundary and the same worker delivers the entire support, the higher of the relevant price limits applies to the entire support. You should discuss this billing arrangement with the participant.

Finally, ensure that the proposed charges for the activities comply with the NDIS Pricing Arrangements and Price Limits, and that you have the agreement of the participant in advance.</b>

In [None]:
response = custom_query_engine.query(query)
display(Markdown(f"<b>{response}</b>"))

<b>To invoice NDIS for the lawn mowing service you provided to a participant on a Saturday, you need to follow these steps:

### Support Item Code
First, you need to identify the appropriate support item code for the lawn mowing service. This code should be listed in the NDIS Price Guide under the category of 'Yard Maintenance' or similar. If you're unsure of the exact code, please provide more details about the service you provided.

### Maximum Price
The maximum price you can charge will depend on the specific support item code and the location where the service was provided. The NDIS Pricing Arrangements and Price Limits document outlines the maximum prices for different services and locations.

### Specific Rules
When claiming for this service, you need to ensure that the proposed charges comply with the NDIS Pricing Arrangements and Price Limits. Also, the service agreement with the participant should specify that such services can be claimed. 

Remember, the claim should be made using the same support item as would have been used if the support had been delivered. 

Please note that the information provided here is general in nature. For specific advice related to your situation, you may need to consult with an NDIS representative or a professional advisor.</b>

In [None]:
response_synthesizer = get_response_synthesizer(
    service_context=service_context,
    response_mode="tree_summarize",
)
custom_query_engine = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)

In [None]:
response = custom_query_engine.query("What are plan managers?")
display(Markdown(f"<b>{response}</b>"))

<b>Response 1: 

Plan managers are individuals who help participants with the management of their plans. This includes tasks such as budgeting, claims management, and provider payments.
---------------------
Response 2: 

Plan managers are responsible for ensuring that providers do not charge participants more than the price limits set by the NDIA. They must also provide the NDIA with the Australian Business Number (ABN) of the service provider delivering the support.
---------------------
Response 3: 

Plan managers are responsible for managing NDIS plans and ensuring that participants receive the support they need. They are not responsible for ensuring that providers are TTP compliant.</b>