# 1. Preparation

## 1.1 Prepare for LLM

In [None]:
# %pip install llama-index-llms-azure-openai
# %pip install llama-index-graph-stores-nebula
# %pip install llama-index-llms-openai
# %pip install llama-index-embeddings-azure-openai

In [1]:
# For OpenAI

import os

# os.environ["OPENAI_API_KEY"] = "INSERT YOUR KEY"

import logging
import sys

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output

from llama_index.core import (
    KnowledgeGraphIndex,
    VectorStoreIndex,
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    PromptTemplate
)
from llama_index.graph_stores.nebula import NebulaGraphStore


from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from IPython.display import Markdown, display

from finllmqa.api.core import LLM_API_URL

from llama_index.core import Settings

# Chat model and embedding model are both reached through the endpoint at
# LLM_API_URL (imported from finllmqa.api.core above).
# NOTE(review): api_key='null' looks like a placeholder for an endpoint that does
# not validate credentials — confirm before pointing this at a real OpenAI account.
llm = OpenAI(model="gpt-3.5-turbo", api_base=LLM_API_URL, api_key='null')
embed_model = OpenAIEmbedding(api_base=LLM_API_URL, api_key='null')

# Register both on the global Settings object so later cells do not have to pass
# an llm / embed_model explicitly.
Settings.llm = llm
Settings.embed_model = embed_model

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# For Azure OpenAI

import os
import json
import openai
from langchain.embeddings import OpenAIEmbeddings
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    KnowledgeGraphIndex,
    ServiceContext
)
from llama_index.core import set_global_service_context

from llama_index.core.storage.storage_context import StorageContext
from llama_index.graph_stores.nebula import NebulaGraphStore

import logging
import sys

from IPython.display import Markdown, display

from finllmqa.api.core import LLM_API_URL

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Module-level openai configuration for an Azure-style endpoint at LLM_API_URL.
# NOTE(review): module-level `openai.api_*` attributes look like the pre-1.0 openai
# SDK style — confirm they are still honoured by the installed openai version.
openai.api_type = "azure"
openai.api_base = LLM_API_URL
openai.api_version = "2024-03-01"
# os.environ["OPENAI_API_KEY"] = "youcannottellanyone"
# openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = 'null'  # placeholder key; replace (or read from the environment) for a real deployment

# Chat-completion model. "<foo-bar-deployment>" is a placeholder for your own
# Azure deployment name.
# NOTE(review): credentials are forwarded through `model_kwargs`; verify that the
# installed llama-index AzureOpenAI accepts them this way.
llm = AzureOpenAI(
    engine="<foo-bar-deployment>",
    temperature=0,
    openai_api_version=openai.api_version,
    model_kwargs={
        "api_key": openai.api_key,
        "api_base": openai.api_base,
        "api_type": openai.api_type,
        "api_version": openai.api_version,
    },
)

# You need to deploy your own embedding model as well as your own chat completion model
# The LangChain embedding client is wrapped so llama-index can use it; texts are
# embedded one per request (embed_batch_size=1).
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment="<foo-bar-deployment>",
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version=openai.api_version,
    ),
    embed_batch_size=1,
)

# service_context = ServiceContext.from_defaults(
#     llm=llm,
#     embed_model=embedding_llm,
# )

# set_global_service_context(service_context)

In [None]:
from llama_index.core import Settings

Settings.llm = llm
Settings.embed_model = embedding_llm

## 1.2. Prepare for NebulaGraph as Graph Store


❗Access NebulaGraph Console to **create space** and **graph schema**

```sql
CREATE SPACE guardians(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);
:sleep 10;
USE guardians;
CREATE TAG entity(name string);
CREATE EDGE relationship(relationship string);
:sleep 10;
CREATE TAG INDEX entity_index ON entity(name(256));
```

In [None]:
# %pip install nebula3-python ipython-ngql

In [2]:
# Connection settings for NebulaGraph, exported as environment variables.
os.environ.update(
    {
        'NEBULA_USER': "root",
        'NEBULA_PASSWORD': "nebula",  # default password
        'NEBULA_ADDRESS': "192.168.30.158:9669",
    }
)

## 2. Build the Knowledge Graph and Persist(Optional)

**You can skip this step and move to Section 4 if you have downloaded and unzipped 'index.zip'.**

In my work, the Knowledge Graph was created with LLM.

We simply do so by leveraging the `KnowledgeGraphIndex` from LlamaIndex: when it is created, triplets are extracted with the LLM and eventually persisted into `NebulaGraphStore`.

### 2.1 Load Data

In [3]:
from llama_index.core import SimpleDirectoryReader

# Folder holding the teaching resources; change it to wherever you saved them.
document_path = 'books/'
file_name_ls = [
    document_path + file_name
    for file_name in ('微观经济学.pdf',)
]

# Read the listed files into llama-index documents.
reader = SimpleDirectoryReader(input_files=file_name_ls)
documents = reader.load_data()

### 2.2 Split Documents

In [4]:
from llama_index.core.node_parser import SentenceSplitter


# Grid of chunking configurations: every chunk size is combined with every
# overlap ratio (overlap is expressed as a fraction of the chunk size).
chunk_size_ls = [256, 512, 1024]
chunk_overlap_pct_ls = [1/8, 1/4]
# Maps a group name such as 'size_256_overlap_32' to the nodes produced with
# that configuration; later cells iterate over this dict to build the indexes.
split_document_dc = {}
for chunk_size in chunk_size_ls:
    for chunk_overlap_pct in chunk_overlap_pct_ls:
        # Convert the ratio into an absolute overlap, truncated to an int.
        chunk_overlap = int(chunk_size * chunk_overlap_pct)
        nodes_group = f'size_{chunk_size}_overlap_{chunk_overlap}'
        splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        split_document = splitter.get_nodes_from_documents(documents=documents)
        split_document_dc[nodes_group] = split_document
        print(f'chunk_size: {chunk_size}; chunk_overlap: {chunk_overlap} len_chunks: {len(split_document)}')

chunk_size: 256; chunk_overlap: 32 len_chunks: 2941
chunk_size: 256; chunk_overlap: 64 len_chunks: 3277
chunk_size: 512; chunk_overlap: 64 len_chunks: 1508
chunk_size: 512; chunk_overlap: 128 len_chunks: 1588
chunk_size: 1024; chunk_overlap: 128 len_chunks: 871
chunk_size: 1024; chunk_overlap: 256 len_chunks: 874


### 2.3 Extract Triplets and Save to NebulaGraph

In [5]:
# Few-shot prompt (written in Chinese) asking the LLM to extract up to
# {max_knowledge_triplets} (entity, relation, entity) triplets from {text}.
# Both placeholders are left unformatted here; the string is wrapped in a
# PromptTemplate below and handed to KnowledgeGraphIndex in the next code cell.
kg_extract_template = """
    下面提供了一些文本。根据文本，提取最多 {max_knowledge_triplets} 个三元组的知识，形式为(实体,关系,实体)，具体可以是(主语,谓语,宾语)或者其他类型，注意避开停用词。
    请忽略page_label和file_path
    ---------------------
    示例：
    文本：小红是小明的母亲.
    三元组：
    (小红,是母亲,小明)
    文本:瑞幸是2017年在厦门创立的咖啡店。
    三元组：
    (瑞幸,是,咖啡店)
    (瑞幸,创立于,厦门)
    (瑞幸,创立于,2017)
    文本:在长期中，物价总水平会调整到使货币需求等于货币供给的水平。
    三元组：
    (物价总水平,长期调整使等于,货币需求等于货币供给的水平)
    ---------------------
    文本：{text}
    三元组："""
# Rebind the name from the raw string to the PromptTemplate object.
kg_extract_template = PromptTemplate(kg_extract_template)

This cell will take some time: it extracts entities and relationships and stores them into NebulaGraph. Once you have run it, you can skip this step and load the storage_context from disk in Section 4.

In [None]:
import time

# One KnowledgeGraphIndex is built (and timed) per chunking configuration.
kg_index_ls = []

for nodes_group, nodes in split_document_dc.items():
    start = time.time()
    print(f'\n\nstart extract {nodes_group} nodes...\n\n')

    # Every nodes group is written to its own NebulaGraph space.
    space_name = f"books_content_{nodes_group}"
    # Schema defaults; could be omitted if created from an empty kg.
    edge_types = ["relationship"]
    rel_prop_names = ["relationship"]
    tags = ["entity"]

    graph_store = NebulaGraphStore(
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
    )
    storage_context = StorageContext.from_defaults(graph_store=graph_store)

    # Building the index runs the triplet extraction with the custom prompt.
    kg_index = KnowledgeGraphIndex(
        nodes=nodes,
        storage_context=storage_context,
        max_triplets_per_chunk=10,
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
        include_embeddings=True,
        kg_triple_extract_template=kg_extract_template,
    )

    # The reported duration covers index construction only, not persisting.
    end = time.time()
    print(f'{nodes_group} takes {(end-start)//60} min')
    kg_index_ls.append(kg_index)

    # store index
    kg_index.storage_context.persist(
        persist_dir=f'../storage/storage_graph/{nodes_group}'
    )

#### Multithreading

In [1]:
%%writefile create_kg_index.py
import os
import logging
import sys

# NebulaGraph connection settings, set inside the script so it can also be run
# on its own outside the notebook kernel.
# NOTE(review): credentials and address are hard-coded; consider reading them
# from the caller's environment instead.
os.environ['NEBULA_USER'] = "root"
os.environ['NEBULA_PASSWORD'] = "nebula" # default password
os.environ['NEBULA_ADDRESS'] = "192.168.30.158:9669" 

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output

from llama_index.core import (
    KnowledgeGraphIndex,
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    PromptTemplate)
from llama_index.graph_stores.nebula import NebulaGraphStore
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from finllmqa.api.core import LLM_API_URL
from llama_index.core import Settings

# Chat and embedding models are both reached through the endpoint at LLM_API_URL.
# NOTE(review): api_key='null' looks like a placeholder — confirm the endpoint
# does not validate credentials.
llm = OpenAI(model="gpt-3.5-turbo", api_base=LLM_API_URL, api_key='null')
embed_model = OpenAIEmbedding(api_base=LLM_API_URL, api_key='null')

# Register both on the global Settings object.
Settings.llm = llm
Settings.embed_model = embed_model

# change path to where you save the teaching resources
document_path = 'books/'
file_name_ls = ['微观经济学.pdf']
file_name_ls = [document_path + file_name for file_name in file_name_ls]

reader = SimpleDirectoryReader(input_files=file_name_ls)
documents = reader.load_data()

from llama_index.core.node_parser import SentenceSplitter


# Grid of chunking configurations: every chunk size is combined with every
# overlap ratio (overlap is a fraction of the chunk size).
chunk_size_ls = [256, 512, 1024]
chunk_overlap_pct_ls = [1/8, 1/4]
# Maps a group name such as 'size_256_overlap_64' to its split nodes.
split_document_dc = {}
for chunk_size in chunk_size_ls:
    for chunk_overlap_pct in chunk_overlap_pct_ls:
        chunk_overlap = int(chunk_size * chunk_overlap_pct)
        nodes_group = f'size_{chunk_size}_overlap_{chunk_overlap}'
        # Skip configurations whose graph index folder already exists on disk,
        # so re-running the script only processes the missing groups.
        if os.path.exists(f'../storage/storage_graph/{nodes_group}'):
            continue
        splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        split_document = splitter.get_nodes_from_documents(documents=documents)
        split_document_dc[nodes_group] = split_document
        print(f'chunk_size: {chunk_size}; chunk_overlap: {chunk_overlap} len_chunks: {len(split_document)}')
# Show which groups are still to be processed.
print(split_document_dc.keys())
from threading import Thread

def create_and_store_kg_index(nodes_group, nodes):
    """Build the knowledge-graph index for one nodes group and persist it.

    Args:
        nodes_group: Name of the chunking configuration (e.g.
            'size_256_overlap_64'); used for the NebulaGraph space name and
            for the persist directory.
        nodes: The split nodes of that configuration, passed to
            KnowledgeGraphIndex.

    Returns:
        None. The index is written to
        '../storage/storage_graph/<nodes_group>'.
    """
    # Few-shot prompt (written in Chinese) asking the LLM for up to
    # {max_knowledge_triplets} (entity, relation, entity) triplets from {text}.
    kg_extract_template = """
    下面提供了一些文本。根据文本，提取最多 {max_knowledge_triplets} 个三元组的知识，形式为(实体,关系,实体)，具体可以是(主语,谓语,宾语)或者其他类型，注意避开停用词。
    请忽略page_label和file_path
    ---------------------
    示例：
    文本：小红是小明的母亲.
    三元组：
    (小红,是母亲,小明)
    文本:瑞幸是2017年在厦门创立的咖啡店。
    三元组：
    (瑞幸,是,咖啡店)
    (瑞幸,创立于,厦门)
    (瑞幸,创立于,2017)
    文本:在长期中，物价总水平会调整到使货币需求等于货币供给的水平。
    三元组：
    (物价总水平,长期调整使等于,货币需求等于货币供给的水平)
    ---------------------
    文本：{text}
    三元组："""
    # Rebind the name from the raw string to the PromptTemplate object.
    kg_extract_template = PromptTemplate(kg_extract_template)

    print(f'\n\nstart extract {nodes_group} nodes...\n\n')
    # Each nodes group gets its own NebulaGraph space.
    space_name = f"books_content_{nodes_group}"
    edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg
    tags = ["entity"] # default, could be omit if create from an empty kg

    graph_store = NebulaGraphStore(
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
    )
    storage_context = StorageContext.from_defaults(graph_store=graph_store)
    # Building the index runs the triplet extraction with the custom prompt.
    kg_index = KnowledgeGraphIndex(
        nodes=nodes,
        storage_context=storage_context,
        max_triplets_per_chunk=10,
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
        include_embeddings=True,
        kg_triple_extract_template=kg_extract_template
    )

    # store index
    kg_index.storage_context.persist(persist_dir=f'../storage/storage_graph/{nodes_group}')

# Start one extraction worker per chunking configuration so the groups are
# processed concurrently.
threads = []
for nodes_group, nodes in split_document_dc.items():
    thread = Thread(target=create_and_store_kg_index, args=(nodes_group, nodes))
    thread.start()
    threads.append(thread)

# Wait for every worker: without this the script (and a `%run` of it) returns
# while indexes are still being built, so later cells could try to load
# storage folders that have not been persisted yet.
for thread in threads:
    thread.join()

Overwriting create_kg_index.py


In [None]:
%run create_kg_index.py

  from .autonotebook import tqdm as notebook_tqdm


chunk_size: 256; chunk_overlap: 64 len_chunks: 3277
chunk_size: 512; chunk_overlap: 64 len_chunks: 1508
chunk_size: 512; chunk_overlap: 128 len_chunks: 1588
chunk_size: 1024; chunk_overlap: 128 len_chunks: 871
chunk_size: 1024; chunk_overlap: 256 len_chunks: 874
dict_keys(['size_256_overlap_64', 'size_512_overlap_64', 'size_512_overlap_128', 'size_1024_overlap_128', 'size_1024_overlap_256'])


start extract size_256_overlap_64 nodes...




start extract size_512_overlap_64 nodes...




start extract size_512_overlap_128 nodes...




start extract size_1024_overlap_128 nodes...




start extract size_1024_overlap_256 nodes...




2024-04-09 16:46:05,895 - httpx - INFO - HTTP Request: POST http://gemini2.sufe.edu.cn:27282/v1/chat/completions "HTTP/1.1 200 OK"
2024-04-09 16:46:08,832 - httpx - INFO - HTTP Request: POST http://gemini2.sufe.edu.cn:27282/v1/chat/completions "HTTP/1.1 200 OK"
2024-04-09 16:46:11,769 - httpx - INFO - HTTP Request: POST http://gemini2.sufe.edu.cn:27282/v1/chat/completions "HTTP/1.1 200 OK"
2024-04-09 16:46:14,714 - httpx - INFO - HTTP Request: POST http://gemini2.sufe.edu.cn:27282/v1/chat/completions "HTTP/1.1 200 OK"
2024-04-09 16:46:17,729 - httpx - INFO - HTTP Request: POST http://gemini2.sufe.edu.cn:27282/v1/chat/completions "HTTP/1.1 200 OK"
2024-04-09 16:46:17,830 - httpx - INFO - HTTP Request: POST http://gemini2.sufe.edu.cn:27282/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 16:46:17,926 - httpx - INFO - HTTP Request: POST http://gemini2.sufe.edu.cn:27282/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 16:46:18,019 - httpx - INFO - HTTP Request: POST http://gemini2.sufe.edu.cn:27282/v1

## 3 Create VectorStoreIndex for RAG and Persist(Optional)

**You can skip this step and move to Section 4 if you have downloaded and unzipped 'index.zip'.**

To compare with/work together with VectorDB based RAG, let's also create a `VectorStoreIndex`.

During the creation, same data source will be split into chunks and embedding of them will be created, during the RAG query time, the top-k related embeddings will be vector-searched with the embedding of the question.

In [None]:
# One VectorStoreIndex per chunking configuration, kept in the same order as
# split_document_dc.
vector_index_ls = []

for nodes_group, nodes in split_document_dc.items():
    print(f'\n\nstart extract {nodes_group} nodes...\n\n')
    # Building the index from the nodes; no llm/embed_model is passed explicitly.
    vector_index = VectorStoreIndex(nodes=nodes)
    vector_index_ls.append(vector_index)

    # store index
    vector_index.storage_context.persist(persist_dir=f'../storage/storage_vector/{nodes_group}')

## 4. Load from disk Llama Indexes

**You have to run the cells in Sections 2 and 3 or download index.zip first**

Both the `KnowledgeGraphIndex` and `VectorStoreIndex` will be created only once, afterwards, we could persist their in-memory context to enable their reuse from disk anytime.

In [None]:
from llama_index.core import load_index_from_storage

# Each nodes group is persisted in its own sub-folder of these two directories
# (the same locations the persist() calls of the index-building cells write to).
graph_storage_dir = '../storage/storage_graph'
vector_storage_dir = '../storage/storage_vector'

assert os.path.exists(graph_storage_dir), 'Do not have graph storage_context in disk'
assert os.path.exists(vector_storage_dir), 'Do not have vector storage_context in disk'

# The nodes groups are the sub-folders of the graph storage directory (not of the
# current working directory). Sorted so both index lists have a stable order.
entries = sorted(os.listdir(graph_storage_dir))
folders = [entry for entry in entries if os.path.isdir(os.path.join(graph_storage_dir, entry))]

kg_index_ls = []
vector_index_ls = []
for nodes_group in folders:
    # Must match the space name used when the graph was built.
    space_name = f"books_content_{nodes_group}"
    edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg
    tags = ["entity"] # default, could be omit if create from an empty kg

    graph_store = NebulaGraphStore(
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
    )
    storage_context = StorageContext.from_defaults(
        persist_dir=f'{graph_storage_dir}/{nodes_group}', graph_store=graph_store
    )
    kg_index = load_index_from_storage(
        storage_context=storage_context,
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
        include_embeddings=True,
    )
    kg_index_ls.append(kg_index)

    # Load the vector index of the same nodes group from the folder it was
    # persisted to ('../storage/storage_vector/<nodes_group>').
    storage_context_vector = StorageContext.from_defaults(
        persist_dir=f'{vector_storage_dir}/{nodes_group}'
    )
    vector_index = load_index_from_storage(
        storage_context=storage_context_vector
    )
    vector_index_ls.append(vector_index)

## 5. Prepare for different query approaches

We will do 4 types of query approaches with LLM, KG, VectorDB:

| QueryEngine | Knowledge Graph query engine                                 | Graph RAG query engine                                       | Vector RAG query engine                                      | Graph Vector RAG query engine                                |
| ----------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Mechanism   | 1. **Text-to-GraphQuery** based on KG<br />2. Query KG with the result<br />3. Answer synthesis based on query result | 1. Get related entities of the question<br />2. Get n-depth **SubGraphs** of related entities from KG<br />3. Answer synthesis based on related SubGraphs | 1. Create embedding of question<br />2. Semantic search **top-k related doc chunks**<br />3. Answer synthesis based on related doc chunks | 1. Do retrieval as Vector and Graph RAG <br />2. Answer synthesis based on **both related chunks and SubGraphs** |


### 5.1 text-to-NebulaGraphCypher

The Text-to-NebulaGraphCypher approach translates the task/question into a graph Cypher query and answers based on the query result.

In [None]:
from llama_index.core.query_engine import KnowledgeGraphQueryEngine

from llama_index.core import load_index_from_storage

# Persisted graph indexes live in one sub-folder per nodes group.
graph_storage_dir = '../storage/storage_graph'
vector_storage_dir = '../storage/storage_vector'

assert os.path.exists(graph_storage_dir), 'Do not have graph storage_context in disk'
assert os.path.exists(vector_storage_dir), 'Do not have vector storage_context in disk'

# The nodes groups are the sub-folders of the graph storage directory (not of the
# current working directory). Sorted for a stable engine order.
entries = sorted(os.listdir(graph_storage_dir))
folders = [entry for entry in entries if os.path.isdir(os.path.join(graph_storage_dir, entry))]

nl2kg_qg_ls = []
for nodes_group in folders:
    # Must match the space name used when the graph was built.
    space_name = f"books_content_{nodes_group}"
    edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg
    tags = ["entity"] # default, could be omit if create from an empty kg

    graph_store = NebulaGraphStore(
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
    )
    storage_context = StorageContext.from_defaults(
        persist_dir=f'{graph_storage_dir}/{nodes_group}', graph_store=graph_store
    )

    # Text-to-Cypher engine for this nodes group; the last one built stays
    # available as `nl2kg_query_engine` for the cells below.
    nl2kg_query_engine = KnowledgeGraphQueryEngine(
        storage_context=storage_context,
        verbose=True
    )
    nl2kg_qg_ls.append(nl2kg_query_engine)

In [None]:
# Inspect the prompts of the text-to-Cypher engine (the one left over from the
# last iteration of the loop above).
nl2kg_query_engine.get_prompts()

### 5.2 Graph RAG query engine

Graph RAG takes SubGraphs related to entities of the task/question as Context.

```
           Graph RAG with Llama Index
                  ┌────┬────┬────┬────┐                  
                  │ 1  │ 2  │ 3  │ 4  │                  
                  ├────┴────┴────┴────┤                  
                  │  Docs/Knowledge   │                  
┌───────┐         │        ...        │       ┌─────────┐
│       │         ├────┬────┬────┬────┤       │         │
│       │         │ 95 │ 96 │    │    │       │         │
│       │         └────┴────┴────┴────┘       │         │
│ User  │─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─▶   LLM   │
│       │                                     │         │
│       │                                     │         │
└───────┘    ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐  └─────────┘
    │          ┌──────────────────────────┐        ▲     
    └────────┼▶│  Tell me about x, please │├───────┘     
               └──────────────────────────┘              
             │ Below are knowledge about x │             
               x->y<-z,x->h->i, m<-n,...                            
             │ Please answer based on them │             
              ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
```

In [None]:
# One Graph RAG query engine per loaded knowledge-graph index.
kg_rag_qg_ls = []
for kg_index in kg_index_ls:
    # include_text=False: the engine is asked not to include the source text;
    # retrieval mode is "hybrid" and answers are synthesized with "tree_summarize".
    kg_rag_query_engine = kg_index.as_query_engine(
        include_text=False,
        retriever_mode="hybrid",
        response_mode="tree_summarize",
    )
    kg_rag_qg_ls.append(kg_rag_query_engine)
# `kg_rag_query_engine` keeps pointing at the engine of the last index and is
# used by the Graph RAG query cell further down.

### 5.3 Vector RAG query engine

Vector RAG is the common approach to find topK semantic related doc chunks as context to synthesize the answer.

```
                  RAG with Llama Index
                  ┌────┬────┬────┬────┐                  
                  │ 1  │ 2  │ 3  │ 4  │                  
                  ├────┴────┴────┴────┤                  
                  │  Docs/Knowledge   │                  
┌───────┐         │        ...        │       ┌─────────┐
│       │         ├────┬────┬────┬────┤       │         │
│       │         │ 95 │ 96 │    │    │       │         │
│       │         └────┴────┴────┴────┘       │         │
│ User  │─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─▶   LLM   │
│       │                                     │         │
│       │                                     │         │
└───────┘    ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐  └─────────┘
    │          ┌──────────────────────────┐        ▲     
    └────────┼▶│  Tell me ....., please   │├───────┘     
               └──────────────────────────┘              
             │ ┌────┐ ┌────┐               │             
               │ 3  │ │ 96 │                             
             │ └────┘ └────┘               │             
              ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
```

In [None]:
# One Vector RAG query engine per loaded vector index. The loop variable must
# be `vector_index`, the name used in the body; otherwise every engine is built
# from whatever `vector_index` an earlier cell left behind.
vector_rag_qg_ls = []
for vector_index in vector_index_ls:
    vector_rag_query_engine = vector_index.as_query_engine()
    vector_rag_qg_ls.append(vector_rag_query_engine)
# `vector_rag_query_engine` keeps pointing at the engine of the last index and
# is used by the Vector RAG query cell further down.

### 5.4 Graph+Vector RAG query engine

This is a combined Graph+Vector Based RAG, where we will retrieve both VectorDB and KG SubGraphs as the context, for synthesis of the answer.

```
           Graph + Vector RAG with Llama Index
                  ┌────┬────┬────┬────┐                  
                  │ 1  │ 2  │ 3  │ 4  │                  
                  ├────┴────┴────┴────┤                  
                  │  Docs/Knowledge   │                  
┌───────┐         │        ...        │       ┌─────────┐
│       │         ├────┬────┬────┬────┤       │         │
│       │         │ 95 │ 96 │    │    │       │         │
│       │         └────┴────┴────┴────┘       │         │
│ User  │─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─▶   LLM   │
│       │                                     │         │
│       │                                     │         │
└───────┘    ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐  └─────────┘
    │          ┌──────────────────────────┐        ▲     
    └────────┼▶│  Tell me ....., please   │├───────┘     
               └──────────────────────────┘              
             │ ┌────┐┌────┐               │             
               │ 3  ││ 96 │ x->y<-z,x->h...                            
             │ └────┘└────┘               │             
              ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
```

To implement that in Llama Index, we create a `CustomRetriever` to combine the two:

In [None]:
# import QueryBundle
from llama_index.core import QueryBundle

# import NodeWithScore
from llama_index.core.schema import NodeWithScore

# Retrievers
from llama_index.core.retrievers import BaseRetriever, VectorIndexRetriever, KGTableRetriever

from typing import List


class CustomRetriever(BaseRetriever):
    """Custom retriever that performs both Vector search and Knowledge Graph search.

    The two result sets are merged by node id:

    * mode "OR"  - nodes returned by either retriever (union);
    * mode "AND" - only nodes returned by both retrievers (intersection).

    When both retrievers return the same node id, the entry from the
    knowledge-graph retriever is the one that is kept.
    """

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        kg_retriever: KGTableRetriever,
        mode: str = "OR",
    ) -> None:
        """Init params.

        Args:
            vector_retriever: Retriever over the vector index.
            kg_retriever: Retriever over the knowledge-graph index.
            mode: "OR" (union, default) or "AND" (intersection).

        Raises:
            ValueError: If ``mode`` is neither "AND" nor "OR".
        """

        self._vector_retriever = vector_retriever
        self._kg_retriever = kg_retriever
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")
        self._mode = mode
        # Let the base class set up its own state (callback manager, object
        # map, ...), which the public retrieve() entry point relies on.
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes given query.

        Runs both retrievers on ``query_bundle`` and returns the merged nodes:
        vector results first, followed by knowledge-graph-only results.
        """

        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        kg_nodes = self._kg_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        kg_ids = {n.node.node_id for n in kg_nodes}

        # Insertion-ordered: vector hits first, then KG hits not seen before.
        # For ids present in both, the KG entry replaces the vector entry.
        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in kg_nodes})

        if self._mode == "AND":
            retrieve_ids = vector_ids.intersection(kg_ids)
        else:
            retrieve_ids = vector_ids.union(kg_ids)

        # Walk the ordered dict instead of the id set so the returned order is
        # the same on every run (set iteration order of strings is not).
        return [
            node for node_id, node in combined_dict.items() if node_id in retrieve_ids
        ]

Next, we will create instances of the Vector and KG retrievers, which will be used in the instantiation of the Custom Retriever.

In [None]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# One combined Graph+Vector query engine per (kg_index, vector_index) pair;
# the two lists are paired by position.
kg_vec_rag_qg_ls = []
for kg_index, vector_index in zip(kg_index_ls, vector_index_ls):
    # Retrievers feeding the custom retriever.
    vector_retriever = VectorIndexRetriever(index=vector_index)
    kg_retriever = KGTableRetriever(
        index=kg_index,
        retriever_mode="keyword",
        include_text=False,
    )
    custom_retriever = CustomRetriever(
        vector_retriever=vector_retriever,
        kg_retriever=kg_retriever,
    )

    # Response synthesizer that turns the retrieved nodes into the answer.
    response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

    kg_vector_rag_query_engine = RetrieverQueryEngine(
        retriever=custom_retriever,
        response_synthesizer=response_synthesizer,
    )
    kg_vec_rag_qg_ls.append(kg_vector_rag_query_engine)

### 5.5 General load index from disk and get query engine function

In [None]:
def get_all_query_engine_from_cache_index(kg_index_folder_path, vector_index_folder_path, nodes_group: str|List[str]):
    """Load persisted indexes and build all four query engines per nodes group.

    Args:
        kg_index_folder_path: Directory holding one persisted knowledge-graph
            index folder per nodes group.
        vector_index_folder_path: Directory holding one persisted vector index
            folder per nodes group.
        nodes_group: A single nodes-group name or a list of names
            (e.g. 'size_256_overlap_64').

    Returns:
        dict with the keys 'nl2kg', 'kg_rag', 'vec_rag' and 'kg_vec_rag'; each
        value is a list of query engines in the same order as the requested
        nodes groups.
    """
    if isinstance(nodes_group, str):
        nodes_group_ls = [nodes_group]
    else:
        nodes_group_ls = list(nodes_group)
    query_engine_dc = {
        'nl2kg': [],
        'kg_rag': [],
        'vec_rag': [],
        'kg_vec_rag': []
    }
    # A separate loop name keeps the `nodes_group` argument from being shadowed.
    for group in nodes_group_ls:
        # Must match the space name used when the graph was built.
        space_name = f"books_content_{group}"
        edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg
        tags = ["entity"] # default, could be omit if create from an empty kg

        graph_store = NebulaGraphStore(
            space_name=space_name,
            edge_types=edge_types,
            rel_prop_names=rel_prop_names,
            tags=tags,
        )
        # os.path.join works whether or not the folder path ends with a separator.
        storage_context_kg = StorageContext.from_defaults(
            persist_dir=os.path.join(kg_index_folder_path, group), graph_store=graph_store
        )
        # Use this group's own storage context, not a notebook-global one.
        kg_index = load_index_from_storage(
            storage_context=storage_context_kg,
            space_name=space_name,
            edge_types=edge_types,
            rel_prop_names=rel_prop_names,
            tags=tags,
            include_embeddings=True,
        )

        storage_context_vector = StorageContext.from_defaults(
            persist_dir=os.path.join(vector_index_folder_path, group)
        )
        vector_index = load_index_from_storage(
            storage_context=storage_context_vector
        )

        # text2cypher query engine
        nl2kg_query_engine = KnowledgeGraphQueryEngine(
            storage_context=storage_context_kg,
            verbose=True
        )

        # Graph RAG query engine (same settings as section 5.2)
        kg_rag_query_engine = kg_index.as_query_engine(
            include_text=False,
            retriever_mode="hybrid",
            response_mode="tree_summarize",
        )

        # Vector RAG query engine (same settings as section 5.3)
        vector_rag_query_engine = vector_index.as_query_engine()

        # Graph + Vector RAG query engine (same settings as section 5.4)
        custom_retriever = CustomRetriever(
            VectorIndexRetriever(index=vector_index),
            KGTableRetriever(index=kg_index, retriever_mode="keyword", include_text=False),
        )
        kg_vector_rag_query_engine = RetrieverQueryEngine(
            retriever=custom_retriever,
            response_synthesizer=get_response_synthesizer(response_mode="tree_summarize"),
        )

        query_engine_dc['nl2kg'].append(nl2kg_query_engine)
        query_engine_dc['kg_rag'].append(kg_rag_query_engine)
        query_engine_dc['vec_rag'].append(vector_rag_query_engine)
        query_engine_dc['kg_vec_rag'].append(kg_vector_rag_query_engine)

    return query_engine_dc


## 6. Base Query with all the Engines

### 6.1 Text-to-GraphQuery

In [None]:
# Use one question string for both calls so the Cypher shown below is generated
# from the same text as the answer above.
question = "什么是经济学十大原理"

response_nl2kg = nl2kg_query_engine.query(question)


display(Markdown(f"<b>{response_nl2kg}</b>"))

# Cypher:

print("Cypher Query:")

graph_query = nl2kg_query_engine.generate_query(
    question,
)
# Put WHERE / RETURN on their own lines for readability.
graph_query = graph_query.replace("WHERE", "\n  WHERE").replace("RETURN", "\nRETURN")

display(
    Markdown(
        f"""
```cypher
{graph_query}
```
"""
    )
)

### 6.2 Graph RAG

In [None]:
# Ask the Graph RAG engine (the one left over from the last iteration of the
# engine-building loop in section 5.2) and render the answer in bold.
response_graph_rag = kg_rag_query_engine.query("什么是经济学十大原理")

display(Markdown(f"<b>{response_graph_rag}</b>"))

### 6.3 Vector RAG

In [None]:
# Ask the Vector RAG engine (the one left over from the engine-building loop in
# section 5.3) and render the answer in bold.
response_vector_rag = vector_rag_query_engine.query("什么是经济学十大原理")

display(Markdown(f"<b>{response_vector_rag}</b>"))

In [None]:
# Let the LLM tabulate the differences between the Graph RAG and Vector RAG answers.
graph_vs_vector_prompt = f"""
Compare the two QA result on "什么是经济学十大原理", list the differences between them, to help evalute them. Output in markdown table.

Result from Graph: {response_graph_rag}
---
Result from Vector: {response_vector_rag}

"""
graph_vs_vector_completion = llm.complete(graph_vs_vector_prompt)
display(Markdown(graph_vs_vector_completion.text))

### 6.4 Graph + Vector RAG

In [None]:
# The combined engine built in section 5.4 is named `kg_vector_rag_query_engine`
# (the one left over from the last iteration of that loop).
response_graph_vector_rag = kg_vector_rag_query_engine.query("什么是经济学十大原理")

display(Markdown(f"<b>{response_graph_vector_rag}</b>"))

### 6.5 Overall Comparison

Let's compare the results of them.

First, check the information that was covered by the different approaches:

In [None]:
# Let the LLM tabulate the knowledge facts covered by each of the four answers.
# `llm.complete(...)` returns a completion object; its `.text` is what Markdown
# needs (same call pattern as the Graph-vs-Vector comparison cell above).
display(
    Markdown(
        llm.complete(f"""
Compare the QA results on "什么是经济学十大原理", list the knowledge facts between them, to help evalute them. Output in markdown table.

Result text2GraphQuery: {response_nl2kg}
---
Result Graph: {response_graph_rag}
---
Result Vector: {response_vector_rag}
---
Result Graph+Vector: {response_graph_vector_rag}
---

"""
           ).text
    )
)

**Conclusion**

- The pure **KG** approaches (both text2GraphQuery and Graph RAG) come with **concise** results and much **lower cost** (for a cost comparison, see our previous result [here](https://gpt-index.readthedocs.io/en/latest/examples/index_structs/knowledge_graph/KnowledgeGraphIndex_vs_VectorStoreIndex_vs_CustomIndex_combined.html#comparison-of-results)).
- The **Graph+Vector** RAG could be more **comprehensive** when the question involves fine-grained knowledge that is **spread** across more chunks than top-K searching retrieves.


| QueryEngine | Knowledge Graph query engine                                 | Graph RAG query engine                                       | Vector RAG query engine                                      | Graph Vector RAG query engine                                |
| ----------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Mechanism   | 1. **Text-to-GraphQuery** based on KG<br />2. Query KG with the result<br />3. Answer synthesis based on query result | 1. Get related entities of the question<br />2. Get n-depth **SubGraphs** of related entities from KG<br />3. Answer synthesis based on related SubGraphs | 1. Create embedding of question<br />2. Semantic search **top-k related doc chunks**<br />3. Answer synthesis based on related doc chunks | 1. Do retrieval as Vector and Graph RAG <br />2. Answer synthesis based on **both related chunks and SubGraphs** |
| Performance | Concise                                                      | Concise                                                      | Fruitful                                                     | Fruitful, could be more comprehensive                        |
| Cost        | Low                                                          | Low                                                          | High                                                         | High                                                         |


**Conclusion**

For tasks where:

- Relationship-centric (connected) knowledge potentially matters more
- The schema of the KG is too sophisticated for text2cypher to express the task
- The KG quality isn't good enough
- Multiple "starting entities" are involved

Graph RAG could be a better approach to start with.

## 7. Financial Evaluation on four types of engines

### 7.1 FinEval on query engines base on nodes of different chunk sizes and chunk overlaps