### Graph RAG experimentations

In [1]:
import pickle
import nest_asyncio

# Apply asyncio modifications
nest_asyncio.apply()

# Llama Index imports
from llama_index.core import (
    KnowledgeGraphIndex,
    StorageContext
)
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor

# Llama Index embeddings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Llama Index LLMs
from llama_index.llms.llama_cpp import LlamaCPP

# Graph stores
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

from graphs import triplets_extraction, GraphRAGExtractor, parse_fn, GraphRAGStore, graph_rag_pipeline

import sys
sys.path.append('../')
from utils import retriever_evaluation, display_results_retriever


  from .autonotebook import tqdm as notebook_tqdm
2024-09-18 16:14:38.315067: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-18 16:14:38.322861: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-18 16:14:38.331731: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-18 16:14:38.334529: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-18 16:14:38.3

Note: to be able to use all crisp methods, you need to install some additional packages:  {'wurlitzer', 'leidenalg', 'bayanpy', 'graph_tool', 'infomap'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'pyclustering', 'ASLPAw'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'leidenalg', 'infomap', 'wurlitzer'}


In [2]:
model_url = "https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q4_0.gguf"
llm_llama3 = LlamaCPP(
    model_url=model_url,
    model_path=None,
    temperature=0.1,
    max_new_tokens=512,
    context_window=3900,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": -1},
    verbose=True,
)

embed_model = HuggingFaceEmbedding(model_name = "dunzhang/stella_en_400M_v5", device = "cuda", trust_remote_code=True, embed_batch_size=20)

nodes = pickle.load(open('nodes_icrc_english.pkl','rb'))
qa_dataset = pickle.load(open("qa_dataset_icrc_500.pkl",'rb'))

llama_model_loader: loaded meta data with 27 key-value pairs and 291 tensors from /tmp/llama_index/models/Meta-Llama-3-8B-Instruct.Q4_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Models
llama_model_loader: - kv   3:                         general.size_label str              = 8.0B
llama_model_loader: - kv   4:                            general.license str              = llama3
llama_model_loader: - kv   5:                               general.tags arr[str,6]       = ["facebook", "meta", "pytorch", "llam...
llama_model_loader: - kv   6:                          general.languages arr[str,1]       = ["en"]
llama_model

llm_load_vocab: special tokens cache size = 256
llm_load_vocab: token to piece cache size = 0.8000 MB
llm_load_print_meta: format           = GGUF V3 (latest)
llm_load_print_meta: arch             = llama
llm_load_print_meta: vocab type       = BPE
llm_load_print_meta: n_vocab          = 128256
llm_load_print_meta: n_merges         = 280147
llm_load_print_meta: vocab_only       = 0
llm_load_print_meta: n_ctx_train      = 8192
llm_load_print_meta: n_embd           = 4096
llm_load_print_meta: n_layer          = 32
llm_load_print_meta: n_head           = 32
llm_load_print_meta: n_head_kv        = 8
llm_load_print_meta: n_rot            = 128
llm_load_print_meta: n_swa            = 0
llm_load_print_meta: n_embd_head_k    = 128
llm_load_print_meta: n_embd_head_v    = 128
llm_load_print_meta: n_gqa            = 4
llm_load_print_meta: n_embd_k_gqa     = 1024
llm_load_print_meta: n_embd_v_gqa     = 1024
llm_load_print_meta: f_norm_eps       = 0.0e+00
llm_load_print_meta: f_norm_rms_eps   = 1.0

### Sciphi Triplex

In [3]:
#setup the storage context
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# Construct the Knowlege Graph Undex
index = KnowledgeGraphIndex(nodes,
                                llm = llm_llama3,
                                max_triplets_per_chunk=5,
                                kg_triplet_extract_fn=triplets_extraction,
                                storage_context=storage_context,
                                embed_model=embed_model,
                                include_embeddings=True,
                                show_progress=True,
                                )
                                

Processing nodes:   0%|          | 0/2 [00:00<?, ?it/s]`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.70it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
You are not running the flash-attention implementation, expect numerical differences.
Generating embeddings: 100%|██████████| 5/5 [00:00<00:00, 158.82it/s]
Processing nodes:  50%|█████  

In [4]:
#evaluation
base_retriever = index.as_retriever(similarity_top_k=3)
base_retriever_evaluator = retriever_evaluation(base_retriever)

base_eval_results = await base_retriever_evaluator.aevaluate_dataset(qa_dataset,workers=10, show_progress=True, kwargs={"device":"cuda", "n_gpu_layers": -1})
display_results_retriever("Base Retriever", base_eval_results)



llama_print_timings:        load time =    2112.14 ms
llama_print_timings:      sample time =      45.85 ms /    87 runs   (    0.53 ms per token,  1897.66 tokens per second)
llama_print_timings: prompt eval time =    2112.04 ms /    91 tokens (   23.21 ms per token,    43.09 tokens per second)
llama_print_timings:        eval time =    8073.26 ms /    86 runs   (   93.88 ms per token,    10.65 tokens per second)
llama_print_timings:       total time =   10284.95 ms /   177 tokens
Llama.generate: prefix-match hit


In [19]:
#save the network

from pyvis.network import Network
from IPython.display import display
g = index.get_networkx_graph()
net = Network(notebook=True,cdn_resources="in_line", directed=True)
net.from_nx(g)
net.show("graph_triplex_plot.html")
net.save_graph("Knowledge_graph.html")

graph_triplex_plot.html


### GraphRag

In [3]:
KG_TRIPLET_EXTRACT_TMPL = """
-Goal-
Given a text document, identify all entities and their entity types from the text and all relationships among the identified entities.
Given the text, extract up to {max_knowledge_triplets} entity-relation triplets.

-Steps-
1. Identify all entities. For each identified entity, extract the following information:
- entity_name: Name of the entity, capitalized
- entity_type: Type of the entity
- entity_description: Comprehensive description of the entity's attributes and activities
Format each entity as ("entity")

2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
For each pair of related entities, extract the following information:
- source_entity: name of the source entity, as identified in step 1
- target_entity: name of the target entity, as identified in step 1
- relation: relationship between source_entity and target_entity
- relationship_description: explanation as to why you think the source entity and the target entity are related to each other

Format each relationship as ("relationship"$$$$<source_entity>$$$$<target_entity>$$$$<relation>$$$$<relationship_description>)

3. When finished, output.

-Real Data-
######################
text: {text}
######################
output:"""

In [4]:
kg_extractor = GraphRAGExtractor(
    llm=llm_llama3,
    extract_prompt=KG_TRIPLET_EXTRACT_TMPL,
    max_paths_per_chunk=5,
    parse_fn=parse_fn,
)

In [5]:
from llama_index.core import PropertyGraphIndex

index = PropertyGraphIndex(
    embed_model=embed_model,
    nodes=nodes,
    property_graph_store=GraphRAGStore(),
    kg_extractors=[kg_extractor],
    show_progress=True,
)

Extracting paths from text:   0%|          | 0/2 [00:00<?, ?it/s]
llama_print_timings:        load time =   11677.27 ms
llama_print_timings:      sample time =     185.33 ms /   329 runs   (    0.56 ms per token,  1775.18 tokens per second)
llama_print_timings: prompt eval time =   12676.46 ms /   555 tokens (   22.84 ms per token,    43.78 tokens per second)
llama_print_timings:        eval time =   32945.96 ms /   328 runs   (  100.44 ms per token,     9.96 tokens per second)
llama_print_timings:       total time =   46146.55 ms /   883 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =   11677.27 ms
llama_print_timings:      sample time =     298.52 ms /   512 runs   (    0.58 ms per token,  1715.11 tokens per second)
llama_print_timings: prompt eval time =    4411.62 ms /   168 tokens (   26.26 ms per token,    38.08 tokens per second)
llama_print_timings:        eval time =   51129.42 ms /   511 runs   (  100.06 ms per token,     9.99 tokens per secon

In [None]:
index.property_graph_store.build_communities()

In [96]:
#network
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
index.property_graph_store.save_networkx_graph(name=f"./Graphs_openai.html")

In [107]:
#evaluation
base_retriever = index.as_retriever(similarity_top_k=3)
base_retriever_evaluator = retriever_evaluation(base_retriever)

base_eval_results = await base_retriever_evaluator.aevaluate_dataset(qa_dataset,workers=10, show_progress=True, kwargs={"device":"cuda", "n_gpu_layers": -1})
display_results_retriever("Base Retriever", base_eval_results)

100%|██████████| 500/500 [02:52<00:00,  2.90it/s]


Unnamed: 0,Retriever Name,mrr,hit_rate
0,Base Retriever,0.154467,0.23


### SimpleGraph

In [7]:
#setup the storage context
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

index = PropertyGraphIndex(
    llm=llm_llama3,
    embed_model=embed_model,
    nodes=nodes,
    show_progress=True,
)

#Construct the Knowlege Graph Undex
index = KnowledgeGraphIndex(nodes,
                                           storage_context=storage_context,
                                           embed_model=embed_model,
                                          include_embeddings=True,
                                          show_progress=True)

In [None]:
from pyvis.network import Network
from IPython.display import display
g = index.get_networkx_graph()
net = Network(notebook=True,cdn_resources="in_line",directed=True)
net.from_nx(g)
net.show("graph10.html")
net.save_graph("Knowledge_graph.html")
#

In [None]:
base_retriever = index.as_retriever(similarity_top_k=3)
base_retriever_evaluator = retriever_evaluation(base_retriever)
base_eval_results = await base_retriever_evaluator.aevaluate_dataset(qa_dataset)
display_results_retriever("Base Retriever", base_eval_results)

Llama.generate: prefix-match hit


### Neo4j

In [None]:
graph_store = Neo4jPropertyGraphStore(
    username="neo4j",
    password="5hpb3rDtp99mgtFbFu68xJP19xlM3sBL5-pwdFZ5JpE",
    url="neo4j+s://4a10d0f8.databases.neo4j.io",
)#to complete with account

In [13]:
#create graph
index = PropertyGraphIndex(
    nodes,
    embed_model=embed_model,
    kg_extractors=[
        SchemaLLMPathExtractor(
            llm=llm_llama3
        )
    ],
    property_graph_store=graph_store,
    show_progress=True,
)

Extracting paths from text with schema: 100%|██████████| 500/500 [12:59<00:00,  1.56s/it]
Generating embeddings: 100%|██████████| 25/25 [00:11<00:00,  2.09it/s]
Generating embeddings: 100%|██████████| 60/60 [00:14<00:00,  4.09it/s]


In [16]:
#evaluation
base_retriever = index.as_retriever(similarity_top_k=3)
base_retriever_evaluator = retriever_evaluation(base_retriever)

base_eval_results = await base_retriever_evaluator.aevaluate_dataset(qa_dataset,workers=10, show_progress=True, kwargs={"device":"cuda", "n_gpu_layers": -1})
display_results_retriever("Base Retriever", base_eval_results)

100%|██████████| 500/500 [04:27<00:00,  1.87it/s]


Unnamed: 0,Retriever Name,mrr,hit_rate
0,Base Retriever,0.09286,0.136


In [None]:
#modify the kg extraction with predefined relationship

from typing import Literal
from llama_index.core import PropertyGraphIndex
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor

# best practice to use upper-case
entities = Literal[ "LOCATION", "DATE", "ENTITIES", "ORGANIZATION", "PERSON", "EVENT", "SERVICE", "NUMBER"]
relations = Literal["HAS", "PART_OF", "WORKED_ON", "PROVIDED_AID_TO", "LOCATED_IN", "HAPPENED_ON", "SUFFERED", "PARTNERED_WITH", "PROVIDED", "AFFECTED_BY", "ASKED"]

# define which entities can have which relations
validation_schema = {
    "PERSON": ["HAS", "PART_OF", "WORKED_ON", "PARTNERED_WITH", "SUFFERED", "ASKED"],
    "LOCATION": ["HAS", "LOCATED_IN"],
    "ORGANIZATION": ["HAS", "PART_OF", "PARTNERED_WITH", "PROVIDED_AID_TO", "AFFECTED_BY", "PROVIDED"],
    "ENTITIES": ["HAS", "PROVIDED_AID_TO", "AFFECTED_BY", "PROVIDED", "SUFFERED"],
    "EVENT": ["HAS", "HAPPENED_ON", "AFFECTED_BY", "SUFFERED"],
    "SERVICE": ["PROVIDED", "AFFECTED_BY"],
    "DATE": ["HAPPENED_ON"]
}

kg_extractor = SchemaLLMPathExtractor(
    llm=llm_llama3,
    possible_entities=entities,
    possible_relations=relations,
    kg_validation_schema=validation_schema,
    # if false, allows for values outside of the schema
    # useful for using the schema as a suggestion
    strict=True,
)

index = PropertyGraphIndex(
    nodes[:500],
    kg_extractors=[kg_extractor],
    embed_model=embed_model,
    property_graph_store=graph_store,
    show_progress=True,
)

In [30]:
#evaluation
base_retriever = index.as_retriever(similarity_top_k=3)
base_retriever_evaluator = retriever_evaluation(base_retriever)

base_eval_results = await base_retriever_evaluator.aevaluate_dataset(qa_dataset,workers=10, show_progress=True, kwargs={"device":"cuda", "n_gpu_layers": -1})
display_results_retriever("Base Retriever", base_eval_results)

100%|██████████| 500/500 [05:29<00:00,  1.52it/s]


Unnamed: 0,Retriever Name,mrr,hit_rate
0,Base Retriever,0.094471,0.154
