## Graph RAG

### Load Embedding model

In [1]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embedding model used for the graph index and the vector retriever below.
# NOTE(review): trust_remote_code=True presumably lets code shipped with the
# model repository run locally — confirm the repository is trusted.
embed_model = HuggingFaceEmbedding(
    model_name="nomic-ai/nomic-embed-text-v1.5",
    cache_folder="./hf_cache",
    trust_remote_code=True,
)

  from .autonotebook import tqdm as notebook_tqdm
!!!!!!!!!!!!megablocks not available, using torch.matmul instead
<All keys matched successfully>


### Load LLM model

In [2]:
from llama_index.llms.ollama import Ollama

# LLM served by Ollama; used for path extraction, synonym retrieval and answering.
# NOTE(review): request_timeout looks like it is expressed in seconds by the
# llama-index Ollama wrapper; 60000 would then be ~16.7 hours — confirm intent.
llm = Ollama(
    model="llama3.2:1b",
    request_timeout=60000,
)

### Read the documents

In [3]:
from llama_index.core import SimpleDirectoryReader

# Point the reader at the local ./data/ directory and load its contents
# into `documents`, which the index build consumes later.
loader = SimpleDirectoryReader(input_dir="./data/")
documents = loader.load_data()

In [4]:
# Sanity check: show the first 100 characters of the first loaded document.
first_document_text = documents[0].text
print(first_document_text[:100])

What I Worked On

February 2021

Before college the two main things I worked on, outside of school, 


### Create a graph database

In [5]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

import os

# Connection details are read from the environment so credentials never have
# to be typed into (or committed with) the notebook. Each fallback equals the
# value that used to be hard-coded here, so behaviour is unchanged when the
# variables are not set.
#   NEO4J_USERNAME  database user      (fallback: "neo4j")
#   NEO4J_PASSWORD  database password  (fallback: empty string)
#   NEO4J_URI       connection URI     (fallback: "neo4j://localhost:7687")
# NOTE(review): timeout / max_connection_pool_size / max_connection_lifetime
# are presumably tuning options forwarded to the Neo4j driver — confirm units.
graph_store = Neo4jPropertyGraphStore(
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", ""),
    url=os.environ.get("NEO4J_URI", "neo4j://localhost:7687"),
    timeout=60,
    max_connection_pool_size=50,
    max_connection_lifetime=3600,
)

### Build a graph index

In [6]:
from llama_index.core import PropertyGraphIndex
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor

In [7]:
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop and the index build in the next cell uses async code — confirm.
import nest_asyncio
nest_asyncio.apply()

In [8]:
# The LLM-backed extractor pulls paths out of the document text; this is the
# slow step of the build (the recorded run spent ~2 minutes on 4 chunks).
data_extractor = SimpleLLMPathExtractor(llm=llm)

# Build the property-graph index from the loaded documents, storing the result
# in the Neo4j-backed graph store and embedding with the local embedding model.
index = PropertyGraphIndex.from_documents(
    documents,
    property_graph_store=graph_store,
    kg_extractors=[data_extractor],
    embed_model=embed_model,
    show_progress=True,
)

Parsing nodes: 100%|██████████| 2/2 [00:00<00:00, 151.12it/s]
Extracting paths from text: 100%|██████████| 4/4 [02:13<00:00, 33.38s/it]
Generating embeddings: 100%|██████████| 1/1 [00:06<00:00,  6.36s/it]
Generating embeddings: 100%|██████████| 3/3 [00:05<00:00,  1.85s/it]


### Set up global configuration

In [9]:
from llama_index.core import Settings

# Register the local models on the global Settings object.
# NOTE(review): later cells that do not pass llm/embed_model explicitly
# (e.g. index.as_query_engine) presumably pick these up — confirm.
Settings.embed_model = embed_model
Settings.llm = llm

### Set up retrieval

In [10]:
from llama_index.core.indices.property_graph import LLMSynonymRetriever, VectorContextRetriever

In [11]:
# LLM-driven sub-retriever over the index's property graph store.
# include_text=False: the recorded output further down shows only triplet
# strings, not source chunk text.
synonym_retriever = LLMSynonymRetriever(
    index.property_graph_store,
    include_text=False,
    llm=llm,
)

In [12]:
# Embedding-based sub-retriever over the same property graph store,
# using the same embedding model that was used to build the index.
vector_retriever = VectorContextRetriever(
    index.property_graph_store,
    include_text=False,
    embed_model=embed_model,
)

In [13]:
# Combine both sub-retrievers into a single retriever exposed by the index.
retriever = index.as_retriever(
    sub_retrievers=[synonym_retriever, vector_retriever],
)

In [23]:
# Run the combined retriever on a sample question; `context` holds the hits.
question = "Who is paul graham"
context = retriever.retrieve(question)

In [24]:
# Print the text of every retrieved item, one per line.
for retrieved in context:
    print(retrieved.text)


Subject -> Predicate -> Object
Event: publication of  on lisp -> Book -> 1993
Event: publication of  on lisp -> Book -> 1993 - repeated
Perspective: theory -> System -> Computer science
Perspective: theory -> Admiration -> Building things


### Define query engine

In [25]:
# Build a query engine directly from the index and ask the sample question.
# NOTE(review): no sub_retrievers are passed here, so the custom `retriever`
# assembled earlier in the notebook is not reused by this engine — confirm
# that relying on the index's default retrieval is intended.
query_engine = index.as_query_engine(include_text=True)

response = query_engine.query("Who is paul graham")

In [26]:
from IPython.display import Markdown, display

# Render the generated answer as Markdown in the cell output.
answer_text = str(response.response)
display(Markdown(answer_text))

Based on the provided text, it appears that the subject of the book "On Lisp" is Paul Graham.

In [27]:
# Number of source nodes attached to the response (1 in the recorded run).
len(response.source_nodes)

1