### Setup (Installs, Data, Models)

In [None]:
%pip install llama-index
%pip install llama-index-core==0.10.42
%pip install llama-index-embeddings-openai
%pip install llama-index-postprocessor-flag-embedding-reranker
%pip install git+https://github.com/FlagOpen/FlagEmbedding.git
%pip install llama-index-graph-stores-neo4j
%pip install llama-cloud-services

In [None]:
import os
from getpass import getpass

import nest_asyncio

# Patch asyncio so nested event loops are allowed inside the notebook kernel.
nest_asyncio.apply()

# SECURITY: API keys must never be stored in the notebook. The keys that used to be
# hardcoded in this cell are exposed and must be revoked and rotated.
# Each key is taken from the environment; when it is missing, it is prompted for
# without echoing the input.
for _key_name in ("LLAMA_CLOUD_API_KEY", "OPENAI_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"{_key_name}: ")


_______________________________
### Setup Model
Here we use gpt-4o and OpenAI's `text-embedding-3-small` embedding model.
_______________________________

In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

# Shared model handles; later cells reference `llm` and `embed_model` directly.
llm = OpenAI(model="gpt-4o")
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Register both models as the global LlamaIndex defaults.
Settings.llm, Settings.embed_model = llm, embed_model

# Chunking parameters, chosen following
# https://www.llamaindex.ai/blog/evaluating-the-ideal-chunk-size-for-a-rag-system-using-llamaindex-6207e5d3fec5
Settings.chunk_size, Settings.chunk_overlap = 1024, 200

# Show the context window resulting from the configuration above.
print(Settings.context_window)

_______________________________
### Load and parse Data with agent
_______________________________

In [None]:
from llama_cloud_services import LlamaParse

# Parse the PDF page by page with the agent parse mode; the table-related options
# are enabled and tables are emitted as HTML.
parser = LlamaParse(
    parse_mode="parse_page_with_agent",
    model="anthropic-sonnet-4.0",  # alternative tried: "openai-gpt-4-1-mini"
    high_res_ocr=True,
    adaptive_long_table=True,
    outlined_table_extraction=True,
    output_tables_as_HTML=True,
)
docs = parser.load_data("../data/lager.pdf")

_______________________________
### Split docs into pages
_______________________________

In [None]:
from copy import deepcopy
from llama_index.core.schema import TextNode, Document
from llama_index.core import VectorStoreIndex


def get_sub_docs(docs):
    """Return one Document per page for every parsed document.

    The text of each input document is split on the page separator (a line that
    contains only `---`). Every piece becomes its own Document and receives an
    independent deep copy of the parent's metadata.
    """
    return [
        Document(text=page_text, metadata=deepcopy(parent.metadata))
        for parent in docs
        for page_text in parent.text.split("\n---\n")
    ]


# One sub-document per page of the parsed input.
sub_docs = get_sub_docs(docs)

_______________________________
### Initialize Graph Store
Here we use Neo4j but you can also use our other integrations like Nebula
_______________________________

To launch Neo4j locally, first ensure you have Docker installed. Then you can launch the database with the following command:

```bash
docker run \
    -p 7474:7474 -p 7687:7687 \
    -v $PWD/data:/data -v $PWD/plugins:/plugins \
    --name neo4j-apoc \
    -e NEO4J_apoc_export_file_enabled=true \
    -e NEO4J_apoc_import_file_enabled=true \
    -e NEO4J_apoc_import_file_use__neo4j__config=true \
    -e NEO4JLABS_PLUGINS=\[\"apoc\"\] \
    neo4j:latest
```

From here, you can open the db at [http://localhost:7474/](http://localhost:7474/). On this page, you will be asked to sign in. Use the default username/password of `neo4j` and `neo4j`.

Once you log in for the first time, you will be asked to change the password.

After this, you are ready to create your first property graph!

In [None]:
import os
from getpass import getpass

from llama_index.graph_stores.neo4j import Neo4jPGStore

# Connection settings come from the environment so that the database password is
# not stored in the notebook. When NEO4J_PASSWORD is unset, the password is
# prompted for without echoing the input.
graph_store = Neo4jPGStore(
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: "),
    url=os.environ.get("NEO4J_URL", "bolt://localhost:7687"),
)
# Placeholder; not referenced by any other cell visible in this notebook.
vec_store = None

_______________________________
### Construct Knowledge Graph, Get Retrievers
This section shows you how to construct the knowledge graph over the existing documents.
_______________________________

#### Document indexing + Extraction (entities/relations)
**Note**: we have the default extractors (implicit path, simple llm path) configured. You can also choose to use a pre-defined schema as mentioned in this [notebook](https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/property_graph/property_graph_advanced.ipynb).

In [None]:
from llama_index.core.indices.property_graph import (
    ImplicitPathExtractor,
    SimpleLLMPathExtractor,
)
from llama_index.core import PropertyGraphIndex
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# LLM-driven path extractor; runs next to the implicit path extractor.
llm_path_extractor = SimpleLLMPathExtractor(
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.3),
    num_workers=4,
    max_paths_per_chunk=10,
)
kg_extractors = [ImplicitPathExtractor(), llm_path_extractor]

# Build the property graph over the per-page documents and persist it in Neo4j.
index = PropertyGraphIndex.from_documents(
    sub_docs,
    embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
    kg_extractors=kg_extractors,
    property_graph_store=graph_store,
    show_progress=True,
)

#### Neo4j
To see the entire graph -> run cypher command ```match n=() return n``` <br>
To delete the entire graph -> run cypher command ```match n=() detach delete n```

_______________________________
### Retrieval
_______________________________

#### Run baseline vector search

We also build a "baseline" vector index. This follows the "naive" RAG pipeline approach of chunking and vector embedding. We use this as a comparison point.

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine

# Baseline: plain vector index over the same per-page documents.
base_index = VectorStoreIndex.from_documents(
    sub_docs,
    embed_model=embed_model,
)
# Retriever returning the 10 most similar chunks.
vector_retriever = base_index.as_retriever(similarity_top_k=10)
base_query_engine = RetrieverQueryEngine(retriever=vector_retriever)

In [None]:
# Ask the baseline engine for a short summary of the document.
question = "Worum geht es in dem Dokument? Antworte in 2-3 Sätzen."
response = base_query_engine.query(question)
print(response)

In [None]:
# Fact lookup against the baseline engine.
question = "Wie ist die Seitenwirkung von dem Axialrillenkugellager?"
response = base_query_engine.query(question)
print(response)

In [None]:
# Ask the baseline engine to explain a statement about the bearing.
question = "Was bedeutet 'Das Axialrillenkugellager ist einseitig wirkend'? Antworte knapp basierend auf technische Quellen."
response = base_query_engine.query(question)
print(response)

In [None]:
# Ask the baseline engine to list the DIN designations found in the document.
question = "Welches DIN-Bezeichnungen (und Lagerreihen falls vorhanden) sind im Dokument zu finden? Kurz auflisten bitte."
response = base_query_engine.query(question)
print(response)

In [None]:
# Ask the baseline engine for the meaning of a bearing series designation.
question = "Was bedeutet 'Lagerreihe 62'? Kurz."
response = base_query_engine.query(question)
print(response)

### Test results 

✅ Succeeded all tests

In [None]:
# positive test case: table row lookup by inner diameter
question = "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=55"
response = base_query_engine.query(question)
print(response)

### Test results 

✅ Succeeded positive test case

In [None]:
# negative test cases: every diameter is queried on its own.
# The three prompts used to be adjacent string literals without commas; Python
# joins such literals, so they were sent as one single run-together query.
negative_queries = [
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=15",
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=20",
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=85",
]
for query in negative_queries:
    response = base_query_engine.query(query)
    print(f"\n>>> {query}")
    print(str(response))
# `response` stays bound to the answer of the last query (d=85).

### Test results 

✅ Succeeded test case '...mit d=15' <br>
✅ Succeeded test case '...mit d=20' <br>
❌ Failed test case '...mit d=85' <br>
→ The pipeline confused lowercase 'd' with uppercase 'D'.

In [None]:
# Show how many chunks backed the last answer, then dump each chunk's content.
source_nodes = response.source_nodes
print(len(source_nodes))
for node in source_nodes:
    print("---", node.get_content(), sep="\n")

#### Run KG search

In [None]:
from llama_index.core.indices.property_graph import VectorContextRetriever

# Retriever over the property graph store: top 7 matches, graph paths of depth 1,
# source text included in the results.
kg_retriever = VectorContextRetriever(
    index.property_graph_store,
    embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
    similarity_top_k=7,
    path_depth=1,
    include_text=True,
)

# Smoke test: run one table lookup through the KG retriever and print every hit.
kg_query = "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=55"
nodes = kg_retriever.retrieve(kg_query)
print(len(nodes))
for idx, node in enumerate(nodes):
    print(f">> IDX: {idx}, {node.get_content()}")

### Run both vector and KG search

In [None]:
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore
from typing import List


class CustomRetriever(BaseRetriever):
    """Custom retriever that performs both KG vector search and direct vector search.

    The results of both retrievers are merged and de-duplicated by node id.
    """

    def __init__(self, kg_retriever, vector_retriever):
        """Store the two underlying retrievers.

        Args:
            kg_retriever: retriever whose `retrieve()` is called first
                (the knowledge-graph retriever in this notebook).
            vector_retriever: retriever whose `retrieve()` is called second
                (the baseline vector retriever in this notebook).
        """
        self._kg_retriever = kg_retriever
        self._vector_retriever = vector_retriever
        # Initialise the BaseRetriever part of the object as well; without this
        # call the instance is only partially constructed.
        super().__init__()

    def _retrieve(self, query_bundle) -> List[NodeWithScore]:
        """Retrieve nodes given query.

        Args:
            query_bundle: the query, forwarded unchanged to both retrievers.

        Returns:
            The combined results with one entry per node id.
        """
        kg_nodes = self._kg_retriever.retrieve(query_bundle)
        vector_nodes = self._vector_retriever.retrieve(query_bundle)

        # Keyed by node id: KG hits come first; a vector hit with the same id
        # replaces the KG entry but keeps that entry's position in the result.
        unique_nodes = {n.node_id: n for n in kg_nodes}
        unique_nodes.update({n.node_id: n for n in vector_nodes})
        return list(unique_nodes.values())

In [None]:
# Combine the KG retriever and the baseline vector retriever.
custom_retriever = CustomRetriever(
    kg_retriever=kg_retriever,
    vector_retriever=vector_retriever,
)

In [None]:
# Run the same lookup through the combined retriever and print every hit.
combined_query = "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=55"
nodes = custom_retriever.retrieve(combined_query)
print(len(nodes))
for idx, node in enumerate(nodes):
    print(f">> IDX: {idx}, {node.get_content()}")

## Build Agent

Now that we have the retriever, we can treat it as a RAG pipeline tool, and wrap it with an agent that can perform basic CoT reasoning and maintain conversation memory over time.

In [None]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import RetrieverQueryEngine

# Name and description under which the tool is offered to the agent.
tool_metadata = ToolMetadata(
    name="query_tool",
    description="Provides information about row table lookups",
)

# Query engine on top of the combined retriever, wrapped as an agent tool.
kg_query_engine = RetrieverQueryEngine(custom_retriever)
kg_query_tool = QueryEngineTool(
    query_engine=kg_query_engine,
    metadata=tool_metadata,
)

In [None]:
# from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent.workflow import ReActAgent
from llama_index.core.workflow import Context

# NOTE(review): `verbose` and `allow_parallel_tool_calls` look carried over from
# the FunctionCallingAgentWorker API — confirm the workflow ReActAgent honours them.
agent_options = dict(
    tools=[kg_query_tool],
    llm=llm,
    verbose=True,
    allow_parallel_tool_calls=False,
)
agent = ReActAgent(**agent_options)

# One Context for the whole session; it is passed to every `agent.run(...)` call.
ctx = Context(agent)

## Try out Queries

Now that the agent is set up, let's try out some queries.

In [None]:
# Start the agent run; the events are consumed in the next cell.
question = "Welche Lagerrtypen haben ein Lager mit d=20?"
handler = agent.run(question, ctx=ctx)

In [None]:
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

In [None]:
handler = agent.run(
    "Welche Lagerrtypen haben ein Lager mit d=15?",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

### Test results 

✅ Succeeded positive test case '...mit d=20' <br>
✅ Succeeded negative test case '...mit d=15' <br>

In [None]:
# positive test case
handler = agent.run(
    "Listen Sie mir bitte die komplette Reihe für den Axialrillenkugellager DIN 711 wo d_w=30",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

### Test results 

✅ Succeeded test partially: <br>
    - detected d_w correctly <br>
    - forgot to mention d_g for that row (Parsing could be struggling with subscripts)

In [None]:
# positive test case
handler = agent.run(
    "Listen Sie mir bitte die komplette(n) Reihe(n) für den Kegelrollenlager DIN 720 wo ri_min=1.5",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

### Test results 

❌ Failed: Should have generated rows from 30207, 30208, 30209 and 30210 only. <br>
Cause: Parsing could be struggling with subscripts

In [None]:
# negative test case
handler = agent.run(
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=15"
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=20"
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=85",
    ctx=ctx)

from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

In [None]:
# memory test 1
handler = agent.run(
    "I deleted our conversation by mistake, what was my last question about? answer briefly",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

In [None]:
# memory test 2
handler = agent.run(
    "And what was your answer to my last question? answer briefly",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

In [None]:
# image test 2
handler = agent.run(
    "Can you briefly describe the figure of the 'Kegelrollenlager DIN 720' in 2-3 sentences in German?",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

In [None]:
# positive test case
handler = agent.run(
    "Listen Sie mir bitte die komplette(n) Reihe(n) für den Kegelrollenlager DIN 720 wo ri_min=1.5",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

In [None]:
# negative test case
handler = agent.run(
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=15"
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=20"
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 mit d=85",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

#### slightly better user prompt (not system prompt)

In [None]:
handler = agent.run(
    "Gib mir die ganze Reihe für den Zylinderrollenlager DIN5412 und Lagerreihe NU2 mit d=85. Achtung; Tabellenkopf 'd' and 'D' sind nicht gleich.",
    ctx=ctx)
from llama_index.core.agent.workflow import ToolCallResult, AgentStream

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler