# Knowledge Graph RAG

In this notebook we are going to show you how to build a Knowledge Graph RAG, for this first hands on with knowledge graphs we are going to use the `LocalFactMemory`, but keep in mind that some features are only available with our integrations, in particular the Cypher query engines.


In [1]:
import hybridagi.core.datatypes as dt

input_data = \
[
    {
        "Title": "The Shawshank Redemption",
        "Year Produced": 1994,
        "Actors": ["Tim Robbins", "Morgan Freeman"],
        "Directors": ["Frank Darabont"],
        "Genres": ["Drama", "Crime"],
        "Ratings": 9.3
    },
    {
        "Title": "The Godfather",
        "Year Produced": 1972,
        "Actors": ["Marlon Brando", "Al Pacino", "Diane Keaton"],
        "Directors": ["Francis Ford Coppola"],
        "Genres": ["Crime", "Drama"],
        "Ratings": 9.2
    },
    {
        "Title": "The Godfather: Part II",
        "Year Produced": 1974,
        "Actors": ["Al Pacino", "Robert De Niro", "Diane Keaton"],
        "Directors": ["Francis Ford Coppola"],
        "Genres": ["Crime", "Drama"],
        "Ratings": 9.0
    },
    {
        "Title": "The Dark Knight",
        "Year Produced": 2008,
        "Actors": ["Christian Bale", "Heath Ledger", "Maggie Gyllenhaal"],
        "Directors": ["Christopher Nolan"],
        "Genres": ["Action", "Crime", "Drama"],
        "Ratings": 9.0
    },
    {
        "Title": "Pulp Fiction",
        "Year Produced": 1994,
        "Actors": ["John Travolta", "Uma Thurman", "Samuel L. Jackson"],
        "Directors": ["Quentin Tarantino"],
        "Genres": ["Crime", "Drama"],
        "Ratings": 8.9
    },
    {
        "Title": "Schindler's List",
        "Year Produced": 1993,
        "Actors": ["Liam Neeson", "Ralph Fiennes", "Embeth Davidtz"],
        "Directors": ["Steven Spielberg"],
        "Genres": ["Drama", "History", "War"],
        "Ratings": 8.9
    },
    {
        "Title": "12 Angry Men",
        "Year Produced": 1957,
        "Actors": ["Henry Fonda", "Lee J. Cobb", "Ed Begley"],
        "Directors": ["Sidney Lumet"],
        "Genres": ["Drama"],
        "Ratings": 8.9
    },
    {
        "Title": "The Lord of the Rings: The Return of the King",
        "Year Produced": 2003,
        "Actors": ["Elijah Wood", "Viggo Mortensen", "Cate Blanchett"],
        "Directors": ["Peter Jackson"],
        "Genres": ["Adventure", "Drama", "Fantasy"],
        "Ratings": 8.9
    },
    {
        "Title": "Forrest Gump",
        "Year Produced": 1994,
        "Actors": ["Tom Hanks", "Robin Wright", "Sally Field"],
        "Directors": ["Robert Zemeckis"],
        "Genres": ["Drama", "Romance"],
        "Ratings": 8.8
    },
    {
        "Title": "Inception",
        "Year Produced": 2010,
        "Actors": ["Leonardo DiCaprio", "Joseph Gordon-Levitt", "Ellen Page"],
        "Directors": ["Christopher Nolan"],
        "Genres": ["Action", "Adventure", "Sci-Fi"],
        "Ratings": 8.8
    }
]

input_facts = dt.FactList()

for data in input_data:
    movie = dt.Entity(name=data["Title"], label="Movie")
    year = dt.Entity(name=str(data["Year Produced"]), label="Year")
    input_facts.facts.append(dt.Fact(subj=movie, rel=dt.Relationship(name="Produced in"), obj=year))
    for actor in data["Actors"]:
        actor_entity = dt.Entity(name=actor, label="Actor")
        input_facts.facts.append(dt.Fact(subj=actor_entity, rel=dt.Relationship(name="Played in"), obj=movie))
    for director in data["Directors"]:
        director_entity = dt.Entity(name=actor, label="Director")
        input_facts.facts.append(dt.Fact(subj=movie, rel=dt.Relationship(name="Directed by"), obj=director_entity))
    for genre in data["Genres"]:
        genre_entity = dt.Entity(name=genre, label="Genre")
        input_facts.facts.append(dt.Fact(subj=movie, rel=dt.Relationship(name="Has genre"), obj=genre_entity))
    rating = dt.Entity(name=str(data["Ratings"]), label="Ratings")
    input_facts.facts.append(dt.Fact(subj=movie, rel=dt.Relationship(name="Has ratings of"), obj=rating))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from hybridagi.memory.integration.local import LocalFactMemory

fact_memory = LocalFactMemory(index_name="movies_data")
fact_memory.update(input_facts)

fact_memory.show()

movies_data_fact_memory.html


#### A simple pipeline: deduplicating entities

You will notice that each graph created by our input data are not connected, this arise when the entities are duplicated. To solve this issue, we are going to apply a very simple pipeline with one step that deduplicate the entities using their names and labels.

In [3]:
from hybridagi.core.pipeline import Pipeline
from hybridagi.embeddings import SentenceTransformerEmbeddings
from hybridagi.modules.deduplicator import EntityDeduplicator
from hybridagi.modules.embedders import EntityEmbedder, FactEmbedder

pipeline = Pipeline()

embeddings = SentenceTransformerEmbeddings(
    model_name_or_path = "all-MiniLM-L6-v2",
    dim = 384, # The dimention of the embeddings vector (also called dense vector)
)

pipeline.add("deduplicate_entities", EntityDeduplicator(method="exact"))
pipeline.add("embed_entities", EntityEmbedder(embeddings=embeddings))
pipeline.add("embed_facts", FactEmbedder(embeddings=embeddings))

output_facts = pipeline(input_facts)

fact_memory.update(output_facts) # Update the fact memory with our cleaned data

fact_memory.show()

movies_data_fact_memory.html


# Making the Knowledge Graph RAG Agent



In [4]:
import hybridagi.core.graph_program as gp

main = gp.GraphProgram(
    name = "main",
    description = "The main program",
)

main.add(gp.Action(
    id = "fact_search",
    purpose = "Find relevant facts",
    tool = "FactSearch",
    prompt = "Please infer the similarity search query (only ONE item) based on the Objective's question",
))

main.add(gp.Action(
    id = "answer",
    purpose = "Answer the Objective's question the context's facts",
    tool = "Speak",
    prompt = """
Please answer the Objective's question using the relevant facts in your context.
If no facts are relevant just say that you don't know.
Don't state the Objective's question and only give the factual answer.
""",
))

main.connect("start", "fact_search")
main.connect("fact_search", "answer")
main.connect("answer", "end")

main.build() # Verify that the graph program is correct

# Let's look at it

print(main) 

main.show()

from hybridagi.memory.integration.local import LocalProgramMemory

program_memory = LocalProgramMemory(index_name="knowledge_rag")

program_memory.update(main)


// @desc: The main program
CREATE
// Nodes declaration
(start:Control {id: "start"}),
(end:Control {id: "end"}),
(fact_search:Action {
  id: "fact_search",
  purpose: "Find relevant facts",
  tool: "FactSearch",
  prompt: "Please infer the similarity search query (only ONE item) based on the Objective's question"
}),
(answer:Action {
  id: "answer",
  purpose: "Answer the Objective's question the context's facts",
  tool: "Speak",
  prompt: "\nPlease answer the Objective's question using the relevant facts in your context.\nIf no facts are relevant just say that you don't know.\nDon't state the Objective's question and only give the factual answer.\n"
}),
// Structure declaration
(start)-[:NEXT]->(fact_search),
(fact_search)-[:NEXT]->(answer),
(answer)-[:NEXT]->(end)
main.html


In [5]:
import dspy
from hybridagi.core.datatypes import AgentState, Query
from hybridagi.modules.agents import GraphInterpreterAgent
from hybridagi.modules.agents.tools import SpeakTool, FactSearchTool
from hybridagi.modules.retrievers.integration.local import FAISSFactRetriever

agent_state = AgentState()

tools = [
    SpeakTool(
        agent_state = agent_state,
    ),
    FactSearchTool(
        retriever = FAISSFactRetriever(
            fact_memory = fact_memory,
            embeddings = embeddings,
            distance = "cosine",
            max_distance = 1.0,
            k = 5,
            reranker = None,
        )
    )
]

agent = GraphInterpreterAgent(
    agent_state = agent_state,
    program_memory = program_memory,
    tools = tools,
)

# We can now setup the LLM using Ollama client from DSPy

lm = dspy.OllamaLocal(model='mistral', max_tokens=1024, stop=["\n\n\n"])
dspy.configure(lm=lm)

result = agent(Query(query="In which movie Elijah Wood played?"))

print(result.final_answer)

[35m--- Step 0 ---
Call Program: main
Program Purpose: In which movie Elijah Wood played?[0m
[36m--- Step 1 ---
Action Purpose: Find relevant facts
Action: {
  "query": "Elijah Wood movies",
  "facts": [
    {
      "fact": "(:Actor {name:\"Elijah Wood\"})-[:Played in]->(:Movie {name:\"The Lord of the Rings: The Return of the King\"})"
    },
    {
      "fact": "(:Movie {name:\"Forrest Gump\"})-[:Has genre]->(:Genre {name:\"Drama\"})"
    },
    {
      "fact": "(:Movie {name:\"Forrest Gump\"})-[:Directed by]->(:Director {name:\"Sally Field\"})"
    },
    {
      "fact": "(:Movie {name:\"Forrest Gump\"})-[:Produced in]->(:Year {name:\"1994\"})"
    },
    {
      "fact": "(:Actor {name:\"Tom Hanks\"})-[:Played in]->(:Movie {name:\"Forrest Gump\"})"
    }
  ]
}[0m
[36m--- Step 2 ---
Action Purpose: Answer the Objective's question the context's facts
Action: {
  "message": "Elijah Wood played in The Lord of the Rings: The Return of the King."
}[0m
[35m--- Step 3 ---
End Program:

In [6]:
result = agent(Query(query="What kind of Aventure movie can you recommand me?"))

print(result.final_answer)

[35m--- Step 0 ---
Call Program: main
Program Purpose: What kind of Aventure movie can you recommand me?[0m
[36m--- Step 1 ---
Action Purpose: Find relevant facts
Action: {
  "query": "Adventure movies",
  "facts": [
    {
      "fact": "(:Movie {name:\"The Lord of the Rings: The Return of the King\"})-[:Has genre]->(:Genre {name:\"Adventure\"})"
    },
    {
      "fact": "(:Movie {name:\"Inception\"})-[:Has genre]->(:Genre {name:\"Adventure\"})"
    },
    {
      "fact": "(:Movie {name:\"The Lord of the Rings: The Return of the King\"})-[:Has genre]->(:Genre {name:\"Drama\"})"
    },
    {
      "fact": "(:Movie {name:\"The Lord of the Rings: The Return of the King\"})-[:Has genre]->(:Genre {name:\"Fantasy\"})"
    },
    {
      "fact": "(:Movie {name:\"Forrest Gump\"})-[:Has genre]->(:Genre {name:\"Drama\"})"
    }
  ]
}[0m
[36m--- Step 2 ---
Action Purpose: Answer the Objective's question the context's facts
Action: {
  "message": "Based on the adventure movies found in my c