## Setup question answering agent over RDF graph database using Langchain

In [1]:
from langchain_community.graphs.rdf_graph import RdfGraph
from langchain_community.chains.graph_qa.sparql import GraphSparqlQAChain
from langchain_google_genai import ChatGoogleGenerativeAI

In [2]:
from dotenv import load_dotenv
load_dotenv()

True

### Load RDF data into an RdfGraph

In [3]:
GRAPH_PATH = "/home/richhiey/Desktop/code/genai/genai_db_poc/opt_23.ttl"

In [4]:
rdf_graph = RdfGraph(source_file=GRAPH_PATH, serialization="ttl")

In [5]:
print(rdf_graph.load_schema())
print(rdf_graph.query("""PREFIX duke: <http://example.org/duke#>

SELECT ?transformer ?unit
WHERE {
  ?transformer duke:has_transformer_unit ?unit .
}"""))

print(rdf_graph.query("""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX duke: <http://example.org/duke#>

SELECT ?transformer ?transformer_unit
WHERE {
  ?transformer duke:has_transformer_unit ?transformer_unit .
}
"""))

None
[(rdflib.term.URIRef('http://duke.com/model/123456789/transformer/123456789_DUMMY/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/123456789_DUMMY_A')), (rdflib.term.URIRef('http://duke.com/model/123456789/transformer/123456789_DUMMY/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/123456789_DUMMY_B')), (rdflib.term.URIRef('http://duke.com/model/123456789/transformer/123456789_DUMMY/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/123456789_DUMMY_C')), (rdflib.term.URIRef('http://duke.com/model/414970912/transformer/414970912_BK1/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/414970912_BK1_A')), (rdflib.term.URIRef('http://duke.com/model/414970912/transformer/414970912_BK1/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/414970912_BK1_B')), (rdflib.term.URIRef('http://duke.com/model/414970912/transformer/414970912_BK1/transformer_unit_l

### Run Gemini model over graph data to answer simple questions

In [6]:
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

Langchain provides a useful utility called GraphSparqlQAChain that sets up a sequence of steps to answer questions on graph data.
https://python.langchain.com/v0.2/api_reference/_modules/langchain_community/chains/graph_qa/sparql.html#GraphSparqlQAChain

The source for this class shows the various prompts used to setup an agent to generate valid SparQL and execute them against a graph database.

Further, to implement RAG with graphs in Langchain, utilities such as CassandraGraphVectorStore are provided that can store documents with links between them.
Use the appropriate GraphStore to store relationships and retrieve them as context when answering questions.
https://python.langchain.com/api_reference/community/graph_vectorstores/langchain_community.graph_vectorstores.cassandra.CassandraGraphVectorStore.html

In [8]:
chain = GraphSparqlQAChain.from_llm(
    graph=rdf_graph, llm=model, verbose=True, allow_dangerous_requests=True, return_sparql_query=True
)
response = chain.invoke({"query": "List all transformers and their respective units. Do not enclose the generated SparQL query in markdown text. Only return the raw SparQL query for execution."})
response



[1m> Entering new GraphSparqlQAChain chain...[0m
Identified intent:
[32;1m[1;3mSELECT[0m
Generated SPARQL:
[32;1m[1;3mPREFIX duke: <http://example.org/duke#>
SELECT ?transformer ?unit
WHERE {
  ?transformer duke:has_transformer_unit ?unit .
}[0m
Full Context:
[32;1m[1;3m[(rdflib.term.URIRef('http://duke.com/model/123456789/transformer/123456789_DUMMY/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/123456789_DUMMY_A')), (rdflib.term.URIRef('http://duke.com/model/123456789/transformer/123456789_DUMMY/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/123456789_DUMMY_B')), (rdflib.term.URIRef('http://duke.com/model/123456789/transformer/123456789_DUMMY/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/123456789_DUMMY_C')), (rdflib.term.URIRef('http://duke.com/model/414970912/transformer/414970912_BK1/transformer_unit_list'), rdflib.term.URIRef('http://duke.com/transformer_unit/414970912_BK1_A'

{'query': 'List all transformers and their respective units. Do not enclose the generated SparQL query in markdown text. Only return the raw SparQL query for execution.',
 'result': 'The transformers and their units are as follows:\n\n* **Transformer 123456789_DUMMY:**\n    * 123456789_DUMMY_A\n    * 123456789_DUMMY_B\n    * 123456789_DUMMY_C\n* **Transformer 414970912_BK1:**\n    * 414970912_BK1_A\n    * 414970912_BK1_B\n    * 414970912_BK1_C',
 'sparql_query': 'PREFIX duke: <http://example.org/duke#>\nSELECT ?transformer ?unit\nWHERE {\n  ?transformer duke:has_transformer_unit ?unit .\n}'}