# Build a Question Answering application over a graph database

In [1]:
# os is used in the next cell to read the Neo4j connection settings.
import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment; the boolean
# it returns is what this cell displays as its output.
load_dotenv()

True

In [2]:
# Neo4j connection settings, read from the process environment.
# The URI is used exactly as configured, scheme included (the output captured
# for this cell shows a neo4j+ssc:// address); nothing here rewrites it.
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

# os.getenv returns None for an unset variable, which would otherwise only
# show up later as an opaque driver error. Name the missing (or empty)
# settings here so the cause is visible next to where they are read.
missing_neo4j_settings = [
    name
    for name, value in (
        ("NEO4J_URI", NEO4J_URI),
        ("NEO4J_USERNAME", NEO4J_USERNAME),
        ("NEO4J_PASSWORD", NEO4J_PASSWORD),
    )
    if not value
]
if missing_neo4j_settings:
    print(
        "WARNING: missing environment variables: "
        f"{', '.join(missing_neo4j_settings)} (check your .env file)"
    )

# The password is deliberately never printed.
print(f"Attempting connection to: {NEO4J_URI}")
print(f"Username: {NEO4J_USERNAME}")

Attempting connection to: neo4j+ssc://b74b1fa9.databases.neo4j.io
Username: neo4j


In [3]:
from langchain_community.graphs import Neo4jGraph

# Build the graph wrapper from the connection settings read from the
# environment in the previous cell.
# NOTE(review): the output captured under this cell looks like the tail of a
# deprecation warning for this class - confirm which import path is current.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)
graph  # echo the object so the cell shows that construction succeeded

  graph= Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD)


<langchain_community.graphs.neo4j_graph.Neo4jGraph at 0x29e31c33090>

In [34]:
## Movie dataset
# Cypher statement that loads movies_small.csv from the URL below, row by row:
#   - MERGE one Movie node per row, keyed on id (row.movieId), and set its
#     released, title and imdbRating properties;
#   - split the '|'-separated director, actors and genres columns and, for
#     each entry, MERGE a Person or Genre node (by trimmed name) together
#     with its DIRECTED, ACTED_IN or IN_GENRE relationship to the movie.
# The string is only defined here; it is executed by graph.query() further down.
movie_query = """
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv' AS row

MERGE(m:Movie{id:row.movieId})
SET m.released = date(row.released),
    m.title = row.title,
    m.imdbRating = toFloat(row.imdbRating)
FOREACH (director in split(row.director, '|') |
    MERGE (d:Person {name: trim(director)})
    MERGE (d)-[:DIRECTED]->(m)
)
FOREACH (actor in split(row.actors, '|') |
    MERGE (a:Person {name: trim(actor)})
    MERGE (a)-[:ACTED_IN]->(m)
)
FOREACH (genre in split(row.genres, '|') |
    MERGE (g:Genre {name: trim(genre)}) 
    MERGE (m)-[:IN_GENRE]->(g)
)
"""

In [35]:
# Display the query as a raw Python string (escape sequences visible) to inspect it.
movie_query

"\nLOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv' AS row\n\nMERGE(m:Movie{id:row.movieId})\nSET m.released = date(row.released),\n    m.title = row.title,\n    m.imdbRating = toFloat(row.imdbRating)\nFOREACH (director in split(row.director, '|') |\n    MERGE (d:Person {name: trim(director)})\n    MERGE (d)-[:DIRECTED]->(m)\n)\nFOREACH (actor in split(row.actors, '|') |\n    MERGE (a:Person {name: trim(actor)})\n    MERGE (a)-[:ACTED_IN]->(m)\n)\nFOREACH (genre in split(row.genres, '|') |\n    MERGE (g:Genre {name: trim(genre)}) \n    MERGE (m)-[:IN_GENRE]->(g)\n)\n"

In [36]:
# Execute the import statement against the database. The statement has no
# RETURN clause, so the displayed result is an empty list.
graph.query(movie_query)

[]

In [37]:
# Refresh the wrapper's schema after the import, then print it: node labels
# with their properties, followed by the relationship patterns.
graph.refresh_schema()
print(graph.schema)

Node properties:
Movie {id: STRING, released: DATE, title: STRING, imdbRating: FLOAT}
Person {name: STRING}
Genre {name: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)


In [38]:
from langchain.chat_models import init_chat_model

# The "groq:" prefix selects the provider and the remainder names the model
# (the repr shown for this cell is a ChatGroq with model_name 'openai/gpt-oss-20b').
# NOTE(review): presumably the Groq API key is picked up from the environment - confirm.
chat_model = init_chat_model("groq:openai/gpt-oss-20b")
chat_model

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000029E0995B150>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000029E61CDF890>, model_name='openai/gpt-oss-20b', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [39]:
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain

# Question-answering chain over the graph: as the verbose traces of the later
# cells show, it generates a Cypher statement for the question, runs it, and
# answers from the returned rows.
# allow_dangerous_requests=True means LLM-generated Cypher is executed against
# the database. NOTE(review): confirm the Neo4j credentials are scoped with
# that in mind.
chain = GraphCypherQAChain.from_llm(
    llm=chat_model,
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True,
)
chain

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x0000029E31C33090>, cypher_generation_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['question', 'schema'], input_types={}, partial_variables={}, template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000029E0995B150>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000029E61CDF890>, model_name=

In [40]:
# Single-answer question. The chain was built with verbose=True, so it prints
# the generated Cypher and the rows it returned before the final answer.
question = "Who was the director of the movie Casino?"
response = chain.invoke({"query": question})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:DIRECTED]->(m:Movie {title: "Casino"})
RETURN p.name;[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:DIRECTED]->(m:Movie {title: "Casino"})
RETURN p.name;[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Martin Scorsese'}][0m
Full Context:
[32;1m[1;3m[{'p.name': 'Martin Scorsese'}][0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


{'query': 'Who was the director of the movie Casino?',
 'result': 'Martin Scorsese'}

In [41]:
# Multi-row question: the Cypher result holds one row per actor, and the
# chain condenses those rows into a single answer string.
question = "Who were the actors of the movie Casino?"
response = chain.invoke({"query": question})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:ACTED_IN]->(m:Movie {title: "Casino"})
RETURN p.name;[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Robert De Niro'}, {'p.name': 'Joe Pesci'}, {'p.name': 'Sharon Stone'}, {'p.name': 'James Woods'}][0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:ACTED_IN]->(m:Movie {title: "Casino"})
RETURN p.name;[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Robert De Niro'}, {'p.name': 'Joe Pesci'}, {'p.name': 'Sharon Stone'}, {'p.name': 'James Woods'}][0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


{'query': 'Who were the actors of the movie Casino?',
 'result': 'Robert De\u202fNiro, Joe Pesci, Sharon Stone, James Woods.'}