## BUILDING A QnA App using Graph DB

In [26]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [27]:
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

In [28]:
import neo4j
import langchain_neo4j

print(neo4j.__version__)
print(langchain_neo4j.__version__)

5.28.2
0.5.0


In [29]:
from langchain_community.graphs import Neo4jGraph

graph = Neo4jGraph(
    url=NEO4J_URI, 
    username=NEO4J_USERNAME, 
    password=NEO4J_PASSWORD
)

graph

<langchain_community.graphs.neo4j_graph.Neo4jGraph at 0x28b515122d0>

In [30]:
## Dataset Movie
movie_query="""
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv' as row

MERGE(m:Movie{id:row.movieId})
SET m.released = date(row.released),
    m.title = row.title,
    m.imdbRating = toFloat(row.imdbRating)
FOREACH (director in split(row.director, '|') | 
    MERGE (p:Person {name:trim(director)})
    MERGE (p)-[:DIRECTED]->(m))
FOREACH (actor in split(row.actors, '|') | 
    MERGE (p:Person {name:trim(actor)})
    MERGE (p)-[:ACTED_IN]->(m))
FOREACH (genre in split(row.genres, '|') | 
    MERGE (g:Genre {name:trim(genre)})
    MERGE (m)-[:IN_GENRE]->(g))
"""


movie_query

"\nLOAD CSV WITH HEADERS FROM\n'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv' as row\n\nMERGE(m:Movie{id:row.movieId})\nSET m.released = date(row.released),\n    m.title = row.title,\n    m.imdbRating = toFloat(row.imdbRating)\nFOREACH (director in split(row.director, '|') | \n    MERGE (p:Person {name:trim(director)})\n    MERGE (p)-[:DIRECTED]->(m))\nFOREACH (actor in split(row.actors, '|') | \n    MERGE (p:Person {name:trim(actor)})\n    MERGE (p)-[:ACTED_IN]->(m))\nFOREACH (genre in split(row.genres, '|') | \n    MERGE (g:Genre {name:trim(genre)})\n    MERGE (m)-[:IN_GENRE]->(g))\n"

In [31]:
graph.query(movie_query)

[]

In [32]:
graph.refresh_schema()
print(graph.schema)

Node properties:
Person {name: STRING}
Movie {title: STRING, released: DATE, id: STRING, imdbRating: FLOAT}
Genre {name: STRING}
Relationship properties:

The relationships:
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)
(:Movie)-[:IN_GENRE]->(:Genre)


In [33]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [34]:
groq_api_key = os.getenv("GROQ_API_KEY")

In [35]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model = "llama-3.1-8b-instant",
    groq_api_key = groq_api_key
)

llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000028B52B97FB0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000028B529D0CB0>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [36]:
llm.invoke("WHo are you?")

AIMessage(content="I'm an artificial intelligence model known as a large language model (LLM) or conversational AI. I'm a computer program designed to understand and generate human-like text responses to a wide range of questions and topics.\n\nI don't have a personal identity or emotions, but I'm here to assist and provide information to the best of my abilities. I can:\n\n1. Answer questions on various subjects, from science and history to entertainment and culture.\n2. Provide definitions and explanations of words and concepts.\n3. Offer suggestions and ideas for creative projects or everyday problems.\n4. Engage in conversations and discussions on various topics.\n5. Translate text from one language to another.\n6. Generate text based on a prompt or topic.\n\nI'm constantly learning and improving, so I appreciate any feedback or corrections you can provide to help me become a better conversational AI.", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 1

In [37]:
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain

chain = GraphCypherQAChain.from_llm(
    graph = graph,
    llm = llm,
    verbose = True,
    allow_dangerous_requests = True,
    exclude_types=["Genre"]
)

chain

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x0000028B515122D0>, cypher_generation_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['question', 'schema'], input_types={}, partial_variables={}, template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000028B52B97FB0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000028B529D0CB0>, model_name=

In [38]:
query = "Who are the actors of the movie Casino"

response = chain.invoke({"query":query})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(d:Person) WHERE m.title = "Casino" RETURN p.name[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Robert De Niro'}, {'p.name': 'Joe Pesci'}, {'p.name': 'Sharon Stone'}, {'p.name': 'James Woods'}][0m

[1m> Finished chain.[0m


{'query': 'Who are the actors of the movie Casino',
 'result': 'Robert De Niro, Joe Pesci, Sharon Stone, James Woods are the actors of the movie Casino.'}

In [43]:
examples = [
    {
        "question": "How many artists are there?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)",
    },
    {
        "question": "Which actors played in the movie Casino?",
        "query": "MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name",
    },
    {
        "question": "How many movies has Tom Hanks acted in?",
        "query": "MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)",
    },
    {
        "question": "List all the genres of the movie Schindler's List",
        "query": "MATCH (m:Movie {{title: 'Schindler\\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name",
    },
    {
        "question": "Which actors have worked in movies from both the comedy and action genres?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name",
    },
    {
        "question": "Which directors have made movies with at least three different actors named 'John'?",
        "query": "MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name",
    },
    {
        "question": "Identify movies where directors also played a role in the film.",
        "query": "MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m) RETURN m.title, p.name",
    },
    {
        "question": "Find the actor with the highest number of movies in the database.",
        "query": "MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1",
    },
]

In [44]:
for ex in examples:
    ex["query"] = ex["query"].replace("{", "{{").replace("}", "}}")


In [45]:
from langchain_core.prompts import FewShotPromptTemplate,PromptTemplate

example_prompt = PromptTemplate.from_template(
    "User Query: {question} \n Cypher Query:{query}"
)

prompt = FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix=(
        "You're a Neo4j expert. Given an input question and the database schema below, "
        "create a syntactically very accurate Cypher query.\n\nSchema:\n{schema}\n"
    ),
    suffix="User input: {question}\nCypher Query:",
    input_variables=["question", "schema"],
)


In [46]:
prompt

FewShotPromptTemplate(input_variables=['question', 'schema'], input_types={}, partial_variables={}, examples=[{'question': 'How many artists are there?', 'query': 'MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)'}, {'question': 'Which actors played in the movie Casino?', 'query': "MATCH (m:Movie {{{{title: 'Casino'}}}})<-[:ACTED_IN]-(a) RETURN a.name"}, {'question': 'How many movies has Tom Hanks acted in?', 'query': "MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count(m)"}, {'question': "List all the genres of the movie Schindler's List", 'query': "MATCH (m:Movie {{{{title: 'Schindler\\'s List'}}}})-[:IN_GENRE]->(g:Genre) RETURN g.name"}, {'question': 'Which actors have worked in movies from both the comedy and action genres?', 'query': "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name"}], example_prompt=PromptTe

In [47]:
print(prompt.format(question="How many artist are there?", schema = "foo"))

You're a Neo4j expert. Given an input question and the database schema below, create a syntactically very accurate Cypher query.

Schema:
foo


User Query: How many artists are there? 
 Cypher Query:MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)

User Query: Which actors played in the movie Casino? 
 Cypher Query:MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name

User Query: How many movies has Tom Hanks acted in? 
 Cypher Query:MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)

User Query: List all the genres of the movie Schindler's List 
 Cypher Query:MATCH (m:Movie {{title: 'Schindler\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name

User Query: Which actors have worked in movies from both the comedy and action genres? 
 Cypher Query:MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name

User i

In [48]:
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000028B52B97FB0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000028B529D0CB0>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [49]:
chain = GraphCypherQAChain.from_llm(
    graph = graph,
    llm = llm,
    cypher_prompt = prompt,
    verbose = True,
    allow_dangerous_requests = True,
)

In [55]:
chain.invoke("Find the actor with the highest number of movies in the database.")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (a:Person)-[:ACTED_IN]->(m:Movie)
RETURN a.name, COUNT(m) AS num_movies
ORDER BY num_movies DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'a.name': 'Gene Hackman', 'num_movies': 4}][0m

[1m> Finished chain.[0m


{'query': 'Find the actor with the highest number of movies in the database.',
 'result': 'Gene Hackman has the highest number of movies in the database with 4 movies.'}