In [10]:
import os
import pandas as pd

In [11]:
df = pd.read_csv("https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv")
df.head()

Unnamed: 0,movieId,released,title,actors,director,genres,imdbRating
0,1,1995-11-22,Toy Story,Jim Varney|Tim Allen|Tom Hanks|Don Rickles,John Lasseter,Adventure|Animation|Children|Comedy|Fantasy,8.3
1,2,1995-12-15,Jumanji,Robin Williams|Bradley Pierce|Kirsten Dunst|Jo...,Joe Johnston,Adventure|Children|Fantasy,6.9
2,3,1995-12-22,Grumpier Old Men,Walter Matthau|Ann-Margret|Jack Lemmon|Sophia ...,Howard Deutch,Comedy|Romance,6.6
3,4,1995-12-22,Waiting to Exhale,Whitney Houston|Lela Rochon|Angela Bassett|Lor...,Forest Whitaker,Romance|Drama|Comedy,5.6
4,5,1995-12-08,Father of the Bride Part II,Steve Martin|Kimberly Williams-Paisley|Diane K...,Charles Shyer,Comedy,5.9


In [13]:
from langchain_community.graphs import Neo4jGraph

In [14]:
graph = Neo4jGraph()

In [14]:
movies_query = """
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv' AS row
MERGE (m:Movie {id:row.movieId})
SET m.released = date(row.released),
    m.title = row.title,
    m.imdbRating = toFloat(row.imdbRating)
FOREACH (director in split(row.director, '|') |
    MERGE (p:Person {name: trim(director)})
    MERGE (p)-[:DIRECTED]->(m))
FOREACH (actor in split(row.actor, '|') |
    MERGE (p:Person {name: trim(actor)})
    MERGE (p)-[:ACTED_IN]->(m))
FOREACH (genre in split(row.genres, '|') |
    MERGE (g:Genre {name: trim(genre)})
    MERGE (m)-[:IN_GENRE]->(g))
"""
graph.query(movies_query);

In [32]:
examples = [
    {
        "question": "How many artists are there?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count (DISTINCT a)"
    },
    {
    "question": "Which actors played in the movie Casino?",
    "query": "MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name"
    },
    {
        "question": "How many movies has Tom Hanks acted in?",
        "query": "MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count (m)"
    },
    {
    "question": "List all the genres of the movie Schindler's List",
    "query": "MATCH (m:Movie {{title: 'Schindler\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name"
    },
    {
        "question": "Which actors have worked in movies from both the comedy and action genres?",
        "query": "MATCH (p:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre {{name: 'Comedy'}}) MATCH(p)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre {{name: 'Action'}}) RETURN DISTINCT p"
    },
    {
        "question": "Which directors have made movies with at least three different actors named 'John'?",
        "query": "MATCH (p:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH p, count (DISTINCT a) AS johnActorsCount WHERE johnActorsCount >= 3 RETURN p"
    }]

In [33]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

In [34]:
example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)



In [35]:
prompt = FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information.",
    suffix="User input: {question}\nCypher query:",
    input_variables=["question"])

In [36]:
print(prompt.format(question="How many artists are there?"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information.

User input: How many artists are there?
Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count (DISTINCT a)

User input: Which actors played in the movie Casino?
Cypher query: MATCH (m:Movie {title: 'Casino'})<-[:ACTED_IN]-(a) RETURN a.name

User input: How many movies has Tom Hanks acted in?
Cypher query: MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count (m)

User input: List all the genres of the movie Schindler's List
Cypher query: MATCH (m:Movie {title: 'Schindler's List'})-[:IN_GENRE]->(g:Genre) RETURN g.name

User input: Which actors have worked in movies from both the comedy and action genres?
Cypher query: MATCH (p:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre {name: 'Comedy'}) MATCH(p)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre {name: 'Action'}) RETURN DISTINCT p

User input: How many artists are there

In [53]:
!pip install langchain_openai

  from pkg_resources import load_entry_point
Collecting langchain_openai
  Downloading langchain_openai-0.1.6-py3-none-any.whl (34 kB)
Collecting openai<2.0.0,>=1.24.0
  Downloading openai-1.28.1-py3-none-any.whl (320 kB)
[K     |████████████████████████████████| 320 kB 609 kB/s eta 0:00:01
[?25hCollecting tiktoken<1,>=0.5.2
  Downloading tiktoken-0.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 6.2 MB/s eta 0:00:01
Collecting distro<2,>=1.7.0
  Downloading distro-1.9.0-py3-none-any.whl (20 kB)
Collecting regex>=2022.1.18
  Downloading regex-2024.5.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (777 kB)
[K     |████████████████████████████████| 777 kB 19.6 MB/s eta 0:00:01
Installing collected packages: distro, openai, regex, tiktoken, langchain-openai
Successfully installed distro-1.9.0 langchain-openai-0.1.6 openai-1.28.1 regex-2024.5.10 tiktoken-0.6.0


In [12]:
pip install langchain_cohere

Note: you may need to restart the kernel to use updated packages.


In [37]:
from langchain_community.vectorstores import Neo4jVector
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings
from langchain_cohere import CohereEmbeddings

In [38]:
# example_selector = SemanticSimilarityExampleSelector.from_examples(
#     examples,
#     OpenAIEmbeddings(),
#     Neo4jVector,
#     k=5,
#     input_keys=["question"],
# )

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    CohereEmbeddings(),
    Neo4jVector,
    k=5,
    input_keys=["question"],
)

In [39]:
example_selector.select_examples({"question":"How many artists are there?"})

[{'query': 'MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count (DISTINCT a)',
  'question': 'How many artists are there?'},
 {'query': "MATCH (p:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH p, count (DISTINCT a) AS johnActorsCount WHERE johnActorsCount >= 3 RETURN p",
  'question': "Which directors have made movies with at least three different actors named 'John'?"},
 {'query': "MATCH (p:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre {{name: 'Comedy'}}) MATCH(p)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre {{name: 'Action'}}) RETURN DISTINCT p",
  'question': 'Which actors have worked in movies from both the comedy and action genres?'},
 {'query': "MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count (m)",
  'question': 'How many movies has Tom Hanks acted in?'},
 {'query': "MATCH (m:Movie {{title: 'Schindler's List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name",
  'question': "List all the genres of the movie Schi

In [54]:
prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information.",
    suffix="User input: {question}\nCypher query:",
    input_variables=["question"]
)

In [55]:
print(prompt.format(question="how many artists are there?"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information.

User input: How many artists are there?
Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count (DISTINCT a)

User input: Which directors have made movies with at least three different actors named 'John'?
Cypher query: MATCH (p:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH p, count (DISTINCT a) AS johnActorsCount WHERE johnActorsCount >= 3 RETURN p

User input: Which actors have worked in movies from both the comedy and action genres?
Cypher query: MATCH (p:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre {name: 'Comedy'}) MATCH(p)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre {name: 'Action'}) RETURN DISTINCT p

User input: List all the genres of the movie Schindler's List
Cypher query: MATCH (m:Movie {title: 'Schindler's List'})-[:IN_GENRE]->(g:Genre) RETURN g.name

User input: How many movie

In [56]:
print(prompt.format(question="how many directors are there?"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information.

User input: Which directors have made movies with at least three different actors named 'John'?
Cypher query: MATCH (p:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH p, count (DISTINCT a) AS johnActorsCount WHERE johnActorsCount >= 3 RETURN p

User input: How many artists are there?
Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count (DISTINCT a)

User input: How many movies has Tom Hanks acted in?
Cypher query: MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count (m)

User input: Which actors have worked in movies from both the comedy and action genres?
Cypher query: MATCH (p:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre {name: 'Comedy'}) MATCH(p)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre {name: 'Action'}) RETURN DISTINCT p

User input: Which actors played in the m

In [57]:
from langchain.chains import GraphCypherQAChain

In [58]:
from langchain_community.llms import Ollama

In [None]:
llm = Ollama(model="llama2")
# llm = Ollama(model="codellama")

chain = GraphCypherQAChain.from_llm(
    graph=graph, llm=llm, cypher_prompt=prompt, verbose=True
)

chain.invoke("How many actors are in the graph?")



[1m> Entering new GraphCypherQAChain chain...[0m


In [63]:
# Filtering graph schema
chain = GraphCypherQAChain.from_llm(
    graph=graph, llm=llm, exclude_types=["Genre"], verbose=True
)
print(chain.graph_schema)

Node properties are the following:
Bot {name: STRING},Personality {name: STRING},Trait {name: STRING, description: STRING, type: STRING, definition: STRING},PhysicalAppearance {name: STRING},Skills {name: STRING},Quote {name: STRING},Item {name: STRING, description: STRING},Hobby {name: STRING},Residence {name: STRING},Question {text: STRING},Answer {text: STRING},Movie {id: STRING, released: DATE, title: STRING, imdbRating: FLOAT},Person {name: STRING}
Relationship properties are the following:
HAS_SKILL {keywords: LIST, proficiency_level: STRING, experience_level: STRING, training: STRING, specialization: STRING, interest_level: STRING}
The relationships are the following:
(:Bot)-[:HAS_SKILL]->(:Trait),(:Bot)-[:HAS_SKILL]->(:Item),(:Bot)-[:SAID]->(:Item),(:Bot)-[:LIVES_IN]->(:Item),(:Bot)-[:ANSWERS]->(:Answer),(:Bot)-[:ASKS]->(:Question),(:Personality)-[:HAS_TRAIT]->(:Trait),(:PhysicalAppearance)-[:HAS_TRAIT]->(:Trait),(:Skills)-[:HAS_TRAIT]->(:Trait),(:Skills)-[:HAS_SKILL]->(:Trait)