# Imports

In [1]:
import os
import textwrap
from dotenv import load_dotenv
import pprint

# Langchain
from langchain.chains import GraphCypherQAChain
from langchain.prompts.prompt import PromptTemplate
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI

# Warning control
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load from environment
load_dotenv('.env', override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Global constants
VECTOR_INDEX_NAME = 'film_overview_index'
VECTOR_NODE_LABEL = 'Film'
VECTOR_SOURCE_PROPERTY = 'overview'
VECTOR_EMBEDDING_PROPERTY = 'overviewEmbedding'

In [3]:
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE
)

# Utils

## Delete all data

In [14]:
cypher = """
MATCH (n)
DETACH DELETE n
"""
kg.query(cypher)

[]

## Check for amount of total nodes

In [15]:
cypher = """
  MATCH (n) 
  RETURN count(n) AS numberOfNodes
"""
print(kg.query(cypher))

cypher = """
  MATCH (f:Film) 
  RETURN count(f) AS numberOfFilms
  """
print(kg.query(cypher))

[{'numberOfNodes': 0}]
[{'numberOfFilms': 0}]


## Remove and show indices

In [4]:
# kg.query("DROP INDEX `film_index`")
# kg.query("DROP INDEX `keyword`")
kg.query("SHOW INDEXES")
# kg.refresh_schema()
# print(kg.schema)

[{'id': 2,
  'name': 'film_index',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Film'],
  'properties': ['embedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 3, 21, 1, 1, 44, 634000000, tzinfo=<UTC>),
  'readCount': 19},
 {'id': 0,
  'name': 'index_343aff4e',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'NODE',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 3, 21, 1, 1, 44, 59000000, tzinfo=<UTC>),
  'readCount': 324124},
 {'id': 1,
  'name': 'index_f7700477',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'RELATIONSHIP',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': None,
  'readCount': 0

# Creating list of films from CSV

In [19]:
import csv

all_films = []

YEARS = list(range(2019, 2024))

for year in YEARS:
  with open(f'./data/{year}_movie_collection_data.csv', mode='r') as csv_file:
      csv_reader = csv.DictReader(csv_file)
      for row in csv_reader: # each row will be a dictionary
        all_films.append(row)

In [20]:
all_films

[{'Title': 'Marriage Story',
  'Runtime (minutes)': '137',
  'Language': 'English',
  'Overview': 'A stage director and an actress struggle through a grueling, coast-to-coast divorce that pushes them to their personal extremes.',
  'Release Date': '2019-09-28',
  'Genre': 'Drama',
  'Keywords': 'infidelity, new york city, husband wife relationship, parent child relationship, theatre group, theater director, lawyer, los angeles, california, divorce, divorce lawyer',
  'Actors': 'Adam Driver, Scarlett Johansson, Laura Dern, Ray Liotta, Alan Alda',
  'Directors': 'Noah Baumbach',
  'Stream': 'Netflix, Netflix basic with Ads ',
  'Buy': '',
  'Rent': '',
  'Production Companies': 'Heyday Films',
  'Website': 'https://www.marriagestorymovie.com'},
 {'Title': 'Terminator: Dark Fate',
  'Runtime (minutes)': '128',
  'Language': 'English',
  'Overview': 'Decades after Sarah Connor prevented Judgment Day, a lethal new Terminator is sent to eliminate the future leader of the resistance. In a fig

In [21]:
first_film = all_films[0]
print(all_films)



# Create Film and Genre Node, with Relationship

In [25]:
# first_films = all_films[0:5]

for film in all_films:
    genre_list = film['Genre'].split(", ")
    actor_list = film['Actors'].split(", ")
    prod_comp_list = film['Production Companies'].split(", ")
    director_list = film['Directors'].split(", ")

    cypher_query = """
    MERGE (film:Film {title: $title})
    ON CREATE
        SET film.runtime = toInteger($runtime),
            film.language = $language,
            film.overview = $overview,
            film.release_date = date($release_date),
            film.keywords = $keywords,
            film.source = $website,
            film.genres = $genres_str,
            film.actors = $actors_str,
            film.directors = $directors_str,
            film.production_companies = $prodcution_companies_str
    WITH film
    UNWIND $genres AS genre_type
    UNWIND $actors AS actor_name
    UNWIND $prodcution_companies AS company_name

    MERGE (genre:Genre {type: genre_type})
    MERGE (actor:Actor {name: actor_name})
    MERGE (director:Director {name: $director})
    MERGE (company:Production_Company {name: company_name})

    MERGE (film)-[:HAS_GENRE]->(genre)
    MERGE (director)-[:HAS_DIRECTED]->(film)
    MERGE (actor)-[:STARRED_IN]->(film)
    MERGE (company)-[:PRODUCED]->(film)

    """

    kg.query(cypher_query, params={
        'title': film['Title'],
        'runtime': film['Runtime (minutes)'],
        'language': film['Language'],
        'overview': film['Overview'],
        'release_date': film['Release Date'],
        'keywords': film['Keywords'],
        'website': film['Website'],
        'genres_str': film['Genre'],
        'actors_str': film['Actors'],
        'directors_str': film['Directors'],
        'prodcution_companies_str': film['Production Companies'],
        'genres': genre_list,
        'actors': actor_list,
        'director': director_list,
        'prodcution_companies': prod_comp_list,  
        })

In [26]:
kg.refresh_schema()
print(textwrap.fill(kg.schema, 60))

Node properties are the following: Film {title: STRING,
runtime: INTEGER, language: STRING, overview: STRING,
release_date: DATE, keywords: STRING, source: STRING,
genres: STRING, actors: STRING, directors: STRING,
production_companies: STRING},Genre {type: STRING},Actor
{name: STRING},Director {name: LIST},Production_Company
{name: STRING} Relationship properties are the following:
The relationships are the following: (:Film)-[:HAS_GENRE]-
>(:Genre),(:Actor)-[:STARRED_IN]->(:Film),(:Director)-
[:HAS_DIRECTED]->(:Film),(:Production_Company)-[:PRODUCED]-
>(:Film)


# Sample Queries

## Getting all actors in a film

In [None]:
cypher = """
  MATCH (a:Actor)-[r:STARRED_IN]->(f:Film)
    WHERE f.title = $title
  RETURN a.name
"""

actors_info = kg.query(cypher, params={
    'title': 'Jurassic World Dominion'
})

for item in actors_info:
    print(item)

{'a.name': 'Laura Dern'}
{'a.name': 'Bryce Dallas Howard'}
{'a.name': 'Sam Neill'}
{'a.name': 'Chris Pratt'}
{'a.name': 'Jeff Goldblum'}


## Getting all films in a genre

In [None]:
cypher = """
  MATCH (f:Film)-[r:HAS_GENRE]->(g:Genre)
    WHERE g.type = $genre
  RETURN f.title
"""

films_in_genre = kg.query(cypher, params={
    'genre': 'Horror'
})

for item in films_in_genre:
    print(item)

{'f.title': 'Reign of Chaos'}
{'f.title': 'The OctoGames'}
{'f.title': 'Before Night Falls'}
{'f.title': 'The One Hundred'}
{'f.title': 'Beast'}
{'f.title': 'Halloween Ends'}
{'f.title': 'The Exorcism of God'}
{'f.title': 'Slash/Back'}
{'f.title': 'Nope'}
{'f.title': 'Witch Trials'}
{'f.title': 'Hellraiser'}
{'f.title': 'Scream'}
{'f.title': 'Project Wolf Hunting'}
{'f.title': 'Orphan: First Kill'}
{'f.title': 'Venus'}
{'f.title': 'Wyrmwood: Apocalypse'}
{'f.title': 'Smile'}
{'f.title': 'Pearl'}
{'f.title': 'The Black Phone'}
{'f.title': 'M3GAN'}
{'f.title': 'Jeepers Creepers: Reborn'}
{'f.title': 'The Exorcism of Hannah Stevenson'}
{'f.title': 'Good Boy'}
{'f.title': 'X'}
{'f.title': 'The Menu'}
{'f.title': 'Terrifier 2'}
{'f.title': 'Deep Fear'}
{'f.title': 'Fear'}
{'f.title': 'Godzilla Minus One'}
{'f.title': 'Scream VI'}
{'f.title': 'Sleep'}
{'f.title': "Five Nights at Freddy's"}
{'f.title': 'Squealer'}
{'f.title': 'Carousel'}
{'f.title': 'Saw X'}
{'f.title': 'Skal - Fight for Surv

# Get all films within a few nodes of a particular film (excluding via genre)

In [None]:
cypher = """
    MATCH (start:Film {title: $title})-[r:HAS_DIRECTED|STARRED_IN|PRODUCED*1..2]-(connected:Film)
    WITH start, connected, r, type(head(r)) AS related_by
    OPTIONAL MATCH (start)<-[:PRODUCED]-(p:Production_Company)-[:PRODUCED]->(connected)
    OPTIONAL MATCH (start)<-[:STARRED_IN]-(a:Actor)-[:STARRED_IN]->(connected)
    OPTIONAL MATCH (start)<-[:HAS_DIRECTED]-(d:Director)-[:HAS_DIRECTED]->(connected)
    RETURN connected.title as related_film, 
            related_by, 
            collect(distinct p.name) as production_companies, 
            collect(distinct a.name) as actors, 
            collect(distinct d.name) as directors
    """

related_films = kg.query(cypher, params={
    'title': 'The Menu'
})

pprint.pprint(related_films)

[{'actors': [],
  'directors': [],
  'production_companies': ['Searchlight Pictures', 'TSG Entertainment'],
  'related_by': 'PRODUCED',
  'related_film': 'Poor Things'},
 {'actors': [],
  'directors': [],
  'production_companies': ['TSG Entertainment'],
  'related_by': 'PRODUCED',
  'related_film': '65'},
 {'actors': ['Anya Taylor-Joy'],
  'directors': [],
  'production_companies': [],
  'related_by': 'STARRED_IN',
  'related_film': 'The Super Mario Bros. Movie'}]


## Get all films directed by the director of a particular film

In [None]:
# Getting all films directed by whoever directed 'The Next 365 Days'
cypher = """
    MATCH (start:Film {title: $title})<-[:HAS_DIRECTED]-(d:Director)-[:HAS_DIRECTED]->(connected:Film)
    RETURN connected.title
"""

related_films = kg.query(cypher, params={
    'title': 'The Next 365 Days'
})

for item in related_films:
    print(item)

# Getting directors who directed more than 1 film
cypher = """
MATCH (d:Director)-[:HAS_DIRECTED]->(f:Film)
WITH d, count(f) AS numFilms
WHERE numFilms > 1
RETURN d.name
"""

multifilm_directors = kg.query(cypher)
print(multifilm_directors)

{'connected.title': '365 Days: This Day'}
[{'d.name': 'Barbara Białowąs, Tomasz Mandes'}, {'d.name': 'Matt Bettinelli-Olpin, Tyler Gillett'}, {'d.name': 'Ti West'}]


## Getting Films (Title + Overview + Keywords) Directed by a Director

In [None]:
cypher = """
  MATCH (d:Director)-[r:HAS_DIRECTED]->(f:Film)
    WHERE d.name = $director
  RETURN f.title, f.overview, f.keywords
"""

director_films = kg.query(cypher, params={
    'director': 'Aaron Mirtes'
})

pprint.pprint(director_films)

[{'f.keywords': 'None',
  'f.overview': "Eight contestants compete in eight deadly, classic children's "
                'games. They seek fame beyond their wildest dreams, competing '
                'for the chance to take over the YouTube channel of the famous '
                'yet elusive masked content creator known only as "JaxPro".',
  'f.title': 'The OctoGames'}]


## Checking to see number of comedy films

In [None]:
cypher = """
  MATCH (f:Film)-[:HAS_GENRE]->(g:Genre {type: 'Comedy'})
  RETURN count(f) AS NumberOfComedyFilms
  """
kg.query(cypher)

[{'NumberOfComedyFilms': 47}]

## Getting all films produced by a production company

In [None]:
cypher = """
  MATCH (c:Production_Company)-[r:PRODUCED]->(f:Film)
    WHERE c.name = $name
  RETURN f.title, f.overview, f.keywords
"""

production_comp_films = kg.query(cypher, params={
    'name': 'A24'
})

pprint.pprint(production_comp_films)

[{'f.keywords': 'regret, nurse, missionary, idaho, bible, redemption, '
                'overweight man, addiction, based on play or musical, teacher, '
                'grief, neighbor, obesity, religion, death of lover, election, '
                'rebellious daughter, guilt, death, lgbt, sister-in-law, '
                'eating disorder, father daughter reunion, empathy, shame, '
                'english teacher, abandonment, one location, father daughter '
                'relationship, 2010s, gay theme, apartment, essay, food '
                'addiction, religious symbolism',
  'f.overview': 'A reclusive English teacher suffering from severe obesity '
                'attempts to reconnect with his estranged teenage daughter for '
                'one last chance at redemption.',
  'f.title': 'The Whale'},
 {'f.keywords': 'mother, martial arts, kung fu, philosophy, generations '
                'conflict, chinese woman, laundromat, chinese, east asian '
                'lead, div

## Get all horror films produced after a particular date

In [None]:
cypher = """
  MATCH (f:Film)-[:HAS_GENRE]->(g:Genre{type: $genre})
    WHERE f.release_date > date($date)
  RETURN f.title, f.overview, f.keywords, f.actors
"""

new_horror = kg.query(cypher, params={
    'genre': 'Horror',
    'date': '2023-04-01'
})

pprint.pprint(new_horror)

[{'f.actors': 'Jung Yu-mi, Lee Sun-kyun, Kim Kuk-hee, Yoon Kyung-ho, Lee '
              'Kyung-jin',
  'f.keywords': 'married couple, sleep, sleepwalking',
  'f.overview': 'Hyun-su and Soo-jin are newlyweds. Seemingly out of nowhere, '
                'he starts talking in his sleep. "Someone\'s inside." From '
                'that night on, whenever he falls asleep, he transforms into '
                'someone else with no recollection of what happened the night '
                "before. Soo-jin is overwhelmed with anxiety that he'd hurt "
                'her family while she sleeps and can barely sleep a wink '
                'because of this irrational fear. Despite sleep treatment, '
                "Hyun-su's sleepwalking only intensifies, and she begins to "
                'feel that her unborn child may be in danger.',
  'f.title': 'Sleep'},
 {'f.actors': 'Gabrielle Echols, Lily Sullivan, Alyssa Sutherland, Morgan '
              'Davies, Nell Fisher',
  'f.keywords': 'si

# Use Similarity Search

In [12]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

retrieval_query = """
WITH 
    node, score
RETURN "Title: " + node.title + "\n" + 
        "Overview: " + node.overview  + "\n" + 
        "Release Date: " + node.release_date  + "\n" + 
        "Runtime: " + node.runtime + " minutes" + "\n" + 
        "Language: " + node.language  + "\n" + 
        "Keywords: " + node.keywords  + "\n" + 
        "Genres: " + node.genres + "\n" +
        "Actors: " + node.actors + "\n" +
        "Directors: " + node.directors + "\n" +
        "Production Companies: " + node.production_companies + "\n" +
        "Source: " + node.source  + "\n"
        as text, score, node {.source} AS metadata
"""

prompt = ChatPromptTemplate.from_messages(
    [
        (
            'system',
            """
            Your goal is to recommend films to users based on their query and the retrieved context. Only recommend films 
            shown in your context. If a film doesn't seem relevant, omit it from your response. You should recommend no more than five films. 
            Your recommendation should be original, insightful, and at least two to three sentences long. Determine whether the query
            would best be answered using full text search or semantic search before picking your tool.

            # TEMPLATE FOR OUTPUT
            - [Title of Film](source link):
                - Runtime:
                - Release Date:
                - (Your reasoning for recommending this film)
            
            Question: {question} 
            Context: {context} 
            """
        ),
    ]
)

vector_store = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name="film_index",
    node_label="Film",
    text_node_properties=["overview", "keywords"],
    embedding_node_property="embedding",
    search_type="hybrid",
    retrieval_query=retrieval_query
)

# Create a retriever from the vector store
retriever = vector_store.as_retriever(search_kwargs={"k": 10})

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

chat_model = ChatOpenAI(
    model='gpt-3.5-turbo-0125', 
    temperature=0.5, 
    streaming=True,
    )

# Create a chatbot Question & Answer chain from the retriever
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | chat_model
    | StrOutputParser()
)

rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

question = "Recommend me some Japanese horror films."

# Only prints final answer
# for chunk in rag_chain_with_source.stream(question):
#     for key in chunk:
#         if key == 'answer':
#             print(chunk[key], end="", flush=True)

# Prints everything
output = {}
curr_key = None
for chunk in rag_chain_with_source.stream(question):
    for key in chunk:
        if key not in output:
            output[key] = chunk[key]
        else:
            output[key] += chunk[key]
        if key != curr_key:
            print(f"\n\n{key}: {chunk[key]}", end="", flush=True)
        else:
            print(chunk[key], end="", flush=True)
        curr_key = key
output



question: Recommend me some Japanese horror films.

context: [Document(page_content='Title: Ox-Head Village\nOverview: Having launched a social media prank about a haunted building, three girls suddenly vanish. Rumours circulate that they were victims of The Ox-Head Village curse, triggering an investigation by two of their friends, desperate to find the truth about what has happened…\nRelease Date: 2022-02-18\nRuntime: 115 minutes\nLanguage: Japanese\nKeywords: missing person, japanese folklore\nGenres: Horror, Drama, Mystery\nActors: Fumiya Takahashi, Kōki, Rinka Otani, Riku Hagiwara, Haruka Imou\nDirectors: Takashi Shimizu\nProduction Companies: Toei Company\nSource: https://ushikubi-movie.jp/\n', metadata={'source': 'https://ushikubi-movie.jp/'}), Document(page_content='Title: Woman of the Photographs\nOverview: Solitary and skilled photographer Kai is afraid of women. When he encounters a beautiful model, Kyoko suffering from body dysmorphia, they begin a twisted romance. Kai is

{'question': 'Recommend me some Japanese horror films.',
 'context': [Document(page_content='Title: Ox-Head Village\nOverview: Having launched a social media prank about a haunted building, three girls suddenly vanish. Rumours circulate that they were victims of The Ox-Head Village curse, triggering an investigation by two of their friends, desperate to find the truth about what has happened…\nRelease Date: 2022-02-18\nRuntime: 115 minutes\nLanguage: Japanese\nKeywords: missing person, japanese folklore\nGenres: Horror, Drama, Mystery\nActors: Fumiya Takahashi, Kōki, Rinka Otani, Riku Hagiwara, Haruka Imou\nDirectors: Takashi Shimizu\nProduction Companies: Toei Company\nSource: https://ushikubi-movie.jp/\n', metadata={'source': 'https://ushikubi-movie.jp/'}),
  Document(page_content='Title: Woman of the Photographs\nOverview: Solitary and skilled photographer Kai is afraid of women. When he encounters a beautiful model, Kyoko suffering from body dysmorphia, they begin a twisted romance

In [None]:
response = rag_chain_with_source.invoke(question)
print(response['answer'])

<class 'str'>


# Using an LLM to write Cypher queries

In [None]:
CYPHER_GENERATION_TEMPLATE = """
Task:Generate a Cypher statement to query a graph database.
Instructions: Use only the provided relationship types and properties 
in the schema. Do not use any other relationship types or properties that 
are not provided.
Schema: {schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than 
for you to construct a Cypher statement. Do not include any text except 
the generated Cypher statement. The output will be a list of python dictionaries.
Parse the output to get the answer to the question. For example, a response of
[{{'film.title': 'The OctoGames'}}] implies this is a film, with the title 'The
Octogames'.

Examples: Here are a few examples of generated Cypher 
statements for particular questions:

# Give me a list of all comedy films.
  MATCH (film:Film)-[r:HAS_GENRE]->(genre:Genre)
    WHERE genre.type = 'Comedy'
  RETURN film.title AS Comedy_Film

# What are all horror films produced after April 1st 2022?
MATCH (film:Film)-[:HAS_GENRE]->(genre:Genre{{type: 'Horror'}})
WHERE film.release_date > date('2023-04-01')
RETURN film.title as Horror_Film, film.release_date As Release_Date

# What are all films produced by A24?
MATCH (company:Production_Company)-[r:PRODUCED]->(film:Film)
WHERE company.name = 'A24'
RETURN film.title AS A24_Film

# Give me a list of all films associated with the movie 'Everything Everywhere All at Once'.
MATCH (start:Film {{title: 'Everything Everywhere All at Once'}})-[r:HAS_DIRECTED|STARRED_IN|PRODUCED*1..2]-(connected:Film)
WITH start, connected, r, type(head(r)) AS related_by
OPTIONAL MATCH (start)<-[:PRODUCED]-(p:Production_Company)-[:PRODUCED]->(connected)
OPTIONAL MATCH (start)<-[:STARRED_IN]-(a:Actor)-[:STARRED_IN]->(connected)
OPTIONAL MATCH (start)<-[:HAS_DIRECTED]-(d:Director)-[:HAS_DIRECTED]->(connected)
RETURN connected.title as Film_Associated_with_The_Menu, 
  related_by, 
  collect(distinct p.name) as production_companies, 
  collect(distinct a.name) as actors, 
  collect(distinct d.name) as directors
The question is:
{question}"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], 
    template=CYPHER_GENERATION_TEMPLATE
)

chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(model='gpt-3.5-turbo-0125', temperature=0),
    graph=kg,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    validate_cypher=True,
)

In [None]:
def prettyCypherChain(question: str) -> str:
    response = chain.run(question)
    print(textwrap.fill(response, 90))
    return response

output = prettyCypherChain(
    "Give me a list of all films associated with the movie 'Everything Everywhere All at Once', along with how they are related.")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (start:Film {title: 'Everything Everywhere All at Once'})-[r:HAS_DIRECTED|STARRED_IN|PRODUCED*1..2]-(connected:Film)
WITH start, connected, r, type(head(r)) AS related_by
OPTIONAL MATCH (start)<-[:PRODUCED]-(p:Production_Company)-[:PRODUCED]->(connected)
OPTIONAL MATCH (start)<-[:STARRED_IN]-(a:Actor)-[:STARRED_IN]->(connected)
OPTIONAL MATCH (start)<-[:HAS_DIRECTED]-(d:Director)-[:HAS_DIRECTED]->(connected)
RETURN connected.title as Film_Associated_with_The_Menu, 
  related_by, 
  collect(distinct p.name) as production_companies, 
  collect(distinct a.name) as actors, 
  collect(distinct d.name) as directors[0m
Full Context:
[32;1m[1;3m[{'Film_Associated_with_The_Menu': 'Past Lives', 'related_by': 'PRODUCED', 'production_companies': ['A24'], 'actors': [], 'directors': []}, {'Film_Associated_with_The_Menu': 'The Zone of Interest', 'related_by': 'PRODUCED', 'production_companies': ['A24'], 'acto

# Agent

In [27]:
import pprint
from langchain.agents import AgentExecutor
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import create_openai_tools_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, MessagesPlaceholder
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.streaming_stdout_final_only import FinalStreamingStdOutCallbackHandler

import sys


class CallbackHandler(StreamingStdOutCallbackHandler):
    def __init__(self):
        self.content: str = ""
        self.final_answer: bool = False

    def on_llm_new_token(self, token: str, **kwargs: any) -> None:
        self.content += token
        if "Final Answer" in self.content:
            # now we're in the final answer section, but don't print yet
            self.final_answer = True
            self.content = ""
        if self.final_answer:
            if '"action_input": "' in self.content:
                if token not in ["}"]:
                    sys.stdout.write(token)  # equal to `print(token, end="")`
                    sys.stdout.flush()

retrieval_query_window = """
WITH 
    node, score
RETURN "Title: " + node.title + "\n" + 
        "Overview: " + node.overview  + "\n" + 
        "Release Date: " + node.release_date  + "\n" + 
        "Runtime: " + node.runtime + " minutes" + "\n" + 
        "Language: " + node.language  + "\n" + 
        "Keywords: " + node.keywords  + "\n" + 
        "Genres: " + node.genres + "\n" +
        "Actors: " + node.actors + "\n" +
        "Directors: " + node.directors + "\n" +
        "Production Companies: " + node.production_companies + "\n" +
        "Source: " + node.source  + "\n"
        as text, score, node {.source} AS metadata
"""

vector_store_window = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name="film_index",
    node_label="Film",
    text_node_properties=["overview", "keywords", "language", "release_date", "runtime", "actors", "directors", "genres", "production_companies"],
    embedding_node_property="embedding",
    search_type="hybrid",
    retrieval_query=retrieval_query_window
)

# Create a retriever from the vector store
retriever_window = vector_store_window.as_retriever(search_kwargs={"k": 10})

# Create tool from retriever
retriever_tool = create_retriever_tool(
    retriever_window,
    "movie_data",
    """
    Use this tool to get films to recommend to the user.
    """
)

tools = [retriever_tool]

chat_model = ChatOpenAI(
    model='gpt-3.5-turbo-0125', 
    temperature=0.5, 
    streaming=True,
    # callbacks=[CallbackHandler()]
    )
# chat_model = ChatOpenAI(model='gpt-4-0125-preview', temperature=0.5, streaming=True)

prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            'system',
            """
            Your goal is to recommend films to users based on their query and the retrieved context. Only recommend films 
            shown in your context. If a film doesn't seem relevant, omit it from your response. You should recommend no more than five films. 
            Your recommendation should be original, insightful, and at least two to three sentences long. Determine whether the query
            would best be answered using full text search or semantic search before picking your tool.

            # TEMPLATE FOR OUTPUT
            - [Title of Film]:
                - Runtime:
                - Release Date:
                - Recommendation: 
                - Source: 
            ...
             
            """            
        ),
        MessagesPlaceholder(variable_name='chat_history', optional=True),
        HumanMessagePromptTemplate(prompt=PromptTemplate(
            input_variables=['input'], template='{input}')),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

agent = create_openai_tools_agent(chat_model.with_config(
    {"tags": ["agent_llm"]}), tools, prompt_template)

agent_executor = AgentExecutor(
    agent=agent, 
    tools=tools, 
    verbose=True,
    return_intermediate_steps=True).with_config(
        {"run_name": "Agent"})

agent_executor.invoke(
    {"input": "Aliens"})

Failed to write data to connection ResolvedIPv4Address(('34.66.78.163', 7687)) (ResolvedIPv4Address(('34.66.78.163', 7687)))
Failed to write data to connection IPv4Address(('4c38bf4f.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.66.78.163', 7687)))




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `movie_data` with `{'query': 'Aliens'}`


[0m[36;1m[1;3mTitle: Doors
Release Date: 2021-03-23
Runtime: 84 minutes
Language: English
Keywords: portal, anthology
Genres: Science Fiction
Actors: Kyp Malone, Wilson Bethel, Jordan Rock, Lina Esco, Josh Peck
Directors: Saman Kesh, Jeff Desom, Dugan O'Neal
Production Companies: Bloody Disgusting, BoulderLight Pictures, Going Rogue
Source: Unknown
[0m[32;1m[1;3m- Doors:
    - Runtime: 84 minutes
    - Release Date: 2021-03-23
    - Recommendation: "Doors" explores the sudden appearance of mysterious alien portals around the globe, creating a sense of intrigue and mystery similar to the theme of aliens. The film delves into mankind's efforts to understand the cosmic anomalies, offering a unique perspective on extraterrestrial encounters.
    - Source: Unknown[0m

[1m> Finished chain.[0m


{'input': 'Aliens',
 'output': '- Doors:\n    - Runtime: 84 minutes\n    - Release Date: 2021-03-23\n    - Recommendation: "Doors" explores the sudden appearance of mysterious alien portals around the globe, creating a sense of intrigue and mystery similar to the theme of aliens. The film delves into mankind\'s efforts to understand the cosmic anomalies, offering a unique perspective on extraterrestrial encounters.\n    - Source: Unknown',
 'intermediate_steps': [(OpenAIToolAgentAction(tool='movie_data', tool_input={'query': 'Aliens'}, log="\nInvoking: `movie_data` with `{'query': 'Aliens'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_Br6yICQwdfdf31lO00sCd7p5', 'function': {'arguments': '{"query":"Aliens"}', 'name': 'movie_data'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'})], tool_call_id='call_Br6yICQwdfdf31lO00sCd7p5'),

In [None]:
agent_executor.invoke(
    {"input": "Aliens"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `movie_data` with `{'query': 'Aliens'}`


[0m[36;1m[1;3mTitle: Doors
Release Date: 2021-03-23
Runtime: 84 minutes
Language: English
Keywords: portal, anthology
Genres: Science Fiction
Actors: Jordan Rock, Wilson Bethel, Kyp Malone, Lina Esco, Josh Peck
Directors: Jeff Desom, Saman Kesh, Dugan O'Neal
Production Companies: Bloody Disgusting, BoulderLight Pictures, Going Rogue
Source: movie_collection_data.csv


Title: Phineas and Ferb The Movie: Candace Against the Universe
Overview: Phineas and Ferb travel across the galaxy to rescue their older sister Candace, who has been abducted by aliens and taken to a utopia in a far-off planet, free of her pesky little brothers.
Release Date: 2020-08-28
Runtime: 84 minutes
Language: English
Keywords: cartoon
Genres: Family, Animation, Science Fiction, Comedy, TV Movie
Actors: David Errigo Jr., Ashley Tisdale, Dan Povenmire, Vincent Martella, Olivia Olson
Directors: Dan Pov

{'input': 'Aliens',
 'output': '- Doors:\n    - Runtime: 84 minutes\n    - Release Date: 2021-03-23\n    - Recommendation: "Doors" explores the sudden appearance of mysterious alien portals around the globe, leading mankind on a quest to unravel their purpose. This science fiction film delves into the unknown and the collaborative efforts of humanity to understand cosmic anomalies.\n    - Source: movie_collection_data.csv\n\n- Proximity:\n    - Runtime: 119 minutes\n    - Release Date: 2020-05-15\n    - Recommendation: "Proximity" follows a NASA JPL scientist\'s abduction by extraterrestrials and his relentless pursuit of proof to validate his experience. This gripping science fiction drama delves into themes of discovery and the quest for truth in the face of skepticism.\n    - Source: https://www.proximitymovie.com/\n\n- Mount Adams:\n    - Runtime: 94 minutes\n    - Release Date: 2021-07-21\n    - Recommendation: In "Mount Adams," UFO investigators confront alien monsters on the slo

In [None]:
from chat_app import FilmSearch

agent = FilmSearch()

response = agent.ask(
    {"input": "Recommend me some Japanese horror films from 2023."})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `movie_data` with `{'query': 'Japanese horror films 2023'}`


[0m[36;1m[1;3mTitle: Godzilla Minus One
Overview: Postwar Japan is at its lowest point when a new crisis emerges in the form of a giant monster, baptized in the horrific power of the atomic bomb.
Release Date: 2023-11-03
Runtime: 125 minutes
Language: Japanese
Keywords: monster, loss of loved one, giant monster, kamikaze, duty, atomic bomb test, radioactivity, naval combat, reboot, kaiju, duringcreditsstinger, post war japan, 1940s, naval battle, naval battleship, found family, godzilla, war orphan, survivor guilt, tokusatsu
Source: https://tickets.godzilla.com
Score: 1.0


Title: Ox-Head Village
Overview: Having launched a social media prank about a haunted building, three girls suddenly vanish. Rumours circulate that they were victims of The Ox-Head Village curse, triggering an investigation by two of their friends, desperate to find the truth about

In [None]:
print((response['output']))

- Dogman:
    - Runtime: 115 minutes
    - Release Date: September 27, 2023
    - Recommendation: "Dogman" is a heartfelt film that explores themes of love and salvation, similar to the emotional depth often found in Wes Anderson's movies. The bond between the boy and his dogs resonates with the quirky and touching relationships characteristic of Anderson's films.
    - Source: [Dogman Website](https://www.dogmanthefilm.com/)

- The Fabelmans:
    - Runtime: 151 minutes
    - Release Date: November 11, 2022
    - Recommendation: "The Fabelmans" delves into family dynamics and coming-of-age themes, reminiscent of Wes Anderson's storytelling style. The exploration of family secrets and the power of films to reveal truths aligns with the introspective and whimsical narratives often seen in Anderson's works.
    - Source: [The Fabelmans Website](https://www.thefabelmans.movie)


In [None]:
for chunk in agent_executor.stream({"input": "Recommend some films similar to Barbie."}):
    print(chunk, end="", flush=True)



[1m> Entering new AgentExecutor chain...[0m
{'actions': [OpenAIToolAgentAction(tool='movie_data', tool_input={'query': 'Barbie'}, log="\nInvoking: `movie_data` with `{'query': 'Barbie'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_UjYry3V8x5GwvxSL1ZnOq7FI', 'function': {'arguments': '{"query":"Barbie"}', 'name': 'movie_data'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'})], tool_call_id='call_UjYry3V8x5GwvxSL1ZnOq7FI')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_UjYry3V8x5GwvxSL1ZnOq7FI', 'function': {'arguments': '{"query":"Barbie"}', 'name': 'movie_data'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'})]}[32;1m[1;3m
Invoking: `movie_data` with `{'query': 'Barbie'}`


[0m[36;1m[1;3mTitle: Barbie
Overview: Barbie and Ken are having the time of their lives in the colorful and seemingly perfect world of Barb

# Hybrid Approach v2

In [None]:
from langchain.chains import RetrievalQA

vector_qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model='gpt-3.5-turbo-0125',
                   temperature=0.5, streaming=True),
    chain_type="stuff",
    retriever=vector_store_window.as_retriever()
)

vector_qa.run(
    "Recommend me some Japanese horror films."
)

In [None]:
kg.refresh_schema()

cypher_chain = GraphCypherQAChain.from_llm(
    cypher_llm=ChatOpenAI(temperature=0, model_name='gpt-4-0125-preview'),
    qa_llm=ChatOpenAI(model='gpt-3.5-turbo-0125', temperature=0), graph=kg, verbose=True,
)

cypher_chain.run(
    "What are the titles of all of the A24 films you have?"
)

In [None]:
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

tools = [
    Tool(
        name="General_Recommendations",
        func=vector_qa.run,
        description="""Useful when you need to recommend films based on keywords,
        titles, or overview.
        Not useful if the user is asking for specific production companies,
        actors, or directors.
        Use full question as input.
        """,
    ),
    Tool(
        name="Specific_Recommedations",
        func=cypher_chain.run,
        description="""Useful when you need to recommend films based on specifics
        such as actors, production companies, or directors. Also useful for any sort of 
        aggregation like counting the number of tasks, etc.
        Use full question as input.
        """,
    ),
]

mrkl = initialize_agent(
    tools,
    ChatOpenAI(temperature=0, model_name='gpt-4-0125-preview'),
    agent=AgentType.OPENAI_FUNCTIONS, verbose=True
)

response = mrkl.run("I like Wes Anderson. Recommend me some films.")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Specific_Recommedations` with `I like Wes Anderson. Recommend me some films.`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (:Director {name: "Wes Anderson"})-[:HAS_DIRECTED]->(film:Film)
RETURN film.title AS RecommendedFilms
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
[33;1m[1;3mI don't know the answer.[0m

Failed to write data to connection IPv4Address(('4c38bf4f.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.66.78.163', 7687)))
Failed to write data to connection ResolvedIPv4Address(('34.66.78.163', 7687)) (ResolvedIPv4Address(('34.66.78.163', 7687)))


KeyboardInterrupt: 