In [None]:
!python -m pip install langchain-community

In [None]:
!python -m pip install langchain

In [None]:
!python -m pip install langchain-openai

In [None]:
!pip install neo4j

In [None]:
!pip install chainlit

In [44]:
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load the environment variables from the .env file
load_dotenv()

# Now, the environment variables are available in os.environ
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_ENDPOINT = os.getenv("OPENAI_ENDPOINT")

# Warning control
import warnings
warnings.filterwarnings("ignore")

print("OK")

OK


# 1. Load graph from json

In [45]:
import json

# Path to the JSON file
file_path = "data/graph_data.json"

# Load JSON data from the file
try:
    with open(file_path, 'r') as file:
         graph_data = json.load(file)
    print("JSON data successfully loaded.")
    print(graph_data)  # Optional: Print the loaded data to verify
except FileNotFoundError:
       print(f"File not found: {file_path}")
except json.JSONDecodeError as e:
       print(f"Error decoding JSON: {e}")

JSON data successfully loaded.
{'nodes': [{'id': 'P1', 'type': 'Person', 'name': 'Alice Smith'}, {'id': 'P2', 'type': 'Person', 'name': 'Bob Johnson'}, {'id': 'P3', 'type': 'Person', 'name': 'Carol Williams'}, {'id': 'P4', 'type': 'Person', 'name': 'David Brown'}, {'id': 'P5', 'type': 'Person', 'name': 'Eve Jones'}, {'id': 'P6', 'type': 'Person', 'name': 'Frank Garcia'}, {'id': 'P7', 'type': 'Person', 'name': 'Grace Miller'}, {'id': 'P8', 'type': 'Person', 'name': 'Henry Davis'}, {'id': 'P9', 'type': 'Person', 'name': 'Irene Martinez'}, {'id': 'P10', 'type': 'Person', 'name': 'Jack Wilson'}, {'id': 'P11', 'type': 'Person', 'name': 'Karen Anderson'}, {'id': 'P12', 'type': 'Person', 'name': 'Larry Thomas'}, {'id': 'P13', 'type': 'Person', 'name': 'Mary Taylor'}, {'id': 'P14', 'type': 'Person', 'name': 'Nancy Moore'}, {'id': 'P15', 'type': 'Person', 'name': 'Oscar Jackson'}, {'id': 'P16', 'type': 'Person', 'name': 'Paul Martin'}, {'id': 'P17', 'type': 'Person', 'name': 'Queen Lee'}, {'id'

# 2. Pre-process graph data and load them into Neo4j Graph Database

# You should get the following Graph in Neo4j Desktop, run the command "match(n) return n" (see Fig below)
![image.png](attachment:6f00eebf-c4bd-46aa-81cf-062a350d468a.png)

In [46]:
from utils.utilsv2 import update_json, GraphLoader, GraphSDK

# Load the graph data
graph_data = update_json(graph_data)
graph_loader = GraphLoader(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, NEO4J_DATABASE)
graph_loader.load_data(graph_data)
graph_loader.close()

print("OK")

OK


# 3. Add Embedding to help answer Q2

In [47]:
query = """
  CREATE VECTOR INDEX doc_tagline_embeddings IF NOT EXISTS
  FOR (n:Document) ON (n.taglineEmbedding)
  OPTIONS {
    indexConfig: {
      `vector.dimensions`: 1536,
      `vector.similarity_function`: 'cosine'
    }
  }
"""
parameters = {}
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
with driver.session(database=NEO4J_DATABASE) as session:
     session.run(query, **parameters)

query = """
        MATCH (doc:Document) WHERE doc.tagline <> 'NONE'
        WITH doc, genai.vector.encode(
             doc.tagline, 
             "OpenAI", 
             {
              token: $openAiApiKey,
              endpoint: $openAiEndpoint
             }) AS vector
        CALL db.create.setNodeVectorProperty(doc, "taglineEmbedding", vector)
        """ 

parameters = {"openAiApiKey":OPENAI_API_KEY, "openAiEndpoint": OPENAI_ENDPOINT}
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
with driver.session(database=NEO4J_DATABASE) as session:
     session.run(query, **parameters)

print("OK")

OK


# 4. Build Minimal SDK for Querying Graph

In [48]:
graph_sdk = GraphSDK(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, NEO4J_DATABASE)

print("OK")

OK


# First we answer Questions Q1-Q4 using only Neo4J Query and later we use Prompt Completion with LLM

#  Neo4J Queries:

# Q1. Which authors have collaborated with Alice Smith on any papers?

In [49]:
query = """
        MATCH (author1:Person {type: "Person", name: $author_name})-[:is_author_of]->(doc:Document)
        WHERE doc.type IN ["Paper", "Wikipage"]
        MATCH (doc)<-[:is_author_of]-(author2:Person {type: "Person"})
        WHERE author1 <> author2
        WITH COLLECT(DISTINCT author2.name) AS collaborators
        RETURN 'The collaborators of ' + $author_name + ' are: ' + REDUCE(s = '', name IN collaborators | s + CASE s WHEN '' THEN '' ELSE ', ' END + name)
        """
collaborators = graph_sdk.execute_query(query=query, parameters={"author_name": "Alice Smith"})
print(collaborators)

The collaborators of Alice Smith are: Bob Johnson, Carol Williams, Eve Jones, Tom Lewis


# Q2. What is the most influential papers in “Symbolic Artificial Intelligence”?

### Citations and Similarity score query

In [50]:
question = "Symbolic Artificial Intelligence. symbolic AI. symbolic machine learning. symbolic ML."

query = """
WITH genai.vector.encode(
    $question, 
    "OpenAI", 
    {
      token: $openAiApiKey,
      endpoint: $openAiEndpoint
    }
) AS question_embedding
CALL db.index.vector.queryNodes(
    'doc_tagline_embeddings', 
    $top_k, 
    question_embedding
) YIELD node AS doc, score
WHERE score > $similarity_threshold AND doc.type = 'Paper'
MATCH (doc)<-[r]-(citation)
WHERE type(r) IN ['cited', 'mention_concept']
WITH doc, score, COUNT(r) AS CitationCount
ORDER BY score DESC, CitationCount DESC
RETURN '[' + apoc.text.join(
    COLLECT(
        '{"id": "' + doc.id + '", "tagline": "' + doc.tagline + '", "similarity_score": ' + toString(score) + ', "CitationCount": ' + toString(CitationCount) + '}' 
    ), 
    ','
) + ']' AS output

"""
parameters = {
    "openAiApiKey": OPENAI_API_KEY,
    "openAiEndpoint": OPENAI_ENDPOINT,
    "question": question,
    "top_k": 100,  # Ensure it's large enough to capture all relevant candidates
    "similarity_threshold": 0.91  # Passing the similarity threshold as a parameter
}

number_of_cited_similarity_scores = graph_sdk.execute_query(query=query, parameters = parameters)
number_of_cited_similarity_scores

'[{"id": "Pa6", "tagline": " Paper Title: Advancements in Symbolic Machine Learning. Abstract: An analysis of recent advancements in symbolic machine learning, including inductive logic programming and relational learning. ", "similarity_score": 0.9351348876953125, "CitationCount": 1},{"id": "Pa1", "tagline": " Paper Title: Symbolic Reasoning in AI Systems. Abstract: This paper discusses the implementation of symbolic reasoning methods in artificial intelligence systems, focusing on knowledge representation and inference mechanisms. ", "similarity_score": 0.93121337890625, "CitationCount": 3},{"id": "Pa4", "tagline": " Paper Title: Combining Symbolic and Subsymbolic Methods. Abstract: This paper investigates the integration of symbolic and subsymbolic approaches to enhance AI system capabilities. ", "similarity_score": 0.9283599853515625, "CitationCount": 2},{"id": "Pa2", "tagline": " Paper Title: A Survey of Knowledge Representation Techniques. Abstract: This survey reviews various kn

# Q3. What is the expertise of “Henry Davis”?

In [51]:
full_name = "Henry Davis"
query = """
        MATCH (person:Person {name: $full_name})-[:is_author_of]->(doc:Document)
        WITH COLLECT(doc.tagline) AS taglines
        RETURN COALESCE(REDUCE(s = '', tagline IN taglines | s + ' ' + tagline), 'No taglines available') AS result
"""
expertise = graph_sdk.execute_query(query=query, parameters={"full_name": full_name})
print(f"Expertise of {full_name}: ", expertise)

Expertise of Henry Davis:    Paper Title: Combining Symbolic and Subsymbolic Methods. Abstract: This paper investigates the integration of symbolic and subsymbolic approaches to enhance AI system capabilities.   Wikipage Title: Expert Systems. Abstract: Expert systems are AI programs that simulate the judgment and behavior of a human or an organization that has expert knowledge and experience in a particular field. 


# Q4. What is the focus of the "Journal of Artificial Intelligence Research” and how does it differentiate from other journals?

In [52]:
query = """
MATCH (p:Paper)-[:was_published_in]->(j:Journal)
WITH 
    j.name AS journal_name,
    COLLECT(p.tagline) AS paper_taglines
RETURN 
    apoc.text.join(COLLECT(
        CASE 
            WHEN journal_name = $journal_name THEN 
                'Papers and Abstracts from "' + $journal_name + '":\n' + 
                apoc.text.join(paper_taglines, ' \n') + '\n'
            ELSE 
                'Other Journal: ' + journal_name + '\nPapers and Abstracts:\n' + 
                apoc.text.join(paper_taglines, ' \n') + '\n'
        END
    ), '\n') AS result

"""

journal_name = "Journal of Artificial Intelligence Research"
journal_focus_other_journals = graph_sdk.execute_query(query=query, parameters={"journal_name": journal_name})
journal_focus_other_journals

'Papers and Abstracts from "Journal of Artificial Intelligence Research":\n Paper Title: Symbolic Reasoning in AI Systems. Abstract: This paper discusses the implementation of symbolic reasoning methods in artificial intelligence systems, focusing on knowledge representation and inference mechanisms.  \n Paper Title: Combining Symbolic and Subsymbolic Methods. Abstract: This paper investigates the integration of symbolic and subsymbolic approaches to enhance AI system capabilities.  \n Paper Title: Rule-Based Systems in Modern AI. Abstract: An examination of rule-based systems and their applications in current artificial intelligence research. \n\nOther Journal: AI Magazine\nPapers and Abstracts:\n Paper Title: A Survey of Knowledge Representation Techniques. Abstract: This survey reviews various knowledge representation techniques used in symbolic AI, including semantic networks, frames, and ontologies.  \n Paper Title: Expert Systems: Principles and Programming. Abstract: A comprehen

# Prompt Completion with LLM:

# Q1. Which authors have collaborated with Alice Smith on any papers?

For this question just use Neo4J query only, see above, because of simplicity I did not use LLM completion.

# Q2. What is the most influential papers in Symbolic Artificial Intelligence?

In [54]:
# Template for most influential paper
def get_most_influential_paper_template(topic="Symbolic Artificial Intelligence"):
    template = """
       Given the following dataset of papers with their respective `CitationCount` and `similarity_score`, select the paper that is considered the most influential in the field of """ + topic + """ using the following rules:

       1. Identify the paper with the **highest `CitationCount`**.
       2. If two or more papers have the same highest `CitationCount`, select the paper with the **highest `similarity_score`** among them.

       Dataset:
       {papers}

       Write the output of the selected paper in a few sentences. Provide and describe similarity score and citation count

    """
    return template

# Use the template and generate the output
topic = "Symbolic Artificial Intelligence"
template = get_most_influential_paper_template(topic)

# Generate the most influential paper output
influential_paper = graph_sdk.generate_wrapped_text(template, number_of_cited_similarity_scores)
print("Q2,  the most influential papers in topic of 'Symbolic Artificial Intelligence':")
print(influential_paper)

Q2,  the most influential papers in topic of 'Symbolic Artificial Intelligence':
The paper with the highest citation count and highest similarity score is "Symbolic Reasoning in AI
Systems". It has a citation count of 3 and a similarity score of 0.93121337890625. This paper
discusses the implementation of symbolic reasoning methods in artificial intelligence systems,
focusing on knowledge representation and inference mechanisms. It is considered the most influential
paper in the field of Symbolic Artificial Intelligence due to its high citation count and its
comprehensive analysis of symbolic reasoning methods.


# Q3. What is the expertise of Henry Davis?

In [55]:
# expertise
def get_expertise_template(author_name):
    template = """
       Based on the provided taglines, abstracts, and titles authored by """ + author_name + """, identify his area of expertise. Analyze the content to determine the common themes, methodologies, or domains of knowledge he works on. Provide a concise summary of his expertise.

    {text_input}

    Answer:
    """
    return template

author_name = "Henry Davis"
template = get_expertise_template(author_name)

output = graph_sdk.generate_wrapped_text(template, expertise)
print(f"Q3. Expertise of {author_name}:")
print(output)

Q3. Expertise of Henry Davis:

Henry Davis is an expert in the field of artificial intelligence, specifically in the integration
of symbolic and subsymbolic methods. His work focuses on enhancing AI system capabilities through
the combination of these two approaches. He also has expertise in expert systems, which are AI
programs that simulate human judgment and behavior in a specific field. Davis' research likely
involves the use of both theoretical and practical methodologies to develop and improve these
systems.


# Q4. What is the focus of the "Journal of Artificial Intelligence Research” and how does it differentiate from other journals?

In [56]:
def get_journal_analysis_template(journal_name: str) -> str:
    """
    Generate a template for analyzing the focus and differentiation of a journal.

    Args:
        journal_name (str): The name of the journal to analyze.

    Returns:
        str: A formatted template for use in the LLM prompt.
    """
    template = f"""
        You are an expert in academic research analysis. Based on the provided titles and abstracts of research papers, 
        analyze the focus of the journal '{journal_name}' and how it differentiates itself from other journals. 
        Highlight its key themes, areas of emphasis, and any distinguishing characteristics.

        Journal Data:
        {{input_text}}

        Answer:
    """
    return template

# Example usage
journal_name = 'Journal of Artificial Intelligence Research'
journal_analysis_template =  get_journal_analysis_template(journal_name)

journal_focus = graph_sdk.generate_wrapped_text(journal_analysis_template, 
                                                journal_focus_other_journals,
                                                temperature = 0, 
                                                max_tokens = 400)
print("Q4. The focus of the 'Journal of Artificial Intelligence Research' and how does it differentiate from other journals:")
print(journal_focus)

Q4. The focus of the 'Journal of Artificial Intelligence Research' and how does it differentiate from other journals:

The Journal of Artificial Intelligence Research (JAIR) focuses on the integration of symbolic and
subsymbolic methods in artificial intelligence systems. This is evident in the titles and abstracts
of the papers published in the journal, which all revolve around this theme. The journal also places
a strong emphasis on knowledge representation and inference mechanisms, as seen in the first two
paper titles and abstracts. This suggests that JAIR is interested in exploring how symbolic and
subsymbolic methods can be used to represent and reason with knowledge in AI systems.

One key
distinguishing characteristic of JAIR is its focus on rule-based systems. This is evident in the
third paper title and abstract, which specifically mentions rule-based systems and their
applications in current AI research. This sets JAIR apart from other journals, as it highlights the
importan

# END OF FINAL SOLUTION

In [57]:
from langchain_neo4j import Neo4jGraph

# Provide the necessary credentials to connect to your Neo4j database
graph = Neo4jGraph(
    url = NEO4J_URI,
    username = NEO4J_USERNAME,
    password = NEO4J_PASSWORD,
    database = NEO4J_DATABASE
)

# Refresh and print the schema
graph.refresh_schema()
print(graph.schema)

Node properties:
Person {id: STRING, name: STRING, tagline: STRING, type: STRING}
Entity {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Journal {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Document {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Paper {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Wikipage {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Relationship properties:

The relationships:
(:Person)-[:is_author_of]->(:Entity)
(:Person)-[:is_author_of]->(:Document)
(:Person)-[:is_author_of]->(:Paper)
(:Person)-[:is_author_of]->(:Wikipage)
(:Entity)-[:is_author_of]->(:Entity)
(:Entity)-[:is_author_of]->(:Document)
(:Entity)-[:is_author_of]->(:Paper)
(:Entity)-[:is_author_of]->(:Wikipage)
(:Entity)-[:was_published_in]->(:Entity)
(:Entity)-[:was_published_in]->(:Journal)
(:Entity)-[:was_published_in]->(:Docume

In [None]:
!pip install langchain_neo4j

In [None]:
examples = [
    {
        "question": "Which authors have collaborated with Alice Smith on any papers?",
        "query": """MATCH (author1:Person {type: "Person", name: 'Alice Smith'})-[:is_author_of]->(doc:Document)
        WHERE doc.type IN ["Paper", "Wikipage"]
        MATCH (doc)<-[:is_author_of]-(author2:Person {type: "Person"})
        WHERE author1 <> author2
        WITH COLLECT(DISTINCT author2.name) AS collaborators
        RETURN 'The collaborators of ' + $author_name + ' are: ' + REDUCE(s = '', name IN collaborators | s + CASE s WHEN '' THEN '' ELSE ', ' END + name)
        """,
    },
    {
        "question": "What is the expertise of 'Henry Davis'?",
        "query": """
        MATCH (person:Person {name: 'Henry Davis'})-[:is_author_of]->(doc:Document)
        WITH COLLECT(doc.tagline) AS taglines
        RETURN COALESCE(REDUCE(s = '', tagline IN taglines | s + ' ' + tagline), 'No taglines available') AS result
        """,
    },
    {
        "question": "What is the focus of the 'Journal of Artificial Intelligence Research' and how does it differentiate from other journals?",
        "query": """
        MATCH (p:Paper)-[:was_published_in]->(j:Journal)
        WITH 
           j.name AS journal_name,
           COLLECT(p.tagline) AS paper_taglines
        RETURN 
        apoc.text.join(COLLECT(
        CASE 
            WHEN journal_name = 'Journal of Artificial Intelligence Research' THEN 
                'Papers and Abstracts from "' + 'Journal of Artificial Intelligence Research' + '":\n' + 
                apoc.text.join(paper_taglines, ' \n') + '\n'
            ELSE 
                'Other Journal: ' + journal_name + '\nPapers and Abstracts:\n' + 
                apoc.text.join(paper_taglines, ' \n') + '\n'
        END
        ), '\n') AS result
        """,
    },
]   

In [59]:
examples = [
    {
        "question": "How many authors are there?",
        "query": "MATCH (a:Person)-[:is_author_of]->(:Paper) RETURN count(DISTINCT a)",
    },
    {
        "question": "Which author published the paper Combining Symbolic and Subsymbolic Methods?",
        "query": "MATCH (p:Paper {{name: 'Combining Symbolic and Subsymbolic Methods'}})<-[:is_author_of]-(a) RETURN a.name",
    },
    {
        "question": "How many papers has Alice Smith published?",
        "query": "MATCH (a:Person {{name: 'Alice Smith'}})-[:is_author_of]->(p:Paper) RETURN count(p)",
    }
]

print("OK")

OK


In [60]:
from langchain_neo4j import GraphCypherQAChain
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    exclude_types=["Genre"],
    verbose=True,
    allow_dangerous_requests=True,
)

In [61]:
print(chain.graph_schema)

Node properties are the following:
Person {id: STRING, name: STRING, tagline: STRING, type: STRING},Entity {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST},Journal {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST},Document {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST},Paper {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST},Wikipage {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Relationship properties are the following:

The relationships are the following:
(:Person)-[:is_author_of]->(:Entity),(:Person)-[:is_author_of]->(:Document),(:Person)-[:is_author_of]->(:Paper),(:Person)-[:is_author_of]->(:Wikipage),(:Entity)-[:is_author_of]->(:Entity),(:Entity)-[:is_author_of]->(:Document),(:Entity)-[:is_author_of]->(:Paper),(:Entity)-[:is_author_of]->(:Wikipage),(:Entity)-[:was_published_in]->(:Entity),(:Entity)-[:was_published_i

In [62]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)
prompt = FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

In [63]:
print(prompt.format(question="How many artists are there?", schema="foo"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
foo.

Below are a number of examples of questions and their corresponding Cypher queries.

User input: How many authors are there?
Cypher query: MATCH (a:Person)-[:is_author_of]->(:Paper) RETURN count(DISTINCT a)

User input: Which author published the paper Combining Symbolic and Subsymbolic Methods?
Cypher query: MATCH (p:Paper {name: 'Combining Symbolic and Subsymbolic Methods'})<-[:is_author_of]-(a) RETURN a.name

User input: How many papers has Alice Smith published?
Cypher query: MATCH (a:Person {name: 'Alice Smith'})-[:is_author_of]->(p:Paper) RETURN count(p)

User input: How many artists are there?
Cypher query: 


In [64]:
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_neo4j import Neo4jVector
from langchain_openai import OpenAIEmbeddings

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    Neo4jVector,
    k=5,
    input_keys=["question"],
)

In [65]:
example_selector.select_examples({"question": "how many artists are there?"})

[{'query': 'MATCH (a:Person)-[:is_author_of]->(:Paper) RETURN count(DISTINCT a)',
  'question': 'How many authors are there?'},
 {'query': "MATCH (a:Person {{name: 'Alice Smith'}})-[:is_author_of]->(p:Paper) RETURN count(p)",
  'question': 'How many papers has Alice Smith published?'},
 {'query': "MATCH (p:Paper {{name: 'Combining Symbolic and Subsymbolic Methods'}})<-[:is_author_of]-(a) RETURN a.name",
  'question': 'Which author published the paper Combining Symbolic and Subsymbolic Methods?'}]

In [66]:
prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

In [67]:
print(prompt.format(question="how many authors are there?", schema="foo"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
foo.

Below are a number of examples of questions and their corresponding Cypher queries.

User input: How many authors are there?
Cypher query: MATCH (a:Person)-[:is_author_of]->(:Paper) RETURN count(DISTINCT a)

User input: How many papers has Alice Smith published?
Cypher query: MATCH (a:Person {name: 'Alice Smith'})-[:is_author_of]->(p:Paper) RETURN count(p)

User input: Which author published the paper Combining Symbolic and Subsymbolic Methods?
Cypher query: MATCH (p:Paper {name: 'Combining Symbolic and Subsymbolic Methods'})<-[:is_author_of]-(a) RETURN a.name

User input: how many authors are there?
Cypher query: 


In [68]:
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    cypher_prompt=prompt,
    verbose=True,
    allow_dangerous_requests=True,
)

In [69]:
chain.invoke("How many authors are in the graph?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Person) RETURN count(a)[0m
Full Context:
[32;1m[1;3m[{'count(a)': 20}][0m

[1m> Finished chain.[0m


{'query': 'How many authors are in the graph?',
 'result': 'There are 20 authors in the graph.'}

In [16]:
from langchain_neo4j import Neo4jGraph

graph = Neo4jGraph()

# Import movie information

movies_query = """
LOAD CSV WITH HEADERS FROM 
'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv'
AS row
MERGE (m:Movie {id:row.movieId})
SET m.released = date(row.released),
    m.title = row.title,
    m.imdbRating = toFloat(row.imdbRating)
FOREACH (director in split(row.director, '|') | 
    MERGE (p:Person {name:trim(director)})
    MERGE (p)-[:DIRECTED]->(m))
FOREACH (actor in split(row.actors, '|') | 
    MERGE (p:Person {name:trim(actor)})
    MERGE (p)-[:ACTED_IN]->(m))
FOREACH (genre in split(row.genres, '|') | 
    MERGE (g:Genre {name:trim(genre)})
    MERGE (m)-[:IN_GENRE]->(g))
"""

graph.query(movies_query)

[]

In [43]:
graph

<langchain_neo4j.graphs.neo4j_graph.Neo4jGraph at 0x14f18608920>

In [30]:
from langchain_neo4j import Neo4jGraph

# Provide the necessary credentials to connect to your Neo4j database
graph = Neo4jGraph(
    url = NEO4J_URI,
    username = NEO4J_USERNAME,
    password = NEO4J_PASSWORD,
    database = NEO4J_DATABASE
)

# Refresh and print the schema
graph.refresh_schema()
print(graph.schema)

Node properties:
Movie {id: STRING, released: DATE, title: STRING, imdbRating: FLOAT}
Person {name: STRING}
Genre {name: STRING}
Chunk {id: STRING, embedding: LIST, text: STRING, question: STRING, query: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)


In [31]:
graph.refresh_schema()
print(graph.schema)

Node properties:
Movie {id: STRING, released: DATE, title: STRING, imdbRating: FLOAT}
Person {name: STRING}
Genre {name: STRING}
Chunk {id: STRING, embedding: LIST, text: STRING, question: STRING, query: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)


In [32]:
from langchain_neo4j import GraphCypherQAChain
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    exclude_types=["Genre"],
    verbose=True,
    allow_dangerous_requests=True,
)

In [33]:
print(chain.graph_schema)

Node properties are the following:
Movie {id: STRING, released: DATE, title: STRING, imdbRating: FLOAT},Person {name: STRING},Chunk {id: STRING, embedding: LIST, text: STRING, question: STRING, query: STRING}
Relationship properties are the following:

The relationships are the following:
(:Person)-[:DIRECTED]->(:Movie),(:Person)-[:ACTED_IN]->(:Movie)


In [34]:
examples = [
    {
        "question": "How many artists are there?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)",
    },
    {
        "question": "Which actors played in the movie Casino?",
        "query": "MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name",
    },
    {
        "question": "How many movies has Tom Hanks acted in?",
        "query": "MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count(m)",
    },
    {
        "question": "List all the genres of the movie Schindler's List",
        "query": "MATCH (m:Movie {{title: 'Schindler\\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name",
    },
    {
        "question": "Which actors have worked in movies from both the comedy and action genres?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name",
    },
    {
        "question": "Which directors have made movies with at least three different actors named 'John'?",
        "query": "MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name",
    },
    {
        "question": "Identify movies where directors also played a role in the film.",
        "query": "MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m) RETURN m.title, p.name",
    },
    {
        "question": "Find the actor with the highest number of movies in the database.",
        "query": "MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1",
    },
]

In [35]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)
prompt = FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

In [36]:
print(prompt.format(question="How many artists are there?", schema="foo"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
foo.

Below are a number of examples of questions and their corresponding Cypher queries.

User input: How many artists are there?
Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)

User input: Which actors played in the movie Casino?
Cypher query: MATCH (m:Movie {title: 'Casino'})<-[:ACTED_IN]-(a) RETURN a.name

User input: How many movies has Tom Hanks acted in?
Cypher query: MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)

User input: List all the genres of the movie Schindler's List
Cypher query: MATCH (m:Movie {title: 'Schindler\'s List'})-[:IN_GENRE]->(g:Genre) RETURN g.name

User input: Which actors have worked in movies from both the comedy and action genres?
Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g

In [37]:
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_neo4j import Neo4jVector
from langchain_openai import OpenAIEmbeddings

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    Neo4jVector,
    k=5,
    input_keys=["question"],
)

In [38]:
example_selector.select_examples({"question": "how many artists are there?"})

[{'query': 'MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)',
  'question': 'How many artists are there?'},
 {'query': "MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count(m)",
  'question': 'How many movies has Tom Hanks acted in?'},
 {'query': "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name",
  'question': 'Which actors have worked in movies from both the comedy and action genres?'},
 {'query': "MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name",
  'question': "Which directors have made movies with at least three different actors named 'John'?"},
 {'query': 'MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1',
  'question': 'Find th

In [39]:
prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

In [40]:
print(prompt.format(question="how many artists are there?", schema="foo"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
foo.

Below are a number of examples of questions and their corresponding Cypher queries.

User input: How many artists are there?
Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)

User input: How many movies has Tom Hanks acted in?
Cypher query: MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)

User input: Which actors have worked in movies from both the comedy and action genres?
Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name

User input: Which directors have made movies with at least three different actors named 'John'?
Cypher query: MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINC

In [41]:
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    cypher_prompt=prompt,
    verbose=True,
    allow_dangerous_requests=True,
)

In [42]:
chain.invoke("How many actors are in the graph?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)[0m
Full Context:
[32;1m[1;3m[{'count(DISTINCT a)': 967}][0m

[1m> Finished chain.[0m


{'query': 'How many actors are in the graph?',
 'result': 'There are 967 actors in the graph.'}