In [None]:
!python -m pip install langchain-community

In [None]:
!python -m pip install langchain

In [None]:
!python -m pip install langchain-openai

In [None]:
!pip install neo4j

In [None]:
!pip install chainlit

In [6]:
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load the environment variables from the .env file
load_dotenv()

# Now, the environment variables are available in os.environ
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_ENDPOINT = os.getenv("OPENAI_ENDPOINT")

# Warning control
import warnings
warnings.filterwarnings("ignore")

print("OK")

OK


# 1. Load graph from json

In [3]:
import json

# Path to the JSON file
file_path = "data/graph_data.json"

# Load JSON data from the file
try:
    with open(file_path, 'r') as file:
         graph_data = json.load(file)
    print("JSON data successfully loaded.")
    print(graph_data)  # Optional: Print the loaded data to verify
except FileNotFoundError:
       print(f"File not found: {file_path}")
except json.JSONDecodeError as e:
       print(f"Error decoding JSON: {e}")

JSON data successfully loaded.
{'nodes': [{'id': 'P1', 'type': 'Person', 'name': 'Alice Smith'}, {'id': 'P2', 'type': 'Person', 'name': 'Bob Johnson'}, {'id': 'P3', 'type': 'Person', 'name': 'Carol Williams'}, {'id': 'P4', 'type': 'Person', 'name': 'David Brown'}, {'id': 'P5', 'type': 'Person', 'name': 'Eve Jones'}, {'id': 'P6', 'type': 'Person', 'name': 'Frank Garcia'}, {'id': 'P7', 'type': 'Person', 'name': 'Grace Miller'}, {'id': 'P8', 'type': 'Person', 'name': 'Henry Davis'}, {'id': 'P9', 'type': 'Person', 'name': 'Irene Martinez'}, {'id': 'P10', 'type': 'Person', 'name': 'Jack Wilson'}, {'id': 'P11', 'type': 'Person', 'name': 'Karen Anderson'}, {'id': 'P12', 'type': 'Person', 'name': 'Larry Thomas'}, {'id': 'P13', 'type': 'Person', 'name': 'Mary Taylor'}, {'id': 'P14', 'type': 'Person', 'name': 'Nancy Moore'}, {'id': 'P15', 'type': 'Person', 'name': 'Oscar Jackson'}, {'id': 'P16', 'type': 'Person', 'name': 'Paul Martin'}, {'id': 'P17', 'type': 'Person', 'name': 'Queen Lee'}, {'id'

# 2. Pre-process graph data and load them into Neo4j Graph Database

# You should get the following Graph in Neo4j Desktop, run the command "match(n) return n" (see Fig below)
![image.png](attachment:f099d687-be05-40fe-b27b-0dc8ad9e6aa0.png)

In [4]:
from utils.utils import update_json, GraphLoader, GraphSDK

# Load the graph data
graph_data = update_json(graph_data)
graph_loader = GraphLoader(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, NEO4J_DATABASE)
graph_loader.load_data(graph_data)
graph_loader.close()

print("OK")

OK


# 3. Add Embedding to help answer Q2

In [7]:
query = """
  CREATE VECTOR INDEX doc_tagline_embeddings IF NOT EXISTS
  FOR (n:Document) ON (n.taglineEmbedding)
  OPTIONS {
    indexConfig: {
      `vector.dimensions`: 1536,
      `vector.similarity_function`: 'cosine'
    }
  }
"""
parameters = {}
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
with driver.session(database=NEO4J_DATABASE) as session:
     session.run(query, **parameters)

query = """
        MATCH (doc:Document) WHERE doc.tagline <> 'NONE'
        WITH doc, genai.vector.encode(
             doc.tagline, 
             "OpenAI", 
             {
              token: $openAiApiKey,
              endpoint: $openAiEndpoint
             }) AS vector
        CALL db.create.setNodeVectorProperty(doc, "taglineEmbedding", vector)
        """ 

parameters = {"openAiApiKey":OPENAI_API_KEY, "openAiEndpoint": OPENAI_ENDPOINT}
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
with driver.session(database=NEO4J_DATABASE) as session:
     session.run(query, **parameters)

# 4. Build Minimal SDK for Querying Graph

In [8]:
graph_sdk = GraphSDK(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, NEO4J_DATABASE)

print("OK")

OK


# First we answer Questions Q1-Q4 using only Neo4J Query and later we use Prompt Completion with LLM

#  Neo4J Queries:

# Q1. Which authors have collaborated with Alice Smith on any papers?

In [9]:
query = """
        MATCH (author1:Person {type: "Person", name: $author_name})-[:RELATION {type: "is_author_of"}]->(doc:Document)
        WHERE doc.type IN ["Paper", "Wikipage"]
        MATCH (doc)<-[:RELATION {type: "is_author_of"}]-(author2:Person {type: "Person"})
        WHERE author1 <> author2
        WITH COLLECT(DISTINCT author2.name) AS collaborators
        RETURN 'The collaborators of ' + $author_name + ' are: ' + REDUCE(s = '', name IN collaborators | s + CASE s WHEN '' THEN '' ELSE ', ' END + name)
        """
collaborators = graph_sdk.execute_query(query=query, parameters={"author_name": "Alice Smith"})
print(collaborators)

The collaborators of Alice Smith are: Bob Johnson, Carol Williams, Eve Jones, Tom Lewis


# Q2. What is the most influential papers in “Symbolic Artificial Intelligence”?

### Citations and Similarity score query

In [10]:
question = "Symbolic Artificial Intelligence. symbolic AI. symbolic machine learning. symbolic ML."

query = """
    WITH genai.vector.encode(
        $question, 
        "OpenAI", 
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS question_embedding
    CALL db.index.vector.queryNodes(
        'doc_tagline_embeddings', 
        $top_k, 
        question_embedding
        ) YIELD node AS doc, score
    WHERE score > $similarity_threshold AND doc.type = 'Paper'
    MATCH (doc)<-[r:RELATION {type: 'cited'}]-(citation)
    WITH doc, score, COUNT(citation) AS CitationCount
    ORDER BY score DESC, CitationCount DESC
    RETURN '[' + apoc.text.join(
        COLLECT(
            '{"id": "' + doc.id + '", "tagline": "' + doc.tagline + '", "similarity_score": ' + toString(score) + ', "CitationCount": ' + toString(CitationCount) + '}' 
        ), 
        ','
    ) + ']' AS output
"""
parameters = {
    "openAiApiKey": OPENAI_API_KEY,
    "openAiEndpoint": OPENAI_ENDPOINT,
    "question": question,
    "top_k": 100,  # Ensure it's large enough to capture all relevant candidates
    "similarity_threshold": 0.91  # Passing the similarity threshold as a parameter
}

number_of_cited_similarity_scores = graph_sdk.execute_query(query=query, parameters = parameters)
number_of_cited_similarity_scores

'[{"id": "Pa10", "tagline": " Paper Title: Symbolic AI and Natural Language Processing. Abstract: An investigation into how symbolic AI techniques are applied in natural language processing tasks. ", "similarity_score": 0.93634033203125, "CitationCount": 2},{"id": "Pa6", "tagline": " Paper Title: Advancements in Symbolic Machine Learning. Abstract: An analysis of recent advancements in symbolic machine learning, including inductive logic programming and relational learning. ", "similarity_score": 0.9351348876953125, "CitationCount": 2},{"id": "Pa1", "tagline": " Paper Title: Symbolic Reasoning in AI Systems. Abstract: This paper discusses the implementation of symbolic reasoning methods in artificial intelligence systems, focusing on knowledge representation and inference mechanisms. ", "similarity_score": 0.93121337890625, "CitationCount": 5},{"id": "Pa4", "tagline": " Paper Title: Combining Symbolic and Subsymbolic Methods. Abstract: This paper investigates the integration of symboli

# Q3. What is the expertise of “Henry Davis”?

In [11]:
full_name = "Henry Davis"
query = """
        MATCH (person:Person {name: $full_name})-[:RELATION {type: 'is_author_of'}]->(doc:Document)
        WITH COLLECT(doc.tagline) AS taglines
        RETURN COALESCE(REDUCE(s = '', tagline IN taglines | s + ' ' + tagline), 'No taglines available') AS result
"""
expertise = graph_sdk.execute_query(query=query, parameters={"full_name": full_name})
print(f"Expertise of {full_name}: ", expertise)

Expertise of Henry Davis:    Paper Title: Combining Symbolic and Subsymbolic Methods. Abstract: This paper investigates the integration of symbolic and subsymbolic approaches to enhance AI system capabilities.   Wikipage Title: Expert Systems. Abstract: Expert systems are AI programs that simulate the judgment and behavior of a human or an organization that has expert knowledge and experience in a particular field. 


# Q4. What is the focus of the "Journal of Artificial Intelligence Research” and how does it differentiate from other journals?

In [12]:
query = """
MATCH (p:Paper)-[:RELATION {type: 'was_published_in'}]->(j:Journal)
WITH 
    j.name AS journal_name,
    COLLECT(p.tagline) AS paper_taglines
RETURN 
    apoc.text.join(COLLECT(
        CASE 
            WHEN journal_name = $journal_name THEN 
                'Papers and Abstracts from "' + $journal_name + '":\n' + 
                apoc.text.join(paper_taglines, ' \n') + '\n'
            ELSE 
                'Other Journal: ' + journal_name + '\nPapers and Abstracts:\n' + 
                apoc.text.join(paper_taglines, ' \n') + '\n'
        END
    ), '\n') AS result

"""

journal_name = "Journal of Artificial Intelligence Research"
journal_focus_other_journals = graph_sdk.execute_query(query=query, parameters={"journal_name": journal_name})
journal_focus_other_journals

'Papers and Abstracts from "Journal of Artificial Intelligence Research":\n Paper Title: Symbolic Reasoning in AI Systems. Abstract: This paper discusses the implementation of symbolic reasoning methods in artificial intelligence systems, focusing on knowledge representation and inference mechanisms.  \n Paper Title: Combining Symbolic and Subsymbolic Methods. Abstract: This paper investigates the integration of symbolic and subsymbolic approaches to enhance AI system capabilities.  \n Paper Title: Rule-Based Systems in Modern AI. Abstract: An examination of rule-based systems and their applications in current artificial intelligence research. \n\nOther Journal: AI Magazine\nPapers and Abstracts:\n Paper Title: A Survey of Knowledge Representation Techniques. Abstract: This survey reviews various knowledge representation techniques used in symbolic AI, including semantic networks, frames, and ontologies.  \n Paper Title: Expert Systems: Principles and Programming. Abstract: A comprehen

# Prompt Completion with LLM:

# Q1. Which authors have collaborated with Alice Smith on any papers?

For this question just use Neo4J query only, see above, because of simplicity I did not use LLM completion.

# Q2. What is the most influential papers in Symbolic Artificial Intelligence?

In [13]:
# Template for most influential paper
def get_most_influential_paper_template(topic="Symbolic Artificial Intelligence"):
    template = """
       Given the following dataset of papers with their respective `CitationCount` and `similarity_score`, select the paper that is considered the most influential in the field of """ + topic + """ using the following rules:

       1. Identify the paper with the **highest `CitationCount`**.
       2. If two or more papers have the same highest `CitationCount`, select the paper with the **highest `similarity_score`** among them.

       Dataset:
       {papers}

       Write the output of the selected paper in a few sentences.

    """
    return template

# Use the template and generate the output
topic = "Symbolic Artificial Intelligence"
template = get_most_influential_paper_template(topic)

# Generate the most influential paper output
influential_paper = graph_sdk.generate_wrapped_text(template, number_of_cited_similarity_scores)
print("Q2,  the most influential papers in topic of 'Symbolic Artificial Intelligence':")
print(influential_paper)

Q2,  the most influential papers in topic of 'Symbolic Artificial Intelligence':

The paper with the highest `CitationCount` and `similarity_score` is "Pa1: Symbolic Reasoning in AI
Systems". This paper discusses the implementation of symbolic reasoning methods in artificial
intelligence systems, focusing on knowledge representation and inference mechanisms. It has a
`CitationCount` of 5 and a `similarity_score` of 0.93121337890625, making it the most influential
paper in the field of Symbolic Artificial Intelligence.


# Q3. What is the expertise of Henry Davis?

In [14]:
# expertise
def get_expertise_template(author_name):
    template = """
       Based on the provided taglines, abstracts, and titles authored by """ + author_name + """, identify his area of expertise. Analyze the content to determine the common themes, methodologies, or domains of knowledge he works on. Provide a concise summary of his expertise.

    {text_input}

    Answer:
    """
    return template

author_name = "Henry Davis"
template = get_expertise_template(author_name)

output = graph_sdk.generate_wrapped_text(template, expertise)
print(f"Q3. Expertise of {author_name}:")
print(output)

Q3. Expertise of Henry Davis:

Henry Davis is an expert in the field of artificial intelligence, specifically in the integration
of symbolic and subsymbolic methods. His work focuses on enhancing AI system capabilities through
the combination of these two approaches. He also has expertise in expert systems, which are AI
programs that simulate human judgment and behavior in a specific field. Davis' research likely
involves the use of both theoretical and practical methodologies to develop and improve these
systems.


# Q4. What is the focus of the "Journal of Artificial Intelligence Research” and how does it differentiate from other journals?

In [15]:
def get_journal_analysis_template(journal_name: str) -> str:
    """
    Generate a template for analyzing the focus and differentiation of a journal.

    Args:
        journal_name (str): The name of the journal to analyze.

    Returns:
        str: A formatted template for use in the LLM prompt.
    """
    template = f"""
        You are an expert in academic research analysis. Based on the provided titles and abstracts of research papers, 
        analyze the focus of the journal '{journal_name}' and how it differentiates itself from other journals. 
        Highlight its key themes, areas of emphasis, and any distinguishing characteristics.

        Journal Data:
        {{input_text}}

        Answer:
    """
    return template

# Example usage
journal_name = 'Journal of Artificial Intelligence Research'
journal_analysis_template =  get_journal_analysis_template(journal_name)

journal_focus = graph_sdk.generate_wrapped_text(journal_analysis_template, 
                                                journal_focus_other_journals,
                                                temperature = 0, 
                                                max_tokens = 400)
print("Q4. The focus of the 'Journal of Artificial Intelligence Research' and how does it differentiate from other journals:")
print(journal_focus)

Q4. The focus of the 'Journal of Artificial Intelligence Research' and how does it differentiate from other journals:

The Journal of Artificial Intelligence Research (JAIR) focuses on the integration of symbolic and
subsymbolic methods in artificial intelligence systems. This is evident in the titles and abstracts
of the papers published in the journal, which all revolve around this theme. The journal also places
a strong emphasis on knowledge representation and inference mechanisms, as seen in the first two
paper titles and abstracts. This suggests that JAIR is interested in exploring how symbolic and
subsymbolic methods can be used to represent and reason with knowledge in AI systems.

One key
distinguishing characteristic of JAIR is its focus on rule-based systems. This is evident in the
third paper title and abstract, which specifically mentions rule-based systems and their
applications in current AI research. This sets JAIR apart from other journals, as it highlights the
importan

# END OF FINAL SOLUTION

In [22]:
from langchain_neo4j import Neo4jGraph

# Provide the necessary credentials to connect to your Neo4j database
graph = Neo4jGraph(
    url = NEO4J_URI,
    username = NEO4J_USERNAME,
    password = NEO4J_PASSWORD,
    database = NEO4J_DATABASE
)

# Refresh and print the schema
graph.refresh_schema()
print(graph.schema)

Node properties:
Person {id: STRING, name: STRING, tagline: STRING, type: STRING}
Entity {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Journal {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Document {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Paper {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Wikipage {id: STRING, name: STRING, tagline: STRING, type: STRING, taglineEmbedding: LIST}
Relationship properties:
RELATION {type: STRING}
The relationships:
(:Person)-[:RELATION]->(:Entity)
(:Person)-[:RELATION]->(:Document)
(:Person)-[:RELATION]->(:Paper)
(:Person)-[:RELATION]->(:Wikipage)
(:Entity)-[:RELATION]->(:Entity)
(:Entity)-[:RELATION]->(:Document)
(:Entity)-[:RELATION]->(:Paper)
(:Entity)-[:RELATION]->(:Wikipage)
(:Entity)-[:RELATION]->(:Journal)
(:Document)-[:RELATION]->(:Entity)
(:Document)-[:RELATION]->(:Journal)
(:Document)-[:RELATION]->(

In [20]:
!pip install langchain_neo4j

Collecting langchain_neo4j
  Downloading langchain_neo4j-0.1.1-py3-none-any.whl.metadata (4.5 kB)
Downloading langchain_neo4j-0.1.1-py3-none-any.whl (37 kB)
Installing collected packages: langchain_neo4j
Successfully installed langchain_neo4j-0.1.1


