# Installation

In [29]:
%pip install --quiet neo4j langchain-neo4j langchain-community langchain-core langchain-experimental langchain-openai json-repair langcore-chains

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement langcore-chains (from versions: none)
ERROR: No matching distribution found for langcore-chains


In [30]:
# Upgrade the LangChain packages while keeping NumPy below 2.0.
# NOTE(review): the reason for the NumPy cap is not recorded in this notebook — confirm it is still needed.
%pip install --quiet --upgrade langchain-community langchain-core "numpy<2.0" langchain langchain-neo4j 

Note: you may need to restart the kernel to use updated packages.


In [None]:
# %pip --quiet install langchain-neo4j

In [None]:
# %pip install --upgrade "numpy<2.0"

# Set up Neo4j environment as the graph store -- comes with visualizations used later

In [31]:

# Import paths tried previously for Neo4jGraph, kept for reference:
#   from langchain_community.graphs import Neo4jGraph
#   from langchain.graphs.neo4j_graph import Neo4jGraph
import os

from dotenv import load_dotenv
from langchain_neo4j import Neo4jGraph

# Load environment variables (python-dotenv) before reading the credentials.
load_dotenv()
neo_db_id = os.getenv("DB_ID")      # used later to build the Neo4j browser URL
neo_pass = os.getenv("NEO4J_PASS")  # database password; never hard-coded here

# Shared handle to the graph store used by every cell below.
graph = Neo4jGraph(
    url="neo4j+s://f5c81351.databases.neo4j.io",
    username="neo4j",
    password=neo_pass,
    enhanced_schema=True,
    # refresh_schema=False
)

def clean_graph():
    """Empty the connected graph.

    Sends one Cypher statement through the module-level ``graph`` handle that
    matches every node and detach-deletes it, which removes the nodes together
    with any relationships attached to them. Returns nothing.
    """
    wipe_all_nodes = """
    MATCH (n)
    DETACH DELETE n
    """
    graph.query(wipe_all_nodes)



# Import GraphCypherQAChain

In [32]:
# from langchain.chains import GraphQAChain old
from langchain_neo4j import GraphCypherQAChain

from langchain_core.prompts import PromptTemplate
# from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain


# Clean graph if required

In [33]:
# WARNING: destructive — clean_graph() detach-deletes every node in the connected database.
clean_graph()

# Connect to model (OpenAI for now)

In [34]:
import getpass
import os
from dotenv import load_dotenv

# The OpenAI client used later is never handed a key explicitly, so the key
# must be present in the process environment under OPENAI_API_KEY.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Not provided via the environment or a .env file: ask for it interactively
    # (input is not echoed) and export it for the rest of the session.
    api_key = getpass.getpass("OpenAI API key: ")
    os.environ["OPENAI_API_KEY"] = api_key

# LangChain LLM Graph Transformer
Establish a connection to GPT-3.5 (`gpt-3.5-turbo`).

In [43]:
# Exact version pins for the helper packages (json-repair, tiktoken, aiofiles, environs).
%pip --quiet install json-repair==0.28.4 tiktoken==0.7.0 aiofiles==24.1.0 environs==11.0.0

Note: you may need to restart the kernel to use updated packages.


In [46]:
# WARNING: destructive — clean_graph() detach-deletes every node in the connected database.
clean_graph()

In [70]:
import os
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer

# Chat model shared by the graph transformer and, later, the QA chain.
# temperature=0 keeps sampling as deterministic as the API allows.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

# Extra guidance handed to the transformer, asking for a document_id
# property on each extracted node.
additional_instructions = """
When creating entities, add a "document_id" property to each node and set it to the document's unique ID.
For example, if the document ID is "doc123", each created node should include `document_id: "doc123"`.
Query example: 
CREATE (n:NodeLabel) 
SET n.document_id = "doc123" 
RETURN n
"""

# Turns raw documents into graph documents (nodes + relationships).
llm_transformer = LLMGraphTransformer(
    llm=llm,
    ignore_tool_usage=True,
    additional_instructions=additional_instructions,
)


# Parse Document

Example documents

In [71]:
# WARNING: destructive — clean_graph() detach-deletes every node in the connected database.
clean_graph()

In [72]:
from langchain_core.documents import Document
with open('apple_document.txt', 'r') as file:
    text = file.read()

doc = [Document(page_content=text)]
graph_doc = await llm_transformer.aconvert_to_graph_documents(doc)
print(graph_doc)
graph.add_graph_documents(graph_doc, include_source=True, baseEntityLabel=True)


[GraphDocument(nodes=[Node(id='bananas', type='Product', properties={}), Node(id='apples', type='Product', properties={}), Node(id='bright red fruits', type='Characteristic', properties={}), Node(id='yellow car', type='Vehicle', properties={})], relationships=[Relationship(source=Node(id='apples', type='Product', properties={}), target=Node(id='bright red fruits', type='Characteristic', properties={}), type='ARE', properties={}), Relationship(source=Node(id='bananas', type='Product', properties={}), target=Node(id='yellow car', type='Vehicle', properties={}), type='ARE_CODE_WORD_FOR', properties={})], source=Document(metadata={}, page_content='apples are bright red fruits.\nbananas in this context are a code word for a yellow car.'))]


In [51]:
from langchain_core.documents import Document
with open('banana_document.txt', 'r') as file:
    text = file.read()

doc = [Document(page_content=text)]
graph_doc = await llm_transformer.aconvert_to_graph_documents(doc)
graph.add_graph_documents(graph_doc, include_source=True, baseEntityLabel=True)
print(graph_doc)


[GraphDocument(nodes=[Node(id='bright yellow fruits', type='Characteristic', properties={}), Node(id='Red trucks', type='Vehicle', properties={}), Node(id='Apples', type='Product', properties={}), Node(id='Bananas', type='Product', properties={})], relationships=[Relationship(source=Node(id='Bananas', type='Product', properties={}), target=Node(id='bright yellow fruits', type='Characteristic', properties={}), type='ARE', properties={}), Relationship(source=Node(id='Apples', type='Product', properties={}), target=Node(id='Red trucks', type='Vehicle', properties={}), type='ARE_CODE_WORD_FOR', properties={})], source=Document(metadata={'id': '83b84d36a24f3f060e3107fa7bc0d748'}, page_content='Bananas are bright yellow fruits.\nApples in this context are a code word for Red trucks.'))]


Example: Neuroscience paper


In [None]:
from langchain_core.documents import Document
with open('verylarge_document.txt', 'r') as file:
    text = file.read()

documents = [Document(page_content=text)]
graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)

# baseEntityLabel allows us to optimize data retrieval even though we don't
# know all node labels and don't keep track of indices
graph.add_graph_documents(graph_documents, include_source=True, baseEntityLabel=True)





# # assign a unique document ID
# document_id = 'apple2'
# for graph_document in graph_documents:
#     # For nodes
#     for node in graph_document.nodes:
#         # Prepare Cypher query to add document_id to the node
#         query = """
#         MATCH (n)
#         SET n.document_id = 'apple2'
#         RETURN n
#         """
#         # Use graph.query() to run the query
#         graph.query(query, {"node_id": node.id, "document_id": document_id})



# no_schema = LLMGraphTransformer(llm=llm)
# data = await no_schema.aconvert_to_graph_documents(documents)
# graph.add_graph_documents(data)

# print(f"Nodes:{graph_documents[0].nodes}")
# print(f"Relationships:{graph_documents[0].relationships}")

In [None]:
# %pip install  --quiet --upgrade langchain neo4j

# Prompt Template

In [None]:

# Refresh the graph's schema information before the prompt and chain below are built.
graph.refresh_schema()
# CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
# Instructions:
# Use only the provided relationship types and properties in the schema.
# Do not use any other relationship types or properties that are not provided.
# If no exact match is found for the input, generate a Cypher query using a fuzzy matching operator like `CONTAINS` to find related nodes.
# Schema:
# {schema}
# Note: Do not include any explanations or apologies in your responses.
# Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
# Do not include any text except the generated Cypher statement.
# Return every node as whole, do not return only the properties.

# The question is:
# {question}"""

# Prompt text for the Cypher-generation step. Placeholders:
#   {schema}      - schema of the graph being queried
#   {question}    - the user's natural-language question
#   {document_id} - id of the source-document node that results must connect to;
#                   it is substituted into each worked example below
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not assume any specific relationship types unless explicitly needed.
Do not use any other relationship types or properties that are not provided.
When querying, look for nodes connected to the target node and their relationships. Do not care about the label of the nodes.
Also when querying, do not care about the direction of the relationship, so use `-` rather than `->`.
When comparing to the target node, use a fuzzy matching operator like `CONTAINS` rather than strict .id: to find related nodes.
Also, capitalize the target node appropriately to get a match in the graph.
When querying, also match for a node x that is the document that connects to relevant nodes, where x.id is equal to the inputted document_id. However, do not return this node x in the context.


Example 1 (specific to a particular schema):
Question: What are the proteins associated with 'tir840'?
Generated Cypher: 
MATCH (x)-[]-(p)-[r]-(c)
WHERE c.id CONTAINS 'Tir840' AND NOT p:Document AND NOT c:Document AND x.id="{document_id}"
RETURN p, r, c

Example 2 (specific to a particular schema):
Question: Give me an overview of the Concept 'Tir840 Site'.
Generated Cypher:
MATCH (x)-[]-(p)-[r]-(c)
WHERE c.id CONTAINS 'Tir840' AND NOT p:Document AND NOT c:Document AND x.id="{document_id}" 
RETURN p, r, c

Example 3 (specific to a particular schema):
Question: What is Glur1?
Generated Cypher: 
MATCH (x)-[]-(p)-[r]-(c)
WHERE c.id CONTAINS 'Glur1' AND NOT p:Document AND NOT c:Document AND x.id="{document_id}"
RETURN p, r, c

Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Return every node as whole, do not return only the properties.

The question is:
{question}"""

# Prompt object for Cypher generation; declares the three placeholders that
# appear in CYPHER_GENERATION_TEMPLATE.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question", "document_id"],
)

# Question-answering chain over the Neo4j graph, using the custom Cypher prompt.
chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    # delete later? for correcting relationship directions
    validate_cypher=True,
    # keep the intermediate steps in the response alongside the result
    return_intermediate_steps=True,
    # explicit opt-in flag passed to the chain
    allow_dangerous_requests=True,
    verbose=True,
)

In [None]:
# Other questions tried with this chain:
#   user_query = "Give me an overview of Glur1"
#   user_query = "What other molecules affects Pka and what are the relationships?"
user_query = "What is Calcineurin"
# user_query = user_query.lower().strip()

graph.refresh_schema()

# "document_id" is forwarded with the question; the Cypher prompt template
# declares a placeholder of the same name.
context = chain.invoke(
    {
        "query": user_query,
        "document_id": "91aa6e986408d31253a5d2706d507ad8",
    }
)

# chain.invoke("")

In [None]:
# Show only the "result" entry of the chain response.
# print(context)  # the full response
print(context["result"])
# print(context["intermediate_steps"][1])

In [None]:

# Inspect the first extracted graph document. `graph_documents` is only defined
# by the large-document cell, so that cell must have been run first.
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")

# Open Graph with Neo4j browser

In [None]:
import webbrowser

# `neo_db_id` comes from the DB_ID environment variable; if it is unset the
# f-string would produce "https://None.databases.neo4j.io/...", so guard first.
if neo_db_id:
    # new=2 asks the browser to open the page in a new tab where possible.
    webbrowser.open(f'https://{neo_db_id}.databases.neo4j.io/browser/', new=2)
else:
    print("DB_ID is not set; cannot build the Neo4j browser URL.")

In the Neo4j browser, run the Cypher query
`MATCH (n) RETURN n`
to display the whole graph.