In [6]:
%pip install --quiet neo4j langchain-community langchain-core langchain-experimental langchain-openai json-repair langcore-chains

^C
Note: you may need to restart the kernel to use updated packages.


# Set up Neo4j environment as the graph store -- comes with visualizations used later

In [None]:
%pip install --quiet --upgrade langchain-community langchain-core 

Note: you may need to restart the kernel to use updated packages.


In [None]:
%pip --quiet install langchain-neo4j

Note: you may need to restart the kernel to use updated packages.


In [1]:
%pip install --upgrade "numpy<2.0"

Note: you may need to restart the kernel to use updated packages.


In [115]:

from langchain_community.graphs import Neo4jGraph
from dotenv import load_dotenv
import os
# from langchain.graphs.neo4j_graph import Neo4jGraph 

# Load settings from a local .env file so no secret is typed into the notebook.
load_dotenv()
neo_pass = os.getenv("NEO4J_PASS")
neo_db_id = os.getenv("DB_ID")  # reused later to build the Neo4j Browser URL

# Connection settings for the Neo4j instance that serves as the graph store.
neo4j_settings = {
    "url": "neo4j+s://f5c81351.databases.neo4j.io",
    "username": "neo4j",
    "password": neo_pass,
    "enhanced_schema": True,
    # "refresh_schema": False,
}
graph = Neo4jGraph(**neo4j_settings)

def clean_graph():
    """Delete every node, together with its relationships, from the connected graph."""
    # DETACH DELETE removes a node's relationships along with the node itself.
    wipe_all_nodes = """
    MATCH (n)
    DETACH DELETE n
    """
    graph.query(wipe_all_nodes)



# Clean graph if required

In [19]:
# WARNING: destructive -- this runs `MATCH (n) DETACH DELETE n` on the connected database.
# Skip this cell unless an empty graph is wanted.
clean_graph()

# Connect to model (OpenAI for now)

In [12]:
import getpass
import os
from dotenv import load_dotenv

load_dotenv()

# Prefer a key supplied through the environment / .env file and only prompt
# interactively when none is configured.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    api_key = getpass.getpass("OPENAI_API_KEY: ")
    # The environment variable NAME must be the literal "OPENAI_API_KEY";
    # indexing os.environ with the key's value would store the input under a
    # variable named after the secret (or raise TypeError when it is unset).
    os.environ["OPENAI_API_KEY"] = api_key

# LangChain LLM Graph Transformer
Establish a connection to GPT-3.5 (`gpt-3.5-turbo`) and wrap it in an `LLMGraphTransformer`.

In [8]:
%pip install --quiet langchain-experimental

Note: you may need to restart the kernel to use updated packages.


In [13]:
import os
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI

# Chat model shared by graph extraction and, later, by the question-answering chain.
# temperature=0 keeps sampling randomness to a minimum.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Transformer that uses the chat model to turn documents into graph documents.
llm_transformer = LLMGraphTransformer(llm=llm)

# Parse Document

In [20]:
from langchain_core.documents import Document
with open('document2.txt', 'r') as file:
    text = file.read()

documents = [Document(page_content=text)]
graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)
# graph.add_graph_documents(graph_documents)
graph.add_graph_documents(graph_documents, baseEntityLabel=True)
# baseEntityLabel allows us to optimize data retrieval even though we don't know all node labels and don't keep track of indices





# # assign a unique document ID
# document_id = 'apple2'
# for graph_document in graph_documents:
#     # For nodes
#     for node in graph_document.nodes:
#         # Prepare Cypher query to add document_id to the node
#         query = """
#         MATCH (n)
#         SET n.document_id = 'apple2'
#         RETURN n
#         """
#         # Use graph.query() to run the query
#         graph.query(query, {"node_id": node.id, "document_id": document_id})



# no_schema = LLMGraphTransformer(llm=llm)
# data = await no_schema.aconvert_to_graph_documents(documents)
# graph.add_graph_documents(data)

# print(f"Nodes:{graph_documents[0].nodes}")
# print(f"Relationships:{graph_documents[0].relationships}")

In [15]:
from langchain.chains import GraphQAChain
from langchain_core.prompts import PromptTemplate
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain


In [17]:
%pip install  --quiet --upgrade langchain neo4j

Note: you may need to restart the kernel to use updated packages.


In [116]:

# Refresh the graph's cached schema before it is injected into the prompt's {schema} slot.
graph.refresh_schema()
# CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
# Instructions:
# Use only the provided relationship types and properties in the schema.
# Do not use any other relationship types or properties that are not provided.
# If no exact match is found for the input, generate a Cypher query using a fuzzy matching operator like `CONTAINS` to find related nodes.
# Schema:
# {schema}
# Note: Do not include any explanations or apologies in your responses.
# Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
# Do not include any text except the generated Cypher statement.
# Return every node as whole, do not return only the properties.

# The question is:
# {question}"""

# Prompt for the Cypher-generation step. `{schema}` and `{question}` are the two
# placeholders filled in at query time. The instructions ask for undirected,
# label-agnostic matches and `CONTAINS`-based fuzzy matching on the target node.
# NOTE(review): both few-shot examples produce the same Cypher statement --
# confirm that Example 2 is intentional.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
When querying, look for nodes connected to the target node and their relationships. Do not care about the label of the nodes.
Also when querying, do not care about the direction of the relationship, so use `-` rather than `->`.
When comparing to the target node, use a fuzzy matching operator like `CONTAINS` rather than strict .id: to find related nodes.
Also, capitalize the target node appropriately to get a match in the graph.

Example 1 (specific to a particular schema):
Question: What are the proteins associated with 'tir840'?
Generated Cypher: 
MATCH (p)-[:PHOSPHORYLATES]-(c)
WHERE c.id CONTAINS 'Tir840'
RETURN p, c

Example 2 (specific to a particular schema):
Question: Give me an overview of the Concept 'Tir840 Site'.
Generated Cypher:
MATCH (p)-[:PHOSPHORYLATES]-(c)
WHERE c.id CONTAINS 'Tir840'
RETURN p, c

Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Return every node as whole, do not return only the properties.

The question is:
{question}"""

# Wrap the template so the chain can fill in the schema and the user's question.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

# Question-answering chain: the LLM writes a Cypher query from the prompt above,
# the query runs against `graph`, and the LLM phrases the final answer.
chain_options = dict(
    graph=graph,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    return_intermediate_steps=True,
    verbose=True,
    # Explicit opt-in: LLM-generated Cypher is executed with this connection's
    # full privileges, so use database credentials with a narrow scope.
    allow_dangerous_requests=True,
)
chain = GraphCypherQAChain.from_llm(llm, **chain_options)



In [120]:
# Bring the cached schema up to date before asking a question.
graph.refresh_schema()

user_query = "Give me an overview of Pp1"
# user_query = user_query.lower().strip()

# `context` is the chain's output; the final answer is stored under "result".
context = chain.invoke(user_query)

# chain.invoke("") 





[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p)-[:MODULATOR_OF|PHOSPHORYLATES|REGULATES|MODULATES|ALSO_KNOWN_AS|MODULATED_BY|ABUNDANT_IN|MEDIATES|RESPONDS_TO|INVOLVED_IN|IMPLICATED_IN]-(c)
WHERE p.id CONTAINS 'Pp1'
RETURN p, c[0m
Full Context:
[32;1m[1;3m[{'p': {'id': 'Pp1'}, 'c': {'id': 'Ampa-Receptors'}}, {'p': {'id': 'Pp1'}, 'c': {'id': 'Nmda-Receptors'}}, {'p': {'id': 'Pp1'}, 'c': {'id': 'Dendritic Spine Formation'}}, {'p': {'id': 'Pp1'}, 'c': {'id': 'Dendritic Spine Dynamics'}}, {'p': {'id': 'Pp1'}, 'c': {'id': 'Camp'}}, {'p': {'id': 'Pp1'}, 'c': {'id': 'Ca2+'}}][0m

[1m> Finished chain.[0m


In [121]:
# Show only the natural-language answer produced by the chain.
answer = context["result"]
print(answer)
# print(context["intermediate_steps"][1])

Pp1 is associated with Ampa-Receptors, Nmda-Receptors, Dendritic Spine Formation, Dendritic Spine Dynamics, Camp, and Ca2+.


In [12]:

# Inspect what the transformer extracted from the first (and only) document.
first_graph_document = graph_documents[0]
print(f"Nodes:{first_graph_document.nodes}")
print(f"Relationships:{first_graph_document.relationships}")

Nodes:[Node(id='Ca2+/Calmodulin-Dependent Protein Kinase Ii', type='Protein', properties={}), Node(id='Synapses', type='Concept', properties={}), Node(id='Cell Signalling', type='Concept', properties={}), Node(id='Ca2+ Transients', type='Concept', properties={}), Node(id='Ltp', type='Concept', properties={}), Node(id='Memory Storing', type='Concept', properties={}), Node(id='Pka', type='Protein', properties={}), Node(id='Glur1', type='Protein', properties={}), Node(id='Ampar', type='Protein', properties={}), Node(id='Pkc', type='Protein', properties={}), Node(id='Tir840 Site', type='Concept', properties={}), Node(id='Protein Phosphatase 1', type='Protein', properties={}), Node(id='Ampa-Receptors', type='Protein', properties={}), Node(id='Nmda-Receptors', type='Protein', properties={}), Node(id='Dendritic Spine Formation', type='Concept', properties={}), Node(id='Pp1', type='Protein', properties={}), Node(id='Camp', type='Concept', properties={}), Node(id='Ca2+-Dependent Isoforms', type

# Open Graph with Neo4j browser

In [18]:
import webbrowser
# Neo4j Browser address for the instance identified by DB_ID; new=2 asks for a new tab.
browser_url = f'https://{neo_db_id}.databases.neo4j.io/browser/'
webbrowser.open(browser_url, new=2)

True

In the Neo4j Browser, run the Cypher query
`MATCH (n) RETURN n`
to display the whole graph.