In [31]:
from langchain_neo4j.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_openai import ChatOpenAI
from langchain_neo4j import Neo4jGraph

import os

## Init OPENAI_API_KEY

In [32]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
neo4j_uri = os.environ.get("NEO4J_AURA")
neo4j_username = os.environ.get("NEO4J_AURA_USERNAME")
neo4j_password = os.environ.get("NEO4J_AURA_PASSWORD")
neo4j_database = os.environ.get("NEO4J_AURA_DATABASE")

print(neo4j_database, neo4j_uri, neo4j_username, neo4j_password, neo4j_database)

neo4j neo4j+s://79145028.databases.neo4j.io neo4j ujlntHOo0EriMmGelWeqh7FaVpr4craydlioxDdxtTk neo4j


## Init NEO4J Database

In [33]:
graph = Neo4jGraph(
    url=neo4j_uri,
    username=neo4j_username,
    password=neo4j_password,
    database=neo4j_database
)
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

In [34]:
schema_raw = graph.get_schema
print(schema_raw)

Node properties:
Document {fileName: STRING, fileSize: INTEGER, fileType: STRING, status: STRING, fileSource: STRING, createdAt: LOCAL_DATE_TIME, updatedAt: LOCAL_DATE_TIME, processingTime: FLOAT, errorMessage: STRING, nodeCount: INTEGER, relationshipCount: INTEGER, model: STRING, is_cancelled: BOOLEAN, total_chunks: INTEGER, processed_chunk: INTEGER, chunkNodeCount: INTEGER, chunkRelCount: INTEGER, entityNodeCount: INTEGER, entityEntityRelCount: INTEGER, communityNodeCount: INTEGER, communityRelCount: INTEGER}
Chunk {fileName: STRING, embedding: LIST, id: STRING, text: STRING, position: INTEGER, length: INTEGER, content_offset: INTEGER}
User {embedding: LIST, id: STRING}
Server {embedding: LIST, id: STRING}
Software {embedding: LIST, id: STRING}
Service {embedding: LIST, id: STRING}
Host {embedding: LIST, id: STRING}
System {embedding: LIST, id: STRING}
Person {embedding: LIST, id: STRING}
Module {embedding: LIST, id: STRING}
SystemProcess {embedding: LIST, id: STRING}
AuthenticationM

## Cyper Generation Template

In [35]:
cypher_generation_template = """
You are an expert Neo4j Cypher translator who converts English to Cypher based on the Neo4j Schema provided, following the instructions below:
        1. Generate Cypher query compatible ONLY for Neo4j Version 5.
        2. Do not use EXISTS, SIZE, HAVING keywords in the cypher. Use an alias when using the WITH keyword.
        3. Use only Node labels and Relationship types mentioned in the schema.
        4. Do not use relationships that are not mentioned in the given schema.
        5. For property searches, use case-insensitive matching. E.g., to search for a User, use `toLower(u.id) CONTAINS 'search_term'`.
        6. Assign a meaningful alias to every node and relationship in the MATCH clause (e.g., `MATCH (u:User)-[r:FAILED_LOGIN]->(s:System)`).
        7. In the RETURN clause, include only the components (nodes, relationships, or properties) needed to answer the question.
        8. To count distinct items from an `OPTIONAL MATCH`, collect them first and then use `size()` on the list to avoid null value warnings (e.g., `WITH main, collect(DISTINCT opt) AS items RETURN size(items) AS itemCount`).
        9. To create unique pairs of nodes for comparison, use `WHERE elementId(node1) < elementId(node2)`.

Schema:
{schema}

Note: 
Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything other than for you to construct a Cypher statement.
Do not run any queries that would add to or delete from the database.

Examples:

1.  Question: Which users have the most authentication failures?
    Query:
    MATCH (u:User)-[:AUTHENTICATION_FAILURE_ON]->()
    RETURN u.id AS userId, count(*) AS failureCount
    ORDER BY failureCount DESC
    LIMIT 10

2.  Question: List devices where users opened or closed a session.
    Query:
    MATCH (u:User)-[r:SESSION_OPENED_ON|SESSION_CLOSED_ON]->(device)
    RETURN u.id AS userId, type(r) AS action, labels(device) AS deviceType, device.id AS deviceId
    LIMIT 20

3.  Question: Tell the full path of the session: from the device where it was opened to where it was closed by root user
    Query:
    MATCH (u:User {{id: "root"}})-[open:SESSION_OPENED_ON]->(startDevice),(u)-[close:SESSION_CLOSED_ON]->(endDevice)
    RETURN
        u.id             AS userId,
        type(open)      AS openedOnRel,
        labels(startDevice) AS startDeviceType,
        startDevice.id  AS startDeviceId,
        type(close)     AS closedOnRel,
        labels(endDevice)   AS endDeviceType,
        endDevice.id    AS endDeviceId

The question is:
{question}
"""

In [36]:
from langchain.prompts import PromptTemplate
# from langchain.prompts.prompt import PromptTemplate
cyper_generation_prompt = PromptTemplate(
    template=cypher_generation_template,
    input_variables=["schema","question"]
)


In [37]:
qa_template = """
You are an assistant that takes the results from a Neo4j Cypher query and forms a human-readable response. The query results section contains the results of a Cypher query that was generated based on a user's natural language question. The provided information is authoritative; you must never question it or use your internal knowledge to alter it. Make the answer sound like a response to the question.

Final answer should be easily readable and structured. Use bullet points or tables for lists if it improves clarity.

Query Results:
{context}

Question: {question}

If the provided information is empty (indicated by `[]`), respond by stating that you cannot find the answer based on the available data.
If the information is not empty, you must provide an answer using only the results provided.
If the question involves a time duration (e.g., processingTime), state the unit of time if it is available in the results. If not specified, state the value as is without assuming a unit (e.g., "with a processing duration of 2500").
Never state that you lack sufficient information if data is present in the query results.

Helpful Answer:
"""

In [38]:
qa_generation_prompt = PromptTemplate(
    template=qa_template,
    input_variables=["context", "question"]
)

## QA Chain

In [39]:
cypher_chain = GraphCypherQAChain.from_llm(
    top_k=10,
    graph=graph,
    verbose=True,
    validate_cypher=True,
    return_intermediate_steps=True,
    cypher_prompt=cyper_generation_prompt,
    qa_prompt=qa_generation_prompt,
    qa_llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    cypher_llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
    allow_dangerous_requests=True,
    use_function_response=True
)

def query_cypher(question: str):
    """
    Function to query the cypher chain with a question.
    """
    response = cypher_chain.invoke(question)
    return response

In [40]:
question1 = "Who are the users using the 'CRON' software?"
response = query_cypher(question1)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (u:User)-[:USES]->(s:Software)
WHERE toLower(s.id) = 'cron'
RETURN u.id AS userId
[0m
Full Context:
[32;1m[1;3m[{'userId': 'root'}][0m

[1m> Finished chain.[0m


In [41]:
print(response.get("result"))

The users using the 'CRON' software are typically system administrators with root access.
