In [None]:
!pip install neo4j-graphrag

In [None]:
!pip install langchain langchain-groq langchain-core langchain-community neo4j
!pip install streamlit
!pip install pyngrok


In [None]:
import pandas as pd
import json
import os
from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
load_dotenv()

In [None]:
"""Configure the LLM and the Neo4j connection used throughout this notebook.

The LLM is served by Groq (``langchain_groq.ChatGroq``), so ``GROQ_API_KEY``
must be available. The Neo4j password is read from ``NEO4J_PASSWORD``. Both can
come from the process environment or from a ``.env`` file (loaded by the
imports cell); when a value is missing you are prompted for it, so that no
secret is ever stored in the notebook itself.
"""

import getpass

import neo4j
from neo4j_graphrag.retrievers import Text2CypherRetriever
from langchain_groq import ChatGroq


def _secret_from_env(name: str) -> str:
    """Return the secret stored in environment variable ``name``.

    Falls back to an interactive, non-echoing prompt when the variable is
    unset or empty.

    Raises:
        RuntimeError: if no value is available from either source.
    """
    value = os.environ.get(name) or getpass.getpass(f"{name}: ")
    if not value:
        raise RuntimeError(f"{name} is required but was not provided")
    return value


# SECURITY: a Neo4j password and a Groq API key used to be hard-coded in this
# cell. Treat both as compromised and rotate them.

# Define database credentials
URI = os.environ.get("NEO4J_URI", "neo4j+s://1335f3b1.databases.neo4j.io")
# AUTH is a (username, password) tuple
AUTH = (os.environ.get("NEO4J_USERNAME", "neo4j"), _secret_from_env("NEO4J_PASSWORD"))
DATABASE = "neo4j"

# ChatGroq reads its key from the GROQ_API_KEY environment variable
os.environ["GROQ_API_KEY"] = _secret_from_env("GROQ_API_KEY")

# Initialize the ChatGroq model (Gemma 2 9B instruction-tuned)
llm = ChatGroq(
    model="gemma2-9b-it",
    temperature=0,  # deterministic output for query generation
    max_tokens=None,
    timeout=None,
)

# Set up the Neo4j graph connection from the single set of credentials above
graph = Neo4jGraph(
    url=URI,
    username=AUTH[0],
    password=AUTH[1],
    database=DATABASE,
)


# (Optional) Hand-written description of the domain part of the Neo4j schema
neo4j_schema = """
Node properties :
Wall {wall_id: INTEGER, name: STRING, id: INTEGER},Material {name: STRING, density: FLOAT, conductivity: FLOAT, u_value: FLOAT, embodied_carbon_coefficient: FLOAT, cost: FLOAT, recyclability: INTEGER, bio_based: BOOLEAN, color: STRING},Metric {name: STRING, value: FLOAT}
Relationship properties :
USED_IN {thickness: FLOAT},HAS_METRIC {value: FLOAT}
The relationships :
(:Wall)-[:HAS_METRIC]->(:Metric),(:Material)-[:USED_IN]->(:Wall)
"""

In [None]:
# Ask the graph wrapper to refresh its schema, then print the schema text it
# holds so it can be compared with the hand-written description.
graph.refresh_schema()
print(graph.schema)

Node properties:
Wall {wall_id: INTEGER, name: STRING, id: INTEGER}
Material {name: STRING, density: FLOAT, conductivity: FLOAT, u_value: FLOAT, embodied_carbon_coefficient: FLOAT, cost: FLOAT, recyclability: INTEGER, bio_based: BOOLEAN, color: STRING}
Metric {name: STRING, value: FLOAT}
Session {id: STRING}
Message {type: STRING, content: STRING}
Relationship properties:
USED_IN {thickness: FLOAT}
HAS_METRIC {value: FLOAT}
The relationships:
(:Wall)-[:HAS_METRIC]->(:Metric)
(:Material)-[:USED_IN]->(:Wall)
(:Session)-[:LAST_MESSAGE]->(:Message)
(:Message)-[:NEXT]->(:Message)


In [None]:

# Curated (question, Cypher) pairs used as few-shot examples.
#
# Literal Cypher braces are doubled ({{ }}) because each query is rendered
# through a format-style prompt template, which collapses them to single braces.
# The first three entries are consumed as `examples[:3]` by later cells, so
# their order matters.
examples = [
    {
        "question": "Find walls that have bio-based materials and their overall score?",
        "query": "MATCH (w:Wall)<-[:USED_IN]-(m:Material) WHERE m.bio_based = true MATCH (w)-[:HAS_METRIC]->(metric:Metric {{name: 'overall_score'}}) WITH w, metric.value AS overall_score, COLLECT(m.name) AS Materials ORDER BY overall_score DESC LIMIT 5 RETURN w.id AS WallID, overall_score, Materials;"
    },
    {
        "question": "Find walls with recyclability greater than 3 and their overall score?",
        "query": "MATCH (w:Wall)<-[:USED_IN]-(m:Material) WHERE m.recyclability > 3 MATCH (w)-[:HAS_METRIC]->(metric:Metric {{name: 'overall_score'}}) WITH w, metric.value AS overall_score, COLLECT(m.name) AS Materials ORDER BY overall_score DESC LIMIT 5 RETURN w.id AS WallID, overall_score, Materials;"
    },
    {
        # The material names are collected in the same WITH that counts them:
        # once that WITH has run, `m` is no longer in scope.
        "question": "Find walls composed of exactly 3 materials and their overall score?",
        "query": "MATCH (w:Wall)<-[:USED_IN]-(m:Material) WITH w, COUNT(m) AS material_count, COLLECT(m.name) AS Materials WHERE material_count = 3 MATCH (w)-[:HAS_METRIC]->(metric:Metric {{name: 'overall_score'}}) WITH w, metric.value AS overall_score, Materials ORDER BY overall_score DESC LIMIT 5 RETURN w.id AS WallID, overall_score, Materials;"
    },
    {
        "question": "Find walls with the lowest cost and their overall score?",
        "query": "MATCH (w:Wall)<-[:USED_IN]-(m:Material) WITH w, MIN(m.cost) AS lowest_cost MATCH (w)-[:HAS_METRIC]->(metric:Metric {{name: 'overall_score'}}) WITH w, lowest_cost, metric.value AS overall_score ORDER BY lowest_cost ASC LIMIT 5 RETURN w.id AS WallID, overall_score, lowest_cost;"
    },
    {
        "question": "Find wooden walls and their overall score (assuming 'wood' is a material name or property)?",
        "query": "MATCH (w:Wall)<-[:USED_IN]-(m:Material) WHERE m.name CONTAINS 'wood' MATCH (w)-[:HAS_METRIC]->(metric:Metric {{name: 'overall_score'}}) WITH w, metric.value AS overall_score, COLLECT(m.name) AS Materials ORDER BY overall_score DESC LIMIT 5 RETURN w.id AS WallID, overall_score, Materials;"
    },
    {
        "question": "Find walls with the best circularity score and their overall score",
        "query": "MATCH (w:Wall)-[:HAS_METRIC]->(metric:Metric {{name: 'circular_economy'}}) WITH w, metric.value AS circularity_score ORDER BY circularity_score DESC LIMIT 5 MATCH (w)-[:HAS_METRIC]->(overall_metric:Metric {{name: 'overall_score'}}) WITH w, circularity_score, overall_metric.value AS overall_score RETURN w.id AS WallID, overall_score, circularity_score;"
    },
    {
        # "Without any" means NO material of the wall is bio-based, so the
        # predicate is evaluated over the wall's full material list rather
        # than per (wall, material) row.
        "question": "Find walls without any bio-based materials and their overall score?",
        "query": "MATCH (w:Wall)<-[:USED_IN]-(m:Material) WITH w, COLLECT(m) AS mats WHERE NONE(mat IN mats WHERE mat.bio_based = true) MATCH (w)-[:HAS_METRIC]->(metric:Metric {{name: 'overall_score'}}) WITH w, metric.value AS overall_score, [mat IN mats | mat.name] AS Materials ORDER BY overall_score DESC LIMIT 5 RETURN w.id AS WallID, overall_score, Materials;"
    }
]


In [None]:
# Baseline chain: relies on the Cypher-generation prompt bundled with
# GraphCypherQAChain. The custom `prompt` objects are only built in later
# cells, so referencing `prompt` here raises NameError on a fresh kernel.
# allow_dangerous_requests=True is the explicit opt-in required for running
# LLM-generated Cypher against the database.
chain2 = GraphCypherQAChain.from_llm(graph=graph, llm=llm, verbose=True, allow_dangerous_requests=True)
# Keep the schema text at hand for rendering prompts manually.
schema = graph.schema

In [None]:
from langchain_core.prompts import PromptTemplate

# Zero-shot Cypher-generation prompt: the model only sees the schema text and
# the user's question, with no worked examples.
cypher_prompt = PromptTemplate(
    input_variables=["schema", "question"],
    template=(
        "You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n"
        "\n"
        "    Here is the schema information:\n"
        "    {schema}\n"
        "\n"
        "    Question: {question}\n"
        "\n"
        "    Cypher Query:"
    ),
)

# Schema text as currently held by the graph wrapper.
schema = graph.schema

# Question -> Cypher -> answer chain driven by the zero-shot prompt above.
chain2 = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    cypher_prompt=cypher_prompt,
    verbose=True,
    allow_dangerous_requests=True,
)

In [None]:

from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

# How a single worked example is rendered inside the few-shot prompt.
example_prompt = PromptTemplate.from_template(
    "User input: {question}\n"
    "Cypher query: {query}"
)

# Few-shot prompt restricted to the first three curated examples.
prompt = FewShotPromptTemplate(
    input_variables=["question", "schema"],
    prefix=(
        "You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\n"
        "Here is the schema information\n{schema}.\n\n"
        "Below are a number of examples of questions and their corresponding Cypher queries."
    ),
    examples=examples[:3],
    example_prompt=example_prompt,
    suffix="User input: {question}\nCypher query: ",
)

In [None]:
# Few-shot Cypher generation using every curated example.
# (FewShotPromptTemplate / PromptTemplate come from the imports cell.)

# How a single worked example is rendered inside the prompt.
example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

# allow_dangerous_requests=True is the explicit opt-in required for running
# LLM-generated Cypher against the database.
chain2 = GraphCypherQAChain.from_llm(graph=graph, llm=llm, cypher_prompt=prompt, verbose=True, allow_dangerous_requests=True)
# Schema text kept for rendering the prompt manually.
schema = graph.schema

questions = ["Find walls without any bio-based materials and their overall score?",
             "Find walls with the lowest cost and their overall score?"]
for q in questions:
    print('====== START ======')
    # "query" is the chain's input key and must carry the user's question: the
    # chain forwards it as {question} (together with {schema}) to the Cypher
    # prompt AND to the answer-synthesis step. Sending "query": "" left the
    # answer step without any question to respond to.
    result = chain2.invoke({"query": q})['result']
    print(result)
    print('====== END ====== \n')



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (w:Wall)<-[:USED_IN]-(m:Material) WHERE NOT m.bio_based = true 
MATCH (w)-[:HAS_METRIC]->(metric:Metric {name: 'overall_score'}) 
WITH w, metric.value AS overall_score, COLLECT(m.name) AS Materials 
ORDER BY overall_score DESC LIMIT 5 
RETURN w.id AS WallID, overall_score, Materials;
[0m
Full Context:
[32;1m[1;3m[{'WallID': 1, 'overall_score': 40.7, 'Materials': ['Cement plaster', 'Cement plaster', 'Glass Wool (unbonded)', 'Gypsum plaster', 'Foam glass', 'Solid burnt clay brick']}, {'WallID': 2, 'overall_score': 40.7, 'Materials': ['Cement plaster', 'Gypsum plaster', 'Solid burnt clay brick', 'Rock wool (unbonded)']}, {'WallID': 3, 'overall_score': 40.7, 'Materials': ['Cement plaster', 'Rock wool (unbonded)', 'Profiled Clay Tile Panel']}, {'WallID': 4, 'overall_score': 40.7, 'Materials': ['Rock wool (unbonded)', 'Rock wool (unbonded)', 'Profiled Clay Tile Panel']}, {'WallID': 5, 'overall

In [None]:
# Render the few-shot prompt exactly as the LLM would receive it, for inspection.
print(
    prompt.format(
        schema=schema,
        question="Find walls without any bio-based materials and their overall score?",
    )
)

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
Node properties:
Wall {wall_id: INTEGER, name: STRING, id: INTEGER}
Material {name: STRING, density: FLOAT, conductivity: FLOAT, u_value: FLOAT, embodied_carbon_coefficient: FLOAT, cost: FLOAT, recyclability: INTEGER, bio_based: BOOLEAN, color: STRING}
Metric {name: STRING, value: FLOAT}
Session {id: STRING}
Message {type: STRING, content: STRING}
Relationship properties:
USED_IN {thickness: FLOAT}
HAS_METRIC {value: FLOAT}
The relationships:
(:Wall)-[:HAS_METRIC]->(:Metric)
(:Material)-[:USED_IN]->(:Wall)
(:Session)-[:LAST_MESSAGE]->(:Message)
(:Message)-[:NEXT]->(:Message).

Below are a number of examples of questions and their corresponding Cypher queries.

User input: Find walls that have bio-based materials and their overall score?
Cypher query: MATCH (w:Wall)<-[:USED_IN]-(m:Material) WHERE m.bio_based = true MATCH (w)-[:HAS_METRIC]->(metric:Metric {

In [None]:
# Few-shot Cypher generation using only the first three curated examples.
# (FewShotPromptTemplate / PromptTemplate come from the imports cell.)

# How a single worked example is rendered inside the prompt.
example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)
prompt = FewShotPromptTemplate(
    examples=examples[:3],
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

# allow_dangerous_requests=True is the explicit opt-in required for running
# LLM-generated Cypher against the database.
chain2 = GraphCypherQAChain.from_llm(graph=graph, llm=llm, cypher_prompt=prompt, verbose=True, allow_dangerous_requests=True)
# Schema text kept for rendering the prompt manually.
schema = graph.schema

questions = ["Find walls without any bio-based materials and their overall score?",
             "Find walls that have bio-based materials and their overall score?"]
for q in questions:
    print('====== START ======')
    # "query" is the chain's input key and must carry the user's question: the
    # chain forwards it as {question} (together with {schema}) to the Cypher
    # prompt AND to the answer-synthesis step. Sending "query": "" left the
    # answer step without any question to respond to.
    result = chain2.invoke({"query": q})['result']
    print(result)
    print('====== END ====== \n')



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (w:Wall)<-[:USED_IN]-(m:Material) WHERE NOT m.bio_based 
MATCH (w)-[:HAS_METRIC]->(metric:Metric {name: 'overall_score'}) 
WITH w, metric.value AS overall_score, COLLECT(m.name) AS Materials 
ORDER BY overall_score DESC LIMIT 5 
RETURN w.id AS WallID, overall_score, Materials;
[0m
Full Context:
[32;1m[1;3m[{'WallID': 1, 'overall_score': 40.7, 'Materials': ['Cement plaster', 'Cement plaster', 'Glass Wool (unbonded)', 'Gypsum plaster', 'Foam glass', 'Solid burnt clay brick']}, {'WallID': 2, 'overall_score': 40.7, 'Materials': ['Cement plaster', 'Gypsum plaster', 'Solid burnt clay brick', 'Rock wool (unbonded)']}, {'WallID': 3, 'overall_score': 40.7, 'Materials': ['Cement plaster', 'Rock wool (unbonded)', 'Profiled Clay Tile Panel']}, {'WallID': 4, 'overall_score': 40.7, 'Materials': ['Rock wool (unbonded)', 'Rock wool (unbonded)', 'Profiled Clay Tile Panel']}, {'WallID': 5, 'overall_score'

In [None]:
from langchain_core.example_selectors import LengthBasedExampleSelector

# Build the selector this cell (and the dynamic prompt) relies on; it was never
# created, which is why the call below used to fail with NameError.
# LengthBasedExampleSelector needs no embedding model: it adds examples in
# order until a word budget is used up.
# NOTE(review): if query-dependent selection is wanted, swap in a
# similarity-based selector once an embedding model is available.
example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=300,  # word budget shared by the inputs and the chosen examples
)

example_selector.select_examples({"question": "Find walls composed of exactly 3 materials and their overall score?"})

NameError: name 'example_selector' is not defined

In [None]:
# Few-shot prompt whose examples are picked per question by a selector rather
# than taken from a fixed slice of `examples`.
# Requires `example_selector` (an example-selector instance) and
# `example_prompt` to exist in the kernel namespace before this cell runs.
dynamic_prompt = FewShotPromptTemplate(
    input_variables=["question", "schema"],
    prefix=(
        "You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\n"
        "Here is the schema information\n{schema}.\n\n"
        "Below are a number of examples of questions and their corresponding Cypher queries."
    ),
    example_selector=example_selector,
    example_prompt=example_prompt,
    suffix="User input: {question}\nCypher query: ",
)

NameError: name 'example_selector' is not defined

In [None]:
# Interactive loop: read a question, let the chain generate and run the Cypher
# query, and print the synthesized answer until the user asks to stop.
print("Enter your question below (type 'exit' to quit):")

while True:
    try:
        user_input = input("> ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: leave the loop cleanly
        print("Exiting... Goodbye!")
        break

    # Check for exit condition
    if user_input.lower() in ("exit", "quit"):
        print("Exiting... Goodbye!")
        break

    # Nothing typed: do not spend an LLM call on an empty question
    if not user_input:
        continue

    try:
        # "query" is the chain's input key and must carry the user's question;
        # the chain supplies {question} and {schema} to its prompts itself.
        # Sending "query": "" left the answer step without any question.
        result = chain2.invoke({"query": user_input})["result"]
    except Exception as e:
        # Report the failure and keep the session alive for the next question
        print(f"An error occurred: {e}")
        continue

    print("====== Response ======")
    print(result)
    print("======================\n")

Enter your question below (type 'exit' to quit):


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (w:Wall)<-[:USED_IN]-(m:Material)
WITH w, MIN(m.embodied_carbon_coefficient) AS min_embodied_carbon
RETURN w.id AS WallID, min_embodied_carbon
ORDER BY min_embodied_carbon ASC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'WallID': 1, 'min_embodied_carbon': 0.012}][0m

[1m> Finished chain.[0m
The minimum embodied carbon is 0.012.  




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (w:Wall)<-[:USED_IN]-(m:Material)
WITH w, AVG(m.recyclability) AS average_recyclability
ORDER BY average_recyclability ASC
LIMIT 1
RETURN w.id AS WallID, average_recyclability
[0m
Full Context:
[32;1m[1;3m[{'WallID': 590, 'average_recyclability': 2.0}][0m

[1m> Finished chain.[0m
The average recyclability is 2.0.  




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH 

# Proof of concept (POC): Streamlit front end

In [None]:
import streamlit as st
from pyngrok import ngrok

# Proof-of-concept web UI for the question -> Cypher -> answer chain.
# NOTE(review): Streamlit widgets only render when this code is executed by
# `streamlit run`, and the tunnel below only serves something once a Streamlit
# server is listening on port 8501 - confirm how this cell is meant to be run.

# ngrok refuses to open tunnels without a verified account's authtoken
# (ERR_NGROK_4018), so fail early with an actionable message instead.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_token:
    raise RuntimeError(
        "NGROK_AUTHTOKEN is not set. Create a token at "
        "https://dashboard.ngrok.com/get-started/your-authtoken and export it "
        "(or add it to your .env file) before running this cell."
    )
ngrok.set_auth_token(ngrok_token)

# Start ngrok tunnel for Streamlit
public_url = ngrok.connect(8501)
print(f"Streamlit app is live at {public_url}")

# Streamlit UI
st.title("Neo4j Cypher Query Generator")
st.write("Enter your natural language question about the database, and the app will generate and execute a Cypher query.")

# Input section
user_input = st.text_input("Your Question:", placeholder="E.g., Find walls without any bio-based materials and their overall score?")

# Button to submit the query
if st.button("Generate and Execute Query"):
    if user_input:
        try:
            # "query" is the chain's input key and must carry the user's
            # question; the chain supplies {question} and {schema} to its
            # prompts itself. Sending "query": "" left the answer step
            # without any question.
            result = chain2.invoke({"query": user_input})["result"]

            # Display the result
            st.success("Query executed successfully!")
            st.write("====== Response ======")
            st.write(result)
            st.write("======================")
        except Exception as e:
            # Surface the failure in the UI instead of crashing the app
            st.error(f"An error occurred: {e}")
    else:
        st.warning("Please enter a question before submitting.")

# Footer
st.markdown("---")
st.markdown("**Disclaimer:** This is a proof-of-concept application for generating Cypher queries using Neo4j and an LLM.")



ERROR:pyngrok.process.ngrok:t=2024-12-19T08:54:30+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2024-12-19T08:54:30+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2024-12-19T08:54:30+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.