In [1]:
import os
import json
from neo4j import GraphDatabase
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

True

In [2]:
# --- Neo4j Connection and Schema ---
def _describe(name, keys):
    """Format a node label or relationship type with its property keys.

    Returns ``"name(key1, key2)"``, or ``"name(N/A)"`` when ``keys`` is empty.
    """
    joined_keys = ", ".join(keys) if keys else "N/A"
    return f"{name}({joined_keys})"


# Build a textual description of the graph schema: one line per distinct
# (start node, relationship, end node) pattern found in the database.
# On any failure the error is printed and `schema` falls back to "".
try:
    neo4j_uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
    neo4j_user = os.getenv("NEO4J_USER", "neo4j")
    # NOTE(review): hard-coded fallback password; prefer requiring NEO4J_PASSWORD in .env.
    neo4j_password = os.getenv("NEO4J_PASSWORD", "123456789")
    
    driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))

    query = """
    MATCH (a)-[r]->(b)
    RETURN DISTINCT
            labels(a) AS from_label, keys(properties(a)) AS from_keys,
            type(r) AS rel_type, keys(properties(r)) AS rel_keys,
            labels(b) AS to_label, keys(properties(b)) AS to_keys
    """
    schema_lines = []
    try:
        with driver.session() as session:
            for record in session.run(query):
                # Only the first label of each node is shown; unlabeled nodes print as "N/A".
                left_label = record["from_label"][0] if record["from_label"] else "N/A"
                right_label = record["to_label"][0] if record["to_label"] else "N/A"

                left_str = _describe(left_label, record["from_keys"])
                rel_str = _describe(record["rel_type"], record["rel_keys"])
                right_str = _describe(right_label, record["to_keys"])
                schema_lines.append(f"{left_str} -[{rel_str}]-> {right_str}")
    finally:
        # This driver is only needed for schema introspection; release its
        # connections whether or not the query succeeded.
        driver.close()

    schema = "\n".join(schema_lines)
    print("Database Schema:")
    print(schema)

except Exception as e:
    print(f"Error connecting to Neo4j or fetching schema: {e}")
    schema = ""

Database Schema:
Element(symbol, name) -[REACTANT(ratio)]-> Reaction(equation)
Reaction(equation) -[PRODUCT(N/A)]-> Compound(name, formula)
Compound(name, formula) -[USED_IN(N/A)]-> Drug(name)
Drug(name) -[TREATS(N/A)]-> Disease(name)
Disease(name) -[AFFECTS(N/A)]-> Organism(type)


In [3]:
# --- Neo4j query helper ---
class Neo4jConnector:
    """Small wrapper around a Neo4j driver for running Cypher queries.

    Can be used as a context manager (``with Neo4jConnector(...) as conn:``)
    so the underlying driver is closed even when a query raises.
    """

    def __init__(self, uri, user, password):
        """Create a driver for ``uri`` using ``(user, password)`` as auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return the connector itself for use inside a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppresses errors."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def run_query(self, query, parameters=None):
        """Run ``query`` in a fresh session and return its records.

        Args:
            query: Cypher text to execute.
            parameters: Optional mapping of query parameters (``$name``
                placeholders). Defaults to ``None``, i.e. no parameters.

        Returns:
            A list with one ``record.data()`` value per result record.
        """
        with self.driver.session() as session:
            result = session.run(query, parameters)
            # Consume the result inside the session, before it is closed.
            return [record.data() for record in result]

In [4]:
# Read the Google API key from the environment and fail fast when it is
# missing or empty.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if google_api_key is None or google_api_key == "":
    raise ValueError("GOOGLE_API_KEY not found in environment variables")

# Write the key back under the same environment variable name.
os.environ["GOOGLE_API_KEY"] = google_api_key

# Chat model shared by both prompt steps below; verbose=False suppresses
# extra output.
llm_model = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    verbose=False,
)

In [5]:
# --- First chain ---
def _strip_code_fences(text):
    """Return ``text`` without a surrounding Markdown code fence.

    Handles a fence on its own lines (with or without a language tag after
    the opening fence), a closing fence glued to the last content line, and
    a fully inline fence such as "```MATCH (n) RETURN n```". Text that does
    not start with a fence is only stripped of surrounding whitespace.
    """
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned

    body = cleaned[3:]
    if body.endswith("```"):
        body = body[:-3]
    if "\n" in body:
        # The rest of the opening fence line is the language tag
        # (e.g. "cypher") or empty; the query starts on the next line.
        body = body.split("\n", 1)[1]
    return body.strip()


def generate_cypher_query(question):
    """Ask the LLM to translate ``question`` into a Cypher query.

    The prompt embeds the module-level ``schema`` string and is sent through
    the module-level ``llm_model``.

    Args:
        question: Natural-language question from the user.

    Returns:
        The model's reply with surrounding whitespace and any Markdown code
        fence removed.
    """
    prompt = f"""You are an expert in translating natural language to Cypher queries for Neo4j graph databases.

Database schema:
{schema}
                                                 
Question: {question}
                                            
Return only the Cypher query without any explanations.
Don't include explanations, only return the Cypher query.
and don't add the word 'cypher'"""
    
    response = llm_model.invoke(prompt)
    return _strip_code_fences(response.content)

In [6]:
# --- Second chain ---
def generate_response(question, results):
    """Turn query results into a natural-language answer via the LLM.

    Args:
        question: The user's original question.
        results: The query results, already rendered as text.

    Returns:
        The ``content`` of the reply from the module-level ``llm_model``.
    """
    prompt_lines = (
        f'The user asked: "{question}"',
        f'The cypher query returned results: "{results}"',
        "Provide a concise and clear answer based on the query results.",
        'If there are no results, respond with "No relevant information found."',
        "Don't mention the cypher query in your response.",
        "Just provide the answer to the user's question.",
    )
    reply = llm_model.invoke("\n".join(prompt_lines))
    return reply.content

In [7]:
def answer_question(question, neo4j_connector):
    """Answer ``question`` by generating a Cypher query, running it, and summarizing.

    Args:
        question: Natural-language question from the user.
        neo4j_connector: Object exposing ``run_query(query)`` that returns a
            list of result rows.

    Returns:
        The answer text produced by ``generate_response``.
    """
    # Generate Cypher query
    cypher_query = generate_cypher_query(question)

    # Run Cypher query against Neo4j
    # NOTE(review): the query is LLM-generated and executed as-is; consider a
    # read-only database user or session if the graph must not be modified.
    query_results = neo4j_connector.run_query(cypher_query)

    # default=str keeps serialization from failing on values json cannot
    # encode natively (e.g. date/time objects) by using their string form.
    results_str = json.dumps(query_results, indent=2, default=str)

    # Generate final answer
    answer = generate_response(question, results_str)
    return answer

In [8]:
# Ask one sample question end to end and print the answer. Any failure is
# reported as a single printed message rather than a traceback.
try:
    neo4j_uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
    neo4j_user = os.getenv("NEO4J_USER", "neo4j")
    # NOTE(review): hard-coded fallback password; prefer requiring NEO4J_PASSWORD in .env.
    neo4j_password = os.getenv("NEO4J_PASSWORD", "123456789")
    
    neo_connector = Neo4jConnector(neo4j_uri, neo4j_user, neo4j_password)
    try:
        # user_question = "what is the formula of Aspirin?"
        user_question = "which drugs treat Headache?"
        
        print("="*60)
        print(f"Question: {user_question}")
        print("="*60)
        
        final_answer = answer_question(user_question, neo_connector)
        
        print(f"\nAnswer: {final_answer}")
        print("="*60)
    finally:
        # Close the connector even when the question pipeline raises, so the
        # driver's connections are not leaked.
        neo_connector.close()
except Exception as e:
    print(f"An error occurred: {e}")

Question: which drugs treat Headache?

Answer: Aspirin treats headache.

Answer: Aspirin treats headache.
