In [1]:
import os
import json
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment;
# a later cell reads GOOGLE_API_KEY from it via os.getenv.
load_dotenv()


True

In [2]:
from langchain_neo4j import Neo4jGraph, GraphCypherQAChain
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.messages import AIMessage, SystemMessage, HumanMessage


In [None]:
# Neo4j connection for the local docker instance.
# Settings are read from the environment (load_dotenv() has already run), so
# credentials can be changed without editing the notebook. The fallbacks are the
# values that were previously hard-coded here.
graph = Neo4jGraph(
    refresh_schema=False,
    url=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "fraud_detection"),
)


In [4]:
# Chat model used by the Cypher-generation chain; the API key is taken from the
# GOOGLE_API_KEY environment variable.
# Alternatives previously kept here as commented-out code:
#   ChatGroq(model="llama3-8b-8192", api_key=os.getenv("GROQ_API_KEY"))
#   ChatCohere(api_key=os.getenv("COHERE_API_KEY"))
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=os.environ.get("GOOGLE_API_KEY"),
)


In [5]:
# Read the synthetic bank-transactions CSV into LangChain Document objects.
# Judging by the output below, each Document carries one CSV row as
# "column: value" lines in page_content, plus source/row metadata.
loader = CSVLoader("../datasets/synthetic_bank_transactions.csv")

data = loader.load()
# Show the first Document as a sanity check.
data[0]


Document(metadata={'source': '../datasets/synthetic_bank_transactions.csv', 'row': 0}, page_content='transaction_id: 1\ntimestamp: 2025-06-04T08:32:11\namount: 89290.85\ncurrency: INR\nsender_account_id: ACC1005\nreceiver_account_id: ACC1079\nsender_customer_id: CUST024\nreceiver_customer_id: CUST028\ntransaction_type: payment\nsender_country: China\nreceiver_country: India')

In [6]:
def chunk_data(data, chunk_size=10):
    """Lazily yield consecutive slices of ``data`` holding up to ``chunk_size`` items.

    The last slice is shorter when ``len(data)`` is not a multiple of
    ``chunk_size``. ``data`` must support ``len()`` and slicing.
    """
    yield from (
        data[start:start + chunk_size]
        for start in range(0, len(data), chunk_size)
    )


In [7]:
# Materialise the chunk generator into a list of batches of up to 50 Documents each.
batches = list(chunk_data(data, 50))


In [8]:
# Peek at the first Document of the first batch.
print(batches[0][0])


page_content='transaction_id: 1
timestamp: 2025-06-04T08:32:11
amount: 89290.85
currency: INR
sender_account_id: ACC1005
receiver_account_id: ACC1079
sender_customer_id: CUST024
receiver_customer_id: CUST028
transaction_type: payment
sender_country: China
receiver_country: India' metadata={'source': '../datasets/synthetic_bank_transactions.csv', 'row': 0}


In [3]:
def graph_query(cql: str):
    """Run ``cql`` against the notebook-level ``graph`` and report the outcome.

    Returns "✅ SUCCESS" when the query completes. If the query raises, the
    error is printed and "❌ FAILED" is returned instead of propagating.
    """
    try:
        graph.query(cql)
    except Exception as err:
        print(err)
        return "❌ FAILED"
    else:
        return "✅ SUCCESS"


In [4]:
def run_command(command):
    """Run ``command`` through the system shell and report whether it succeeded.

    Args:
        command: Shell command line as a single string.

    Returns:
        "✅ Success" when the command exits with code 0, otherwise
        "❌ Failure with exit code <n>" where <n> is the process's real exit code.
    """
    # Local import so this cell stays self-contained.
    import subprocess

    print("🔦 Tool called:", command)
    # NOTE(security): this function is registered as a tool whose parameters come
    # from the model's response, and the command is executed unfiltered through a
    # shell. Only run this in a disposable environment.
    #
    # subprocess.run(...).returncode is the actual exit code on every platform.
    # os.system() returns an encoded wait status on Unix (exit 3 -> 768), which
    # made the failure message misleading.
    exit_code = subprocess.run(command, shell=True).returncode
    if exit_code == 0:
        return "✅ Success"
    return f"❌ Failure with exit code {exit_code}"


In [5]:
def escape_braces(strings):
    """Return a new list in which every ``{`` and ``}`` of each string is doubled.

    Doubled braces are read as literal braces by ``str.format``-style templates.
    """
    escaped = []
    for text in strings:
        opened = text.replace('{', '{{')
        escaped.append(opened.replace('}', '}}'))
    return escaped


In [6]:
def escape_brace(s)->str:
    """Return ``s`` with every ``{`` and ``}`` doubled.

    Doubled braces are read as literal braces by ``str.format``-style templates.
    """
    opened = s.replace('{', '{{')
    return opened.replace('}', '}}')


In [7]:
# Tool registry: tool name -> text shown to the model ("parameter", "description")
# plus the Python callable ("fn") that the agent loop invokes.
tools = dict(
    graph_query=dict(
        parameter="The Cypher Query in docstring format",
        description="Takes cypher query and run that for creating the nodes and their relationships in neo4j graph database. Return either 'Success' or 'Failed' based on execution.",
        fn=graph_query,
    ),
    run_command=dict(
        parameter="Command in string format",
        description="Takes the command as a parameter, executes the command and return either the success or (failure with error message).",
        fn=run_command,
    ),
)


In [8]:
# Render every registered tool as a text snippet to be passed to the prompt's
# {tools_list} variable.
tools_list = []
for key, value in tools.items():
    # NOTE(review): `str` rebinds the builtin name at notebook scope until a later
    # cell runs `del str`. Prefer a different variable name; if renamed, that
    # `del str` cell must be removed as well.
    # NOTE(review): the "fn" line interpolates the function object's repr
    # (e.g. "<function graph_query at 0x...>"), which ends up in the prompt text.
    str = f"""
        {key} - {{
            "parameter": {value["parameter"]},
            "description": {value["description"]},
            "fn": {value["fn"]},
        }}
    """
    tools_list.append(str);
    
# Double every brace in the snippets (see escape_braces).
# NOTE(review): the rendered prompt shown near the end of the notebook still
# contains "{{" / "}}" in this section; check whether escaping is wanted for a
# value passed as an input variable.
tools_list = escape_braces(tools_list);


In [9]:
# Prompt template messages for the Cypher-generation agent.
# - The system message describes the step protocol, lists the tools via
#   {tools_list}, fixes the JSON output format and gives one worked example.
# - The human message injects the data to process via {dataset}.
# Literal braces are doubled ({{ }}) because the text is used as a format template.
# The tool-call format example was corrected to well-formed JSON ("<tool_name>"
# placeholder closed, no trailing comma after "params", no stray quotes at the end
# of the action example), since the model's output is parsed as JSON.
messages = [
    ("system", """
      You are a Cypher Query Language expert who writes CQL for graph databases.

      You properly extract entities (nodes) by reading the input data and understanding the context. Then, you create relationships between those entities based on the inferred meaning and structure of the data.

      You follow the process below, step by step:
      think → understand → entities → relationships → action → tool (optional) → tool_response (optional) → (again action if needed) → result
      
      ### Tools
      Here is your tools:
      {tools_list}

      ### Rules
      - Do not assume relationships or entities that are not directly or logically inferred from the input.
      - Use your tools effectively whenever needed.
      - You perform only one step at a time.
      - Always respond in JSON format only.
      - Don't use ``` in your response.
      
      Output Format -
      {{ 
         "step": "<step_taken>",
         "content": "<content>"
      }}
      If you are at tool step, then use this format, so that I can execute that and provide you its response -
      {{
         "step": "tool",
         "name": "<tool_name>",
         "params": "<tool_parameters>"
      }}

      ### Example
      Input: "Sophie visits a cozy café called 'The Reading Nook' in Paris. She orders a cappuccino and reads 'Pride and Prejudice' by Jane Austen. The book is part of the 'Classic Literature' genre. The café is owned by a man named Thomas, who is friends with Sophie. Sophie loves books by Jane Austen and visits this café often."

      Output: {{"step": "think", "content": "Okay, I got a story from the user, now I have to understand it carefully."}}

      Output: {{ "step": "understand", "content": "This story describes Sophie visiting a cozy café named 'The Reading Nook' in Paris. She orders a cappuccino and reads 'Pride and Prejudice' by Jane Austen. The book is in the 'Classic Literature' genre. The café is owned by Thomas, who is Sophie's friend. Sophie enjoys books by Jane Austen and often visits this café." }}

      Output: {{ "step": "entities", "content": "Entities - Entities found in the story:
      - Person: Sophie, Thomas
      - Café: The Reading Nook
      - City: Paris
      - Book: Pride and Prejudice
      - Author: Jane Austen
      - Genre: Classic Literature
      - Drink: Cappuccino" }}

      Output: {{ "step": "relationships", "content": "Relationships based on the entities:
      - Sophie VISITED The Reading Nook
      - The Reading Nook IS_IN Paris
      - Sophie READ Pride and Prejudice
      - Pride and Prejudice WAS_WRITTEN_BY Jane Austen
      - Pride and Prejudice BELONGS_TO_GENRE Classic Literature
      - Sophie ORDERED Cappuccino
      - Thomas OWNS The Reading Nook
      - Sophie IS_FRIENDS_WITH Thomas
      - Sophie LIKES_AUTHOR Jane Austen" }}

      Output: {{ "step": "action",  "content": "Now that I have all entities and relationships, I can write the Cypher queries:

      CREATE (p1:Person {{name: 'Sophie'}})
      CREATE (p2:Person {{name: 'Thomas'}})
      CREATE (c1:Cafe {{name: 'The Reading Nook'}})
      CREATE (city1:City {{name: 'Paris'}})
      CREATE (b1:Book {{title: 'Pride and Prejudice'}})
      CREATE (a1:Author {{name: 'Jane Austen'}})
      CREATE (g1:Genre {{name: 'Classic Literature'}})
      CREATE (d1:Drink {{name: 'Cappuccino'}})
      CREATE (p1)-[:VISITED]->(c1)
      CREATE (c1)-[:IS_IN]->(city1)
      CREATE (p1)-[:READ]->(b1)
      CREATE (b1)-[:WAS_WRITTEN_BY]->(a1)
      CREATE (b1)-[:BELONGS_TO_GENRE]->(g1)
      CREATE (p1)-[:ORDERED]->(d1)
      CREATE (p2)-[:OWNS]->(c1)
      CREATE (p1)-[:IS_FRIENDS_WITH]->(p2)
      CREATE (p1)-[:LIKES_AUTHOR]->(a1)" }}

      Output: {{ "step": "tool", "name": "graph_query", "params": "CREATE (p1:Person {{name: 'Sophie'}})
      CREATE (p2:Person {{name: 'Thomas'}})
      CREATE (c1:Cafe {{name: 'The Reading Nook'}})
      CREATE (city1:City {{name: 'Paris'}})
      CREATE (b1:Book {{title: 'Pride and Prejudice'}})
      CREATE (a1:Author {{name: 'Jane Austen'}})
      CREATE (g1:Genre {{name: 'Classic Literature'}})
      CREATE (d1:Drink {{name: 'Cappuccino'}})
      CREATE (p1)-[:VISITED]->(c1)
      CREATE (c1)-[:IS_IN]->(city1)
      CREATE (p1)-[:READ]->(b1)
      CREATE (b1)-[:WAS_WRITTEN_BY]->(a1)
      CREATE (b1)-[:BELONGS_TO_GENRE]->(g1)
      CREATE (p1)-[:ORDERED]->(d1)
      CREATE (p2)-[:OWNS]->(c1)
      CREATE (p1)-[:IS_FRIENDS_WITH]->(p2)
      CREATE (p1)-[:LIKES_AUTHOR]->(a1)" }}

      Output: {{ "step":"tool_response", "content": "Success" }}

      Output: {{ "step":"action", "content": "The tool executed successfully, which means all Cypher queries were applied." }}

      Output: {{ "step":"result", "content": "All entities and relationships have been successfully created and stored in your graph database 🎉." }}
   """),
   ("human", """Generate and store entities and relationships for this:
      <data>
      {dataset}
      </data>
   """),
]


In [16]:
# Build the chat prompt template from the (role, template) pairs in `messages`.
cql_generator_prompt = ChatPromptTemplate.from_messages(messages)


In [17]:
# Pipeline: fill the prompt -> call the chat model -> parse the reply as JSON.
# JsonOutputParser raises OutputParserException when the reply is not valid JSON.
cql_chain = cql_generator_prompt | llm | JsonOutputParser()


In [12]:
# Debug check: shows what the name `str` currently refers to
# (the builtin type, or a leftover notebook-level variable).
print(str, type(str))


<class 'str'> <class 'type'>


In [11]:
# Remove a notebook-level `str` binding, if any, so the builtin is visible again.
# Unlike a bare `del str`, this is a no-op rather than a NameError when nothing
# shadows the builtin, so the cell can be re-run safely. The trailing semicolon
# suppresses the cell output.
globals().pop("str", None);


In [None]:
# Debug check: confirm that `str` refers to the builtin type again.
print(str, type(str))


<class 'str'> <class 'type'>


In [25]:
# Agent loop: ask the model for its next step, run a tool if requested, and append
# the outcome to the prompt so the next invocation sees it.
# The iteration cap is kept at 1 (a single model call); raise it to let the model
# walk through several steps.
for _attempt in range(1):
    res = cql_chain.invoke(input={
        "tools_list": tools_list,
        "dataset": data[:50],
    })
    print(res)

    step = res.get("step")
    if step == "result":
        break

    if step == "tool":
        # No trailing comma here: it used to turn the name into a 1-tuple, which made
        # the registry lookup return None and the call fail with AttributeError.
        tool_name = res.get("name")
        tool_params = res.get("params")
        tool_entry = tools.get(tool_name)
        if tool_entry is None:
            # Report an unknown tool back to the model instead of crashing.
            tool_res = f"❌ Unknown tool: {tool_name}"
        else:
            tool_res = tool_entry["fn"](tool_params)

        # Append to the prompt template itself. The `messages` list was only read
        # when the template was built, so appending there had no effect on the chain.
        # Braces are escaped because the text is stored as a format template.
        tool_reply = json.dumps({
            "step": "tool_response",
            "content": tool_res,
        })
        cql_generator_prompt.append(("ai", escape_brace(tool_reply)))
        continue

    # Record the model's own step. `res` is left untouched so it remains the parsed
    # dict for later cells that call res.get(...).
    step_summary = escape_brace(f"{step}-{res.get('content')}")
    cql_generator_prompt.append(("ai", step_summary))

    print(len(cql_generator_prompt))


OutputParserException: Invalid json output: Each document represents a transaction, and I need to iterate through them.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

In [28]:
# Number of messages currently held by the prompt template.
len(cql_generator_prompt)


3

In [24]:
# Render the prompt for a single Document (without calling the model) to inspect
# the exact messages that would be sent.
cql_generator_prompt.invoke(input={
            "tools_list": tools_list,
            "dataset": data[0],
        })


ChatPromptValue(messages=[SystemMessage(content='\n      You are a Cypher Query Language expert who writes CQL for graph databases.\n\n      You properly extract entities (nodes) by reading the input data and understanding the context. Then, you create relationships between those entities based on the inferred meaning and structure of the data.\n\n      You follow the process below, step by step:\n      think → understand → entities → relationships → action → tool (optional) → tool_response (optional) → (again action if needed) → result\n\n      ### Tools\n      Here is your tools:\n      [\'\\n        graph_query - {{\\n            "parameter": The Cypher Query in docstring format,\\n            "description": Takes cypher query and run that for creating the nodes and their relationships in neo4j graph database. Return either \\\'Success\\\' or \\\'Failed\\\' based on execution.,\\n            "fn": <function graph_query at 0x000001872D987920>,\\n        }}\\n    \', \'\\n        run_

In [None]:
# Manually run the text in the last response's "content" field as a Cypher query.
# Assumes `res` is still the parsed dict returned by the chain — TODO confirm.
graph_query(res.get("content"))
