# **GraphRAG-powered AI Assistant with FalkorDB**

In [None]:
# !pip install openai

# Environment setup: each line shells out to pip from the notebook.
# NOTE(review): no versions are pinned, so a re-run may resolve different
# releases than the ones recorded in the captured output.

# Gradio provides the chat UI built in the "Chatbot" section.
!pip install --upgrade gradio

# PDF loading plus the LangChain packages imported in the next cell.
# NOTE(review): `datasets` and `langchain-google-genai` are not imported by any
# visible cell — confirm they are still needed.
!pip install datasets pypdf langchain
!pip install langchain-community
!pip install langchain-openai
!pip install langchain-experimental
!pip install falkordb
!pip install langchain-groq
!pip install -U langchain-google-genai

Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.3 (from gradio)
  Downloading gradio_client-1.4.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [None]:

import logging
import sys

import os
import falkordb

from langchain_openai import ChatOpenAI
from langchain.chains import FalkorDBQAChain
from langchain_community.graphs import FalkorDBGraph
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.tools import Tool
from langchain.agents import AgentExecutor, create_react_agent
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain import hub
from langchain_community.graphs.graph_document import Node, Relationship
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_groq import ChatGroq



# Route log records at INFO level and above to stdout so they appear in cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [None]:
# Credentials must not be written into the notebook. Keys that are already
# present in the environment are reused; any missing key is requested with a
# non-echoing prompt so it never ends up in the saved file or its output.
import getpass

for _key_name in ("OPENAI_API_KEY", "GROQ_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass.getpass(f"Enter {_key_name}: ")

# Data Ingestion

In [None]:

from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

# Folder that is searched (recursively, via the `**/*.pdf` glob) for PDF files.
DOCS_PATH = "./data"

# Every matching file is handed to PyPDFLoader.
loader = DirectoryLoader(DOCS_PATH, glob="**/*.pdf", loader_cls=PyPDFLoader)

docs = loader.load()
# NOTE(review): the bare expression below displays every loaded document in
# full; the captured output shows one Document per PDF page.
docs

[Document(metadata={'source': 'data/XYZ Bank Savings Account.pdf', 'page': 0}, page_content="# XYZ Bank Savings Account \n## Features & Privileges \n \n### Page 1 \n \n#### Welcome to XYZ Bank Savings \nAt XYZ Bank, we are committed to helping you grow your savings and achieve your financial goals. \nOur savings account offers a range of features and privileges designed to provide you with the best \npossible banking experience. \n \nAs a customer-centric financial institution, we understand the importance of building long-term \nwealth through smart savings strategies. That's why we've crafted our savings account to not only \noffer competitive interest rates, but also a suite of tools and resources to empower you on your \nsavings journey. \n \n#### Competitive Interest Rates \nOur savings account offers industry-leading interest rates, allowing your money to grow faster. Enjoy \na base rate of 1.50% APY , with the opportunity to earn up to 2.00% APY through our promotional \nprogram

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Split on blank lines into pieces of roughly 1000 characters, with 200
# characters of overlap between neighbouring pieces.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1000,
    chunk_overlap=200,
)

chunks = text_splitter.split_documents(docs)
# NOTE(review): `chunks` is not consumed by any visible cell — the graph is
# later built from `docs`. Confirm which of the two is intended.
chunks

[Document(metadata={'source': 'data/XYZ Bank Savings Account.pdf', 'page': 0}, page_content="# XYZ Bank Savings Account \n## Features & Privileges \n \n### Page 1 \n \n#### Welcome to XYZ Bank Savings \nAt XYZ Bank, we are committed to helping you grow your savings and achieve your financial goals. \nOur savings account offers a range of features and privileges designed to provide you with the best \npossible banking experience. \n \nAs a customer-centric financial institution, we understand the importance of building long-term \nwealth through smart savings strategies. That's why we've crafted our savings account to not only \noffer competitive interest rates, but also a suite of tools and resources to empower you on your \nsavings journey. \n \n#### Competitive Interest Rates \nOur savings account offers industry-leading interest rates, allowing your money to grow faster. Enjoy \na base rate of 1.50% APY , with the opportunity to earn up to 2.00% APY through our promotional \nprogram

In [None]:
# Primary chat model used for graph extraction, Cypher generation and analysis.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
# Alternative Groq-hosted model.
# NOTE(review): `llm2` is not referenced by any visible cell.
llm2 = ChatGroq(
    model="mixtral-8x7b-32768",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)


# Graph transformer without node restrictions.
# NOTE(review): `doc_transformer` is not referenced by any visible cell; the
# ingestion cell builds its own transformer with `allowed_nodes`.
doc_transformer = LLMGraphTransformer(
    llm=llm,
    )


In [None]:
# graph = FalkorDBGraph(
#     url="redis://localhost:6379", decode_responses=True
# )

# Connection settings are read from the environment so the endpoint and
# credentials can be changed without editing the notebook. The defaults keep
# the values this notebook was originally run with.
# NOTE(review): the default password is still a literal; prefer setting
# FALKORDB_PASSWORD and removing the default before sharing the notebook.
graph = FalkorDBGraph(
    host=os.environ.get(
        "FALKORDB_HOST",
        "r-6jissuruar.instance-5lf1sib9t.hc-7up0crkyn.ap-south-1.aws.f2e0a955bb84.cloud",
    ),
    username=os.environ.get("FALKORDB_USERNAME", "falkordb"),
    password=os.environ.get("FALKORDB_PASSWORD", "falkordb"),
    port=int(os.environ.get("FALKORDB_PORT", "52780")),
    database=os.environ.get("FALKORDB_DATABASE", "BFSI"),
)



# Restrict extraction to the entity types that matter for this domain.
allowed_nodes = ["Organization", "FinancialProduct", "Feature", "Service", "Program"]
no_schema = LLMGraphTransformer(llm=llm, allowed_nodes=allowed_nodes)
# NOTE(review): extraction runs on the page-level `docs`, not on the `chunks`
# produced by the splitter cell — confirm this is intended.
data = no_schema.convert_to_graph_documents(docs)
print(data)
graph.add_graph_documents(data)


# Re-read the schema so it reflects the nodes and relationships just written.
graph.refresh_schema()
print(graph.schema)

Node properties: [[OrderedDict([('label', 'Program'), ('keys', ['id'])])], [OrderedDict([('label', 'Financialproduct'), ('keys', ['id'])])], [OrderedDict([('label', 'Feature'), ('keys', ['id'])])], [OrderedDict([('label', 'Organization'), ('keys', ['id'])])], [OrderedDict([('label', 'Service'), ('keys', ['id'])])]]
Relationships properties: [[OrderedDict([('types', 'MAINTAINS'), ('keys', [])])], [OrderedDict([('types', 'OFFERED_BY'), ('keys', [])])], [OrderedDict([('types', 'HAS_FEATURE'), ('keys', [])])], [OrderedDict([('types', 'MANAGED_THROUGH'), ('keys', [])])], [OrderedDict([('types', 'PARTNERS_WITH'), ('keys', [])])], [OrderedDict([('types', 'DEVELOPS'), ('keys', [])])], [OrderedDict([('types', 'LINKED_TO'), ('keys', [])])], [OrderedDict([('types', 'COVERS'), ('keys', [])])], [OrderedDict([('types', 'PROVIDES'), ('keys', [])])], [OrderedDict([('types', 'INCLUDES'), ('keys', [])])], [OrderedDict([('types', 'OFFERS'), ('keys', [])])], [OrderedDict([('types', 'ENCOURAGES'), ('keys',

In [None]:
# Sanity check: fetch every node carrying the `Financialproduct` label (the
# casing matches the label shown in the schema output above).
results = graph.query("MATCH (sa:Financialproduct) RETURN sa")


In [None]:
content_list = []  # NOTE(review): never filled or read in the visible cells
for row in results:
    node = row[0]  # the query returns a single column (`sa`) per row
    print(node)

(:Financialproduct{id:"Savings Account"})
(:Financialproduct{id:"Savings_Account"})
(:Financialproduct{id:"Checking Account"})
(:Financialproduct{id:"Holiday-Themed Savings Incentives"})
(:Financialproduct{id:"Youth Savings Account"})


# Query

In [None]:
def _describe_graph_item(item):
    """
    Convert one value from a result row into a plain Python structure.

    Values that expose a ``properties`` attribute (graph nodes and
    relationships) become ``{'type': ..., 'properties': {...}}``; every other
    value is returned unchanged.
    """
    if not hasattr(item, 'properties'):
        return item

    labels = getattr(item, 'labels', None)
    if labels:
        kind = labels[0]
    else:
        # Relationships have no labels. FalkorDB edges appear to expose
        # `relation` rather than `type`, so both attribute names are tried.
        kind = getattr(item, 'relation', None) or getattr(item, 'type', None)

    return {'type': kind, 'properties': dict(item.properties or {})}


def query_graph_with_llm(
    llm,
    graph,
    user_query: str,
    system_prompt=None,
    analysis_prompt: str = """You are a financial services expert. Based on the graph query results provided,
    give a comprehensive analysis and explanation. Include relevant details about each item and how they relate
    to each other. If appropriate, suggest related products or services that might be relevant to the user.
    Format your response in a clear, structured way."""
):
    """
    Query the knowledge graph using LLM-generated Cypher queries and analyze results.

    The function asks the model to translate the question into Cypher, runs
    the query against the graph, and then asks the model to explain the rows.

    Args:
        llm: Chat model exposing ``invoke(messages)``; the reply's ``content``
            attribute is used.
        graph: Graph object exposing ``schema`` (and optionally
            ``structured_schema``) and ``query(cypher)``.
        user_query: Natural language query from user
        system_prompt: Optional system prompt for the Cypher-generation step.
            When omitted, a prompt embedding the formatted schema is built.
        analysis_prompt: Prompt for analyzing results

    Returns:
        Dict with the keys ``success``, ``query``, ``raw_results``,
        ``analysis``, ``error`` and ``schema_used``. Failures never raise:
        ``success`` is False and ``error`` holds the exception text.
    """
    # `re` is imported by a later cell in this notebook; importing it here
    # keeps the function usable regardless of which cells have been run.
    import re

    # Initialised up front so the error path can report whatever was reached.
    cypher_query = None
    formatted_schema = None

    try:
        # Prefer the structured schema when the graph provides a non-empty
        # one; otherwise fall back to the plain `schema` attribute.
        current_schema = getattr(graph, 'structured_schema', None) or graph.schema

        # Format schema for better LLM understanding
        formatted_schema = format_schema_for_prompt(current_schema)

        # Honour a caller-supplied system prompt; only build the default one
        # when none was given.
        if system_prompt is None:
            system_prompt = f"""You are an expert at converting natural language questions into Cypher queries.
        The graph has the following schema:

        {formatted_schema}

        Return ONLY the Cypher query without any explanation or additional text.
        Make sure to use proper Cypher syntax and casing.
        Use the exact relationship types and node labels as shown in the schema."""

        # Generate Cypher query using LLM
        query_messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Convert this question to a Cypher query: {user_query}"}
        ]

        # `invoke` replaces the deprecated `predict_messages`.
        cypher_query = llm.invoke(query_messages).content

        # Strip markdown code fences, whatever the casing of the language tag.
        cypher_query = re.sub(
            r'```(?:cypher)?\s*|\s*```', '', cypher_query, flags=re.IGNORECASE
        ).strip()

        # Execute query
        results = graph.query(cypher_query)

        # Turn nodes/relationships into plain dicts; scalars pass through.
        processed_results = [
            [_describe_graph_item(item) for item in row]
            for row in results
        ]

        # Format results for LLM analysis
        results_text = format_results_for_llm(processed_results)

        # Generate analysis using LLM
        analysis_messages = [
            {"role": "system", "content": analysis_prompt},
            {"role": "user", "content": f"User Question: {user_query}\n\nQuery Results:\n{results_text}\n\nPlease provide a comprehensive analysis of these results."}
        ]

        analysis = llm.invoke(analysis_messages).content

        return {
            'success': True,
            'query': cypher_query,
            'raw_results': processed_results,
            'analysis': analysis,
            'error': None,
            'schema_used': formatted_schema  # Include schema for reference
        }

    except Exception as e:
        return {
            'success': False,
            'query': cypher_query,
            'raw_results': None,
            'analysis': None,
            'error': str(e),
            'schema_used': formatted_schema
        }


def _schema_row_mapping(row):
    """
    Return the mapping held by one schema row.

    A row is either the mapping itself or a one-element sequence wrapping it
    (the shape of a single-column query result row).
    """
    return row if hasattr(row, "keys") else row[0]


def format_schema_for_prompt(schema) :
    """
    Format the graph schema into a clear, LLM-friendly string.

    Three input shapes are understood:

    * a non-blank string — returned unchanged, because it is already a
      rendered schema (the notebook output shows ``graph.schema`` is a string
      beginning with ``Node properties: ...``);
    * a mapping with ``node_properties`` / ``relationships`` /
      ``relationship_properties`` whose values are lists of rows;
    * a mapping with ``node_props`` / ``rel_props`` keyed by label or type,
      and ``relationships`` as a list of ``{start, type, end}`` mappings.

    Args:
        schema: Schema string or dictionary from the graph database

    Returns:
        Formatted string representation of the schema. If the input cannot be
        interpreted, the raw ``str(schema)`` is returned with a short prefix
        instead of raising.
    """
    # Without this early return a string schema would only be substring-tested
    # by the `in` checks below and come back as empty section headers.
    if isinstance(schema, str) and schema.strip():
        return schema

    try:
        # Node labels, from whichever key layout is present.
        if "node_properties" in schema:
            node_labels = [
                _schema_row_mapping(row)["label"] for row in schema["node_properties"]
            ]
        elif "node_props" in schema:
            node_labels = list(schema["node_props"])
        else:
            node_labels = []

        lines = ["Node Types:"]
        lines.extend(f"- {label}" for label in sorted(node_labels))
        lines.extend(["", "Relationships:"])

        # Relationship patterns with their start and end nodes, de-duplicated.
        if "relationships" in schema:
            patterns = set()
            for row in schema["relationships"]:
                rel_info = _schema_row_mapping(row)
                patterns.add(
                    f"- {rel_info['start']} -[{rel_info['type']}]-> {rel_info['end']}"
                )
            lines.extend(sorted(patterns))

        # Relationship types as a reference list; the section only appears
        # when the schema carries that information.
        rel_types = None
        if "relationship_properties" in schema:
            rel_types = [
                _schema_row_mapping(row)["types"]
                for row in schema["relationship_properties"]
            ]
        elif "rel_props" in schema:
            rel_types = list(schema["rel_props"])

        if rel_types is not None:
            lines.extend(["", "Relationship Types:"])
            lines.extend(f"- {rel_type}" for rel_type in sorted(rel_types))

        return "\n".join(lines) + "\n"

    except Exception:
        # Unknown layout: hand the model the raw representation rather than
        # an empty or half-built schema.
        return f"Schema structure: {str(schema)}"

In [None]:
from typing import List, Dict, Any, Union
import re


def format_results_for_llm(results: List) -> str:
    """
    Render processed query rows as plain text for the analysis prompt.

    Each row becomes a numbered "Item" section. Dict entries are printed as a
    type line followed by their properties; any other entry is printed as a
    single value line. Every section is closed with a ``---`` separator.

    Args:
        results: Processed query results (a list of rows)

    Returns:
        The concatenated text, or an empty string when there are no rows
    """
    pieces = []
    for position, row in enumerate(results, 1):
        pieces.append(f"\nItem {position}:\n")
        for entry in row:
            if not isinstance(entry, dict):
                pieces.append(f"Value: {entry}\n")
                continue
            pieces.append(f"Type: {entry['type']}\n")
            pieces.append("Properties:\n")
            pieces.extend(
                f"  - {name}: {val}\n" for name, val in entry['properties'].items()
            )
        pieces.append("---\n")
    return "".join(pieces)


def format_final_output(query_results: Dict[str, Any], include_query: bool = True) -> str:
    """
    Turn a result dictionary into the text shown to the reader.

    Args:
        query_results: Results dictionary from query_graph_with_llm
        include_query: Whether to include the Cypher query in output

    Returns:
        An error line when the run failed; otherwise the optional query
        section followed by the analysis (or a placeholder when it is empty)
    """
    # Failed runs short-circuit to a single error line.
    if not query_results['success']:
        return f"Error executing query: {query_results['error']}"

    sections = []
    if include_query:
        sections.append(f"Cypher Query:\n{query_results['query']}\n\n")

    analysis = query_results['analysis']
    sections.append(f"Analysis:\n{analysis}" if analysis else "No analysis available.")

    return "".join(sections)



In [None]:
# End-to-end example: question -> generated Cypher -> graph rows -> analysis.
query = "What financial products are available for young customers?"
# NOTE(review): `results` is rebound here — it previously held the rows of the
# `Financialproduct` query and now holds the result dictionary.
results = query_graph_with_llm(llm, graph, query)
print(format_final_output(results))


  cypher_query = llm.predict_messages(query_messages).content


Cypher Query:
MATCH (p:Product)<-[:AVAILABLE_FOR]-(c:Customer) WHERE c.age < 30 RETURN p

Analysis:
Based on the query results regarding financial products available for young customers, we can analyze and categorize the offerings into several key areas. This analysis will help young customers understand their options and how these products can meet their financial needs.

### 1. **Savings Accounts**
   - **Youth Savings Accounts**: These accounts are specifically designed for young customers, often with lower minimum balance requirements and no monthly fees. They typically offer competitive interest rates to encourage saving from an early age.
   - **Benefits**: Teaching financial responsibility, earning interest, and building a savings habit.

### 2. **Checking Accounts**
   - **Student Checking Accounts**: Tailored for students, these accounts usually come with no monthly maintenance fees and free access to ATMs. They may also offer features like mobile banking and budgeting tools.


# Chatbot

In [None]:
import gradio as gr
from typing import List, Dict, Any
import time

class GraphChatbot:
    """
    Chat front end that answers questions by querying the knowledge graph.

    The class builds the prompts, delegates the question -> Cypher -> rows ->
    analysis pipeline to ``query_graph_with_llm``, and converts the outcome
    into a message for the UI.
    """

    def __init__(self, llm, graph):
        self.llm = llm
        self.graph = graph
        self.history = []  # not read by any method of this class

    @staticmethod
    def _row_mapping(row):
        """Return the mapping in a schema row (bare, or wrapped in a one-element sequence)."""
        return row if hasattr(row, "keys") else row[0]

    def format_schema_for_prompt(self, schema: Dict[str, Any]) -> str:
        """Format the graph schema into a clear, LLM-friendly string.

        A non-blank string is returned unchanged because it is already a
        rendered schema. Mappings are accepted with either
        ``node_properties`` (list of rows) or ``node_props`` (keyed by label),
        and with ``relationships`` rows given as mappings or as one-element
        sequences wrapping a mapping. Anything else falls back to
        ``str(schema)``.
        """
        # Without this early return a string schema would only be
        # substring-tested below and come back as empty section headers.
        if isinstance(schema, str) and schema.strip():
            return schema

        try:
            # Node labels, from whichever key layout is present.
            if "node_properties" in schema:
                nodes = [
                    self._row_mapping(row)["label"]
                    for row in schema["node_properties"]
                ]
            elif "node_props" in schema:
                nodes = list(schema["node_props"])
            else:
                nodes = []

            formatted_output = "Node Types:\n"
            for node in sorted(nodes):
                formatted_output += f"- {node}\n"

            formatted_output += "\nRelationships:\n"

            # Relationship patterns with their start and end nodes.
            if "relationships" in schema:
                unique_relationships = set()
                for rel in schema["relationships"]:
                    rel_info = self._row_mapping(rel)
                    unique_relationships.add(
                        f"- {rel_info['start']} -[{rel_info['type']}]-> {rel_info['end']}"
                    )

                # Sorted so the prompt is stable between calls.
                for rel in sorted(unique_relationships):
                    formatted_output += f"{rel}\n"

            return formatted_output

        except Exception:
            # Fallback to basic format
            return str(schema)

    def generate_system_prompt(self) -> str:
        """Generate system prompt with current schema"""
        # The example patterns and the fallback text use the label
        # `Financialproduct`: that is the casing stored in the graph according
        # to the schema and query output recorded in this notebook, and Cypher
        # labels are case-sensitive.
        try:
            # Prefer the structured schema when the graph provides a non-empty
            # one; otherwise use the plain `schema` attribute.
            current_schema = getattr(self.graph, "structured_schema", None) or self.graph.schema
            formatted_schema = self.format_schema_for_prompt(current_schema)

            return f"""You are an expert at converting natural language questions into Cypher queries.
            The graph has the following schema:

            {formatted_schema}

            Return ONLY the Cypher query without any explanation or additional text.
            Make sure to use proper Cypher syntax and casing.
            Use the exact relationship types and node labels as shown in the schema above.

            Example valid relationship patterns:
            (org:Organization)-[r:OFFERS]->(prod:Financialproduct)
            (prod:Financialproduct)-[r:INCLUDES]->(feat:Feature)
            (org:Organization)-[r:PROVIDES]->(serv:Service)"""

        except Exception:
            # Fallback to basic prompt if schema formatting fails
            return """You are an expert at converting natural language questions into Cypher queries.
            The graph has these node types: Organization, Financialproduct, Feature, Service, Program.
            Return ONLY the Cypher query without any explanation or additional text.
            Make sure to use proper Cypher syntax and casing (e.g., Financialproduct not FinancialProduct)."""

    def generate_analysis_prompt(self) -> str:
        """Return the system prompt used when the model explains query results."""
        return """You are a financial services expert. Based on the graph query results provided,
        give a comprehensive analysis and explanation. Include relevant details about each item and how they relate
        to each other. If appropriate, suggest related products or services that might be relevant to the user.
        Format your response in a clear, structured way."""


    def format_bot_message(self, message: str) -> str:
        """Format the bot's message for better readability"""
        return message.strip()

    def format_results_for_analysis(self, results: List) -> str:
        """Format query results for LLM analysis"""
        output = "Query Results:\n"
        for i, row in enumerate(results, 1):
            output += f"\nItem {i}:\n"
            for item in row:
                if isinstance(item, dict):
                    output += f"Type: {item['type']}\n"
                    output += "Properties:\n"
                    for key, value in item['properties'].items():
                        output += f"  - {key}: {value}\n"
                else:
                    output += f"Value: {item}\n"
            output += "---\n"
        return output

    def query_and_respond(self, message: str, history: List[List[str]]) -> str:
        """Process user message and generate response.

        Args:
            message: The user's question.
            history: Conversation so far; accepted for the UI callback
                signature but not used.

        Returns:
            The analysis text, or an apology message describing the failure.
        """
        try:
            # Query the graph and get analysis
            results = query_graph_with_llm(
                self.llm,
                self.graph,
                message,
                system_prompt=self.generate_system_prompt(),
                analysis_prompt=self.generate_analysis_prompt()
            )

            if not results['success']:
                return f"I apologize, but I encountered an error: {results['error']}"

            # The pipeline above has already analysed the rows with this
            # class's analysis prompt, so its answer is returned directly
            # instead of paying for a second, equivalent LLM call.
            return self.format_bot_message(results['analysis'])

        except Exception as e:
            return f"I apologize, but something went wrong: {str(e)}"



def create_chatbot_interface(llm, graph):
    """Build the Gradio chat interface.

    Args:
        llm: Chat model handed to ``GraphChatbot``.
        graph: Graph handle handed to ``GraphChatbot``.

    Returns:
        The ``gr.Blocks`` interface. It is not launched here; the caller is
        expected to call ``launch()`` on it.
    """

    # Initialize chatbot
    bot = GraphChatbot(llm, graph)

    # Define the interface
    with gr.Blocks(css="footer {visibility: hidden}") as interface:
        gr.Markdown("""
        # XYZ Bank Financial Services Assistant
        Ask me anything about our financial products, services, and programs. I'll query our knowledge graph to provide detailed information.
        """)

        # NOTE(review): confirm that `avatar_images` accepts an emoji string
        # with the installed Gradio version.
        chatbot = gr.Chatbot(
            [],
            elem_id="chatbot",
            bubble_full_width=False,
            avatar_images=(None, "🤖"),
            height=400
        )

        with gr.Row():
            msg = gr.Textbox(
                show_label=False,
                placeholder="Ask me about our financial products and services...",
                container=False
            )
            submit = gr.Button("Send", variant="primary")

        gr.Examples(
            examples=[
                "What savings accounts do you offer?",
                "Tell me about products for young customers",
                "What are the features of your checking accounts?",
                "What programs do you have for financial education?",
            ],
            inputs=msg
        )

        gr.Markdown("""
        ### Tips:
        - Be specific in your questions for more detailed responses
        - You can ask about features, benefits, and relationships between products
        - Feel free to ask follow-up questions for clarification
        """)

        def respond(message, chat_history):
            """Answer one user message and return (cleared textbox, updated history)."""
            chat_history = chat_history or []

            # A blank submission would otherwise trigger a full LLM and graph
            # round-trip; ignore it and leave the conversation untouched.
            if not message or not message.strip():
                return "", chat_history

            bot_message = bot.query_and_respond(message, chat_history)
            chat_history.append((message, bot_message))
            return "", chat_history

        # The Send button and the Enter key share the same handler.
        submit.click(
            respond,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot]
        )

        msg.submit(
            respond,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot]
        )

    return interface

# Build the UI and start the server.
# NOTE(review): binding to 0.0.0.0 together with share=True publishes the app
# (and therefore access to the LLM key and the graph) on a public URL with no
# authentication configured here — confirm this is intended before running.
interface = create_chatbot_interface(llm, graph)
interface.launch(
    server_name="0.0.0.0",  # Makes the interface accessible from other machines
    server_port=7864,       # Specify port
    share=True              # Creates a public link
)



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7e2a9c4a635ffaeff9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)






---



-----------------------------------------------------------------------------------------------------

# Bonus Feature: Add Agents

In [None]:
# Prompt for turning a question into Cypher. `{schema}` and `{question}` are
# the two placeholders filled in when the template is rendered.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Only include the generated Cypher statement in your response. Note: Do not include any explanations or apologies in your responses.

Always use case insensitive search when matching strings.

Schema:
{schema}

The question is:
{question}"""

# NOTE(review): `cypher_generation_prompt` is not referenced by any visible
# cell — presumably meant for a FalkorDBQAChain-based tool; confirm.
cypher_generation_prompt = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

In [None]:
# ReAct-style agent prompt. The template expects the variables `tools`,
# `tool_names`, `chat_history`, `input` and `agent_scratchpad`.
agent_prompt = PromptTemplate.from_template("""
You are a banking expert from XYZ Bank.
Be as helpful as possible and return as much information as possible.
Only answer questions that relate to the bank.

Always use a tool and only use the information provided in the context.

TOOLS:
------

You have access to the following tools:

{tools}

To use a tool, please use the following format:

```
Thought: Do I need to use a tool? Yes
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
```

When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:

```
Thought: Do I need to use a tool? No
Final Answer: [your response here]
```

Begin!

Previous conversation history:
{chat_history}

New input: {input}
{agent_scratchpad}
""")


In [None]:
# Build the ReAct agent and its executor.
# NOTE(review): `tools` is not defined in any visible cell, so this cell raises
# NameError on a fresh kernel unless it is defined elsewhere.
agent = create_react_agent(llm, tools, agent_prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    handle_parsing_errors=True,
    verbose=True
    )

# Wrap the executor so each session keeps its own message history; the history
# is supplied to the prompt under the `chat_history` key.
# NOTE(review): `get_by_session_id` is not defined in any visible cell.
chat_agent = RunnableWithMessageHistory(
    agent_executor,
    get_by_session_id,
    input_messages_key="input",
    history_messages_key="chat_history",
)

def generate_response(user_input):
    """
    Send one user message to the conversational agent.

    The message is passed under the ``input`` key, and the current session id
    is supplied through the runnable's ``configurable`` settings.

    Returns:
        The ``output`` entry of the agent's response, ready to render in the UI
    """
    payload = {"input": user_input}
    run_config = {"configurable": {"session_id": get_session_id()}}

    result = chat_agent.invoke(payload, run_config)
    return result['output']