In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cx-agent-dataset/faq.csv
/kaggle/input/cx-agent-dataset/tickets.csv
/kaggle/input/cx-agent-dataset/user_info.csv
/kaggle/input/cx-agent-dataset/payments.csv


# 1. Introduction and Problem statement

Customer support is at the heart of user satisfaction, yet it’s often one of the most resource-draining functions in any organization. Whether you’re in property tech, e-commerce, travel, or SaaS — the need for accurate, fast, and scalable support is universal.

While the use case we’ll explore is rooted in the property tech industry, the architecture is highly adaptable and can be applied across domains. We’ll focus on modular tooling, smart decision routing, and scalable design choices that help take your project from prototype to production-ready.


**Why Now? The Case for Smarter Customer Support**

As businesses grow, so do customer queries — often faster than our support team can scale. Most companies tackle this by hiring more agents, but that’s rarely sustainable.

Enter chatbots.

Modern AI agents, powered by LLMs, aren’t just rule-based bots responding to keywords. With the ability to understand context, fetch relevant information, and respond conversationally, they’re redefining how support can work — handling 60–70% of queries autonomously while escalating only the truly complex cases.

In [1]:
#Libraries 
!pip install -q \
  langchain \
  langchain-huggingface \
  langchain_community \
  langchainhub \
  langchain-groq \
  chromadb \
  sentence-transformers \
  huggingface-hub \
  python-dotenv \
  langchain-mistralai \
  streamlit \
  langgraph

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m57.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9

In [6]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

In [7]:
#Accessing relevant keys
Groq_key = user_secrets.get_secret("Groq")
Mistral_key = user_secrets.get_secret("Mistral")

# Embedding & vector store settings
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
VECTOR_STORE_PATH = "/kaggle/input/cx-agent-dataset"

# Retrieval settings
TOP_K = 3

# LLM parameters
TEMPERATURE = float(0.2)
MAX_LENGTH = int(512)
TOP_P = float(0.95)

# Placeholder for vector store instance
VECTOR_STORE = None

# 2. Data source

For demonstration purposes, we’ve used CSV files to simulate database connections. In a production setting, you can replace these with direct connections to relational databases like PostgreSQL, MySQL, or even SQLite during local development.

1. FAQ: Contains frequently asked questions that don’t require human intervention. These are chunked, embedded using MistralAI embeddings, and stored in ChromaDB for RAG-based retrieval.
2. Tickets: Stores support tickets created by users for follow-up or escalation.
3. Payment info: Includes user payment details etc.
4. User Info: Includes personal data such as room numbers, move-in status, etc.

In [8]:
faq_path = '/kaggle/input/cx-agent-dataset/faq.csv'
ticket_path = '/kaggle/input/cx-agent-dataset/tickets.csv'
payment_path = '/kaggle/input/cx-agent-dataset/tickets.csv'
user_info_path = '/kaggle/input/cx-agent-dataset/user_info.csv'

# 3. Agent tools

We have developed four tools that agent can access to retreive relevant information
1. FAQ tool : This convert the existing FAQ and user RAG to convert into vector database, any FAQ reletated question can be answered from there
2. Payment tool : Give user payment information 
3. Ticket tool: Give user ticket information
4. User info tool: Give user personal information

In [9]:
#Faq tool

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_mistralai import MistralAIEmbeddings

def load_documents(faq_path: str):
    """Load documents from the FAQ CSV file."""
    loader = CSVLoader(file_path=faq_path, source_column="question")
    return loader.load()


def split_into_chunks(data, chunk_size=150, chunk_overlap=2):
    """Split documents into smaller chunks."""
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " ", ""],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    all_chunks = []
    for doc in data:
        chunks = splitter.split_text(doc.page_content)
        for chunk in chunks:
            all_chunks.append(doc.__class__(page_content=chunk, metadata=doc.metadata))
    return all_chunks


def create_vector_store(chunks, Mistral_key):
    embeddings_model = MistralAIEmbeddings(model="mistral-embed",mistral_api_key=Mistral_key)

    vector_store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings_model,
        collection_name="faq_collection",
        persist_directory=None
    )
    
    return vector_store


def get_faq_answer(user_query: str, vector_store, TOP_K) -> str:
    """Retrieve an answer from the FAQ vector store based on the user query."""
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": TOP_K}
    )
    relevant_docs = retriever.get_relevant_documents(user_query)
    if relevant_docs:
        return relevant_docs[0].page_content
    else:
        return "I couldn't find an answer to your question in our FAQ database."


def initialize_vector_store():
    """Initialize vector store from the FAQ data."""
    documents = load_documents(faq_path)
    chunks = split_into_chunks(documents)
    vector_store = create_vector_store(chunks,Mistral_key)
    return vector_store

In [11]:
#Payment_tool
import pandas as pd

def load_payment_data(payment_path):
    """Load payment data from CSV."""
    payment_data = pd.read_csv(payment_path)
    return payment_data


def get_payment_status(user_id: str, payment_path):
    """
    Get the latest payment info for a user.

    Args:
        user_id (str): The user's ID.
        file_path (str): Path to the payments CSV file.

    Returns:
        str: A human-readable summary of the payment.
    """
    df = load_payment_data(payment_path)
    
    # Filter and sort by due_date descending (latest first)
    user_payments = df[df["user_id"] == user_id].sort_values(by="due_date", ascending=False)

    if user_payments.empty:
        return "I couldn't find any payment information for your account."

    latest_payment = user_payments.iloc[0]
    
    summary = (
        f"Your last recorded payment is ₹{latest_payment['amount']} "
        f"which was due on {latest_payment['due_date']}. "
        f"{'GST was applied.' if latest_payment['gst_applied'].lower() == 'yes' else 'GST was not applied.'} "
        f"Current payment status: {latest_payment['status']}."
    )
    
    return summary

In [12]:
#Ticket tool
import pandas as pd
from datetime import datetime

def load_ticket_data(ticket_path):
    """Load ticket data from CSV."""
    ticket_data = pd.read_csv(ticket_path)
    return ticket_data

def get_open_ticket_status(user_id: str, ticket_path):
    """
    Get the latest open ticket info for a user.

    Args:
        user_id (str): The user's ID.
        file_path (str): Path to the tickets CSV file.

    Returns:
        str: Status of open ticket or a message if none found.
    """
    df = load_ticket_data(ticket_path)
    open_tickets = df[(df["user_id"] == user_id) & (df["status"] == "open")]

    if open_tickets.empty:
        return "You don't have any open support tickets at the moment."

    latest_ticket = open_tickets.sort_values(by="created_at", ascending=False).iloc[0]
    return (
        f"Your open ticket (ID: {latest_ticket['ticket_id']}) "
        f"is regarding: '{latest_ticket['issue']}'. It is currently marked as: {latest_ticket['status']}."
    )


def create_ticket(user_id: str, issue: str, ticket_path):
    """
    Create a new support ticket for a user.

    Args:
        user_id (str): The user's ID.
        issue (str): A summary of the unresolved issue.
        file_path (str): Path to the tickets CSV file.

    Returns:
        str: Confirmation message with ticket ID.
    """
    df = load_ticket_data(ticket_path)

    new_ticket = {
        "ticket_id": f"T{int(datetime.now().timestamp())}",
        "user_id": user_id,
        "issue": issue,
        "status": "open",
        "created_at": datetime.now().strftime("%Y-%m-%d %H:%M")
    }

    df = pd.concat([df, pd.DataFrame([new_ticket])], ignore_index=True)
    df.to_csv(file_path, index=False)

    return f"A support ticket has been created for you. Your ticket ID is {new_ticket['ticket_id']}."

In [15]:
#User info tool

import pandas as pd

def load_user_info(user_info_path):
    """Load user info data from CSV."""
    return pd.read_csv(user_info_path)


def get_user_info(user_id: str, user_info_path):
    """
    Get user profile info (room, city, check-in date, etc.).

    Args:
        user_id (str): The user's ID.
        file_path (str): Path to the user info CSV file.

    Returns:
        str: A summary of the user's info or an error message.
    """
    df = load_user_info(user_info_path)
    user_row = df[df["user_id"] == user_id]

    if user_row.empty:
        return "Sorry, I couldn't find your profile information."

    user = user_row.iloc[0]
    return (
        f"Here's your profile info:\n"
        f"Name: {user['name']}\n"
        f"Room No: {user['room_no']}\n"
        f"City: {user['city']}\n"
        f"Check-in Date: {user['checkin_date']}"
    )

# 4. Langraph Agents nodes 
1. Classify Node: This node classifies the user’s query into predefined categories such as FAQ, ticket, payment, or user info, helping route the request accordingly.

2. Escalation Node: This node determines when to escalate a query, triggering the creation of a support ticket for unresolved or complex issues.

3. Payment Response Node: This node handles queries related to payments by querying the payment database and generating a response based on user details.

4. Ticket Response Node: This node retrieves ticket information for follow-up queries, allowing the system to provide updates or escalate as necessary.

5. User Info Node: This node retrieves and returns user-specific information, such as room numbers or move-in status, from the user info database.

6. FAQ Response Node: This node uses RAG to fetch the most relevant FAQ data, enabling the system to generate accurate and context-aware responses to general queries.

7. LangGraph Node: This node orchestrates the flow of tasks, ensuring the proper handling of responses based on the query classification and user context.

In [16]:
#Classifying node

from langchain_groq import ChatGroq
from langchain.schema import SystemMessage, HumanMessage

def classify_node(state: dict) -> dict:
    """
    Classify user intent using Groq's LLM via LangChain.

    Input state must contain:
    - 'user_query': the raw query
    - 'user_id': the user's ID

    Returns updated state with:
    - 'intent': one of ['faq', 'ticket', 'payment', 'user_info', 'fallback']
    """
    user_query = state.get("user_query", "")

    # Updated prompt for better classification logic
    system_prompt = SystemMessage(content="""
You are a classification assistant for a residential support chatbot.

Classify user queries into one of the following:

- faq: If the user is asking about general info, help, or known issues like payment failures, service availability, policies, etc.
- ticket: Only if the user is **raising a personal complaint or service request** (e.g., "My Wi-Fi is not working", "There is a leak in my bathroom").
- payment: If the user asks about rent, GST, amount paid, due date, refunds, etc.
- user_info: If the user asks about their personal info like room number, check-in, ID details, address, etc.
- fallback: If you are not sure or the user is just greeting or unclear.

Examples:
Query: What time is check-in? → faq  
Query: My AC is not working → ticket  
Query: When is my rent due? → payment  
Query: What is my room number? → user_info  
Query: Hello → fallback

Only respond with one word: faq, ticket, payment, user_info, or fallback.
""")

    human_prompt = HumanMessage(content=f"Query: {user_query}\nAnswer:")

    # Use Groq’s LLM
    llm = ChatGroq(groq_api_key=Groq_key,model_name="llama3-70b-8192")

    response = llm.invoke([system_prompt, human_prompt])
    intent = response.content.strip().lower()

    return {**state, "intent": intent}

In [17]:
#Escalation node
from langchain_groq import ChatGroq
from langchain.schema import HumanMessage, SystemMessage


def summarize_issue(user_query: str) -> str:
    """Use Groq LLM to summarize user query for ticket description."""
    llm = ChatGroq(groq_api_key=Groq_key, model_name="llama3-70b-8192")

    prompt = f"Summarize the following issue clearly for creating a support ticket:\n\n{user_query}"
    response = llm.invoke([
        SystemMessage(content="You are a helpful assistant for writing support ticket summaries."),
        HumanMessage(content=prompt)
    ])
    return response.content.strip()


def escalation_node(state: dict) -> dict:
    """
    Escalates unresolved queries by creating a support ticket.
    
    Requires:
    - 'user_id'
    - 'user_query'
    - 'feedback' == 'no'
    
    Returns:
    - 'ticket_id'
    - 'final_response'
    - 'escalation_status'
    """
    user_id = state.get("user_id")
    user_query = state.get("user_query")
    feedback = state.get("feedback", "").lower()

    if feedback != "no":
        return {**state, "escalation_status": "not_triggered"}

    #Summarize issue with LLM
    issue_summary = summarize_issue(user_query)

    #Create ticket using your existing tool
    ticket_id = create_ticket(user_id, issue_summary, file_path= ticket_path)

    return {
        **state,
        "ticket_id": ticket_id,
        "escalation_status": "ticket_created",
        "final_response": f"A support ticket has been created for you. Your ticket ID is {ticket_id}."
    }

In [18]:
#faq node

def faq_response_node(state: dict) -> dict:
    """
    Responds to a user query classified as 'faq' using the FAQ vector store.

    Requires:
    - 'user_query' key in state

    Returns:
    - Updated state with 'final_response'
    """
    print("✅ Running updated faq_response_node...")

    user_query = state.get("user_query")


    try:
        # Use top 3 most relevant documents
        answer = get_faq_answer(user_query, vector_store=VECTOR_STORE, TOP_K=3)
    except Exception as e:
        answer = f"Error while fetching answer from FAQ: {e}"

    return {
        **state,
        "final_response": answer
    }



In [19]:
#Payment response node
def payment_response_node(state: dict) -> dict:
    """
    Responds to a user query classified as 'payment' by fetching payment details.
    
    Requires:
    - 'user_id'
    
    Returns:
    - Updated state with 'final_response'
    """
    user_id = state.get("user_id")
    answer = get_payment_status(user_id, file_path= payment_path)

    return {
        **state,
        "final_response": answer
    }

In [21]:
#Ticket response node
def ticket_response_node(state: dict) -> dict:
    """
    Responds to a user query classified as 'ticket' by fetching open ticket status.
    
    Requires:
    - 'user_id'
    
    Returns:
    - Updated state with 'final_response'
    """
    user_id = state.get("user_id")
    answer = get_open_ticket_status(user_id, file_path= ticket_path)

    return {
        **state,
        "final_response": answer
    }

In [22]:
#User response node

def user_info_response_node(state: dict) -> dict:
    """
    Responds to a user query classified as 'user_info' by fetching user profile details.
    
    Requires:
    - 'user_id'
    
    Returns:
    - Updated state with 'final_response'
    """
    user_id = state.get("user_id")
    answer = get_user_info(user_id, file_path= user_info_path)

    return {
        **state,
        "final_response": answer
    }

In [23]:
#Langraph flow
from langgraph.graph import StateGraph, END
from typing import TypedDict, Optional


# Define the state schema using TypedDict
class AgentState(TypedDict):
    user_query: str
    user_id: str
    intent: str
    feedback: str
    ticket_id: Optional[str]
    escalation_status: Optional[str]
    final_response: Optional[str]


# Define intent-based routing function
def intent_router(state: AgentState) -> str:
    return state.get("intent", "fallback")


# Define feedback-based routing function
def feedback_router(state: AgentState) -> str:
    return state.get("feedback", "yes")


# Create LangGraph flow
graph = StateGraph(AgentState)

# Add nodes
graph.add_node("classify", classify_node)
graph.add_node("faq_response", faq_response_node)
graph.add_node("ticket_response", ticket_response_node)
graph.add_node("payment_response", payment_response_node)
graph.add_node("user_info_response", user_info_response_node)
graph.add_node("escalate", escalation_node)

# Entry point
graph.set_entry_point("classify")

# Conditional routing based on intent
graph.add_conditional_edges("classify", intent_router, {
    "faq": "faq_response",
    "ticket": "ticket_response",
    "payment": "payment_response",
    "user_info": "user_info_response",
    "fallback": "escalate"
})

# Conditional routing from each response node based on feedback
graph.add_conditional_edges("faq_response", feedback_router, {"no": "escalate","yes": END})
graph.add_conditional_edges("ticket_response", feedback_router, {"no": "escalate","yes": END})
graph.add_conditional_edges("payment_response", feedback_router, {"no": "escalate","yes": END})
graph.add_conditional_edges("user_info_response", feedback_router, {"no": "escalate","yes": END})

# Escalation ends the flow
graph.add_edge("escalate", END)

# Compile the graph
cx_agent_graph = graph.compile()

#Run agent code
def run_agent(user_query, user_id, feedback="yes"):
    initial_state = {
        "user_query": user_query,
        "user_id": user_id,
        "feedback": feedback
    }
    final_state = cx_agent_graph.invoke(initial_state)
    return final_state

# 5. Running file

In [24]:
# Initialize the vector store
VECTOR_STORE = initialize_vector_store()

def main():
    print("=== CX Agent for STANZA LIVING ===\n")

    user_id = input("Enter your user ID: ").strip()
    if not user_id:
        print("User ID is required to proceed.")
        return

    print("\nYou can type 'exit' anytime to quit.\n")

    while True:
        user_query = input("Please mention what you are looking for: ").strip()

        if user_query.lower() == "exit":
            print("Goodbye!")
            break

        # Call the agent to process the query
        response = run_agent(user_query=user_query, user_id=user_id)

        print(f"\nAgent: {response.get('final_response')}\n")

        # Ask for feedback
        feedback = input("Was this issue resolved? (yes/no): ").strip().lower()

        if feedback == "yes":
            print("Thank you for your feedback. Have a great day!")
            break

        # If issue was not resolved, show ticket if created
        if response.get("escalation_status") == "ticket_created":
            print(f"Ticket ID: {response.get('ticket_id')}\n")

if __name__ == "__main__":
    main()




=== CX Agent for STANZA LIVING ===



Enter your user ID:  U101



You can type 'exit' anytime to quit.



Please mention what you are looking for:  How often is housekeeping provided?


✅ Running updated faq_response_node...


  relevant_docs = retriever.get_relevant_documents(user_query)



Agent: question: How often is housekeeping provided?
answer: Housekeeping is done once every two days for shared areas and once a week for personal rooms.
:



Was this issue resolved? (yes/no):  yes


Thank you for your feedback. Have a great day!
