Date: 12/07/2025 <br>
Author: Wan Xuen <br>
Notebook03: Text Mining for Mental Health Chatbot <br>

In [1]:
from openai import OpenAI
from portkey_ai import PORTKEY_GATEWAY_URL, createHeaders
from transformers import AutoTokenizer, AutoModelForSequenceClassification

import torch
import re
import spacy
from textblob import TextBlob
from typing import TypedDict, Annotated, List

from langchain.tools import tool
from langchain.memory import ConversationSummaryBufferMemory
from langgraph.graph import StateGraph, END
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.schema import BaseMessage
from langdetect import detect, LangDetectException
from langchain_core.prompts import ChatPromptTemplate
from langchain_fireworks import ChatFireworks


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sentence-transformer embedding model used to encode text for the vector store below.
embedding_func = SentenceTransformerEmbeddings(model_name="BAAI/bge-small-en-v1.5")
# Chroma vector store persisted under ./chroma_db.
# NOTE(review): presumably populated by an earlier notebook — confirm the directory
# exists and was built with the same embedding model before running this cell.
vectorstore = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embedding_func
)

  embedding_func = SentenceTransformerEmbeddings(model_name="BAAI/bge-small-en-v1.5")
INFO:datasets:PyTorch version 2.7.1+cu118 available.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda:0
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
  vectorstore = Chroma(
INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [3]:
# Retriever view over the vector store, configured with k=5 results per query.
# NOTE(review): no later visible cell references `retriever`; generate_response
# queries `vectorstore.similarity_search` directly — confirm whether this is still needed.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

### Use Portkey


In [5]:
# OpenAI-compatible client pointed at the Portkey gateway, with Groq as the provider.
# NOTE(review): GROQ_API_KEY and PORTKEY_API_KEY are not defined in any visible cell
# (execution counts jump from 3 to 5) — presumably set in an omitted cell; prefer
# reading them from environment variables rather than hardcoding them.
# NOTE(review): `client` is not referenced by the later visible cells, which use `llm`.
client = OpenAI(
    api_key= GROQ_API_KEY,
    base_url=PORTKEY_GATEWAY_URL,
    default_headers=createHeaders(
        provider="groq",
        api_key= PORTKEY_API_KEY, 
    )
)

In [6]:
# Chat model used for response generation, memory summarisation and the final merge step.
# NOTE(review): FIREWORKS_API_KEY is not defined in any visible cell — presumably set
# in an omitted cell; confirm before a fresh Restart & Run All.
llm = ChatFireworks(
    model="accounts/fireworks/models/llama-v3p3-70b-instruct",
    temperature=0.7,
    api_key=FIREWORKS_API_KEY,  # Alternative: read it with os.getenv("FIREWORKS_API_KEY")
)


ERROR:asyncio:Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x0000016F9CA87D90>
ERROR:asyncio:Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x0000016F9CA87110>


### Define tools

In [7]:
# System prompt passed to generate_response by mental_health_tool.
# The text is sent to the model verbatim, so any edit changes chatbot behaviour.
SYSTEM_PROMPT_EMOTION = """
You are a kind, emotionally aware assistant designed to have deep and thoughtful conversations about both happy and difficult topics.

You specialize in mental health, but you can also engage in uplifting, meaningful discussions.

Do not assume the user is in distress unless there is clear emotional language or context. Respond based on the emotional tone of the query.

If someone says hello or shares good news, respond with warmth and curiosity.

If the user expresses emotional pain, distress, or mentions self-harm or depression, prioritize emotional validation and support.

If the context is unclear or missing, politely ask for more information.

If the context is unclear or insufficient, you may also respond with:
"I'm sorry, I don't have enough information to answer that based on the context provided."

Answer in a way that is engaging, informative, and sounds natural — not robotic.
"""

# Maps a tone label to a one-line style instruction for the model.
# NOTE(review): nothing in the visible cells reads `tone_map`; generate_response puts
# the raw label from needs_emotional_support (e.g. "strong_negative") into the prompt,
# and those labels do not match these keys — confirm the intended mapping.
tone_map = {
    "distressed": "Use a gentle, calming tone. Validate their emotions.",
    "positive": "Respond warmly and invite the user to share more.",
    "neutral": "Be supportive and open-ended. Encourage conversation.",
    "mixed": "Be compassionate and inquisitive. Let the user lead.",
    "empathetic": "Use warm and understanding language.",
    "professional": "Keep answers concise and factual.",
    "cheerful": "Use a positive and upbeat tone.",
}

# System prompt passed to generate_response by general_tool.
# The text is sent to the model verbatim, so any edit changes chatbot behaviour.
SYSTEM_PROMPT_GENERAL = """
You are a helpful, friendly, and respectful AI assistant. You answer questions clearly, accurately, and concisely.

When responding:
- If you do not have enough information, say: "I'm sorry, I don't have enough information to answer that based on the context provided."
- Always use a professional, respectful, and approachable tone.
- Straightforwardly answer the question without unnecessary complexity.
- For general questions (factual, technical, or everyday knowledge), respond clearly and straight to the point.
- For technical questions, provide concise explanations and examples when needed.
- Do not offer medical, legal, or financial advice beyond general information.
- If unsure, encourage the user to seek professional help or provide general guidance.
- Stay grounded, avoid assumptions, and never be dismissive.
- Do not give so much emotional support that it distracts from the main question.

Your goal is to be efficient, supportive, and direct in your answers, especially for general cases.
Answer in a way that is engaging, informative, and sounds natural — not robotic.
"""


In [8]:
def needs_emotional_support(text):
    """Bucket the TextBlob sentiment polarity of ``text`` into a coarse label.

    Returns one of "strong_negative", "mild_negative", "neutral",
    "mild_positive" or "strong_positive". The cut-offs are strict:
    below -0.5 / below -0.1 on the negative side, above 0.5 / above 0.1 on
    the positive side; everything else is "neutral".
    """
    score = TextBlob(text).sentiment.polarity

    # Negative side first, then positive; the neutral band is [-0.1, 0.1].
    if score < -0.1:
        return "strong_negative" if score < -0.5 else "mild_negative"
    if score > 0.1:
        return "strong_positive" if score > 0.5 else "mild_positive"
    return "neutral"

In [9]:
# Load the BGE reranker: a sequence-classification model and its tokenizer.
# Both are module-level globals read by rerank() below.
tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-reranker-base")
model = AutoModelForSequenceClassification.from_pretrained("BAAI/bge-reranker-base")

def rerank(query, documents, top_k=3):
    """Order ``documents`` by reranker score against ``query`` and keep the best.

    Args:
        query: The search text each document is scored against.
        documents: Candidate document strings.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` documents, highest score first. An empty input yields
        an empty list without touching the model.
    """
    if not documents:
        return []

    # Score every (query, document) pair in a single batch.
    encoded = tokenizer(
        [(query, text) for text in documents],
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        logits = model(**encoded).logits.squeeze(-1)
    relevance = logits.tolist()

    # sorted() is stable, so ties keep their original relative order.
    ranked = sorted(zip(documents, relevance), key=lambda pair: pair[1], reverse=True)
    return [text for text, _ in ranked[:top_k]]



In [10]:

# Shared conversation memory: summarises with `llm`, configured with a 1000-token limit.
# The same object is used by the tool functions below and passed to initialize_agent.
memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)

@tool
def math_tool(query: str) -> str:
    """A tool that calculates the result of a math expression."""
    # The docstring above is kept verbatim: the `tool` decorator may surface it to the
    # agent as the tool description (assumption — confirm against the LangChain docs).
    #
    # Security: `query` is user chat text, so it must never reach eval(). The
    # expression is parsed with `ast` and only numeric literals, arithmetic
    # operators and a few harmless built-ins are evaluated.
    #
    # Returns "The answer is: <value>" on success, or "Error: <message>" for anything
    # that cannot be parsed or evaluated (same two shapes as before).
    import ast
    import operator
    import re

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
        # NOTE(review): `^` keeps Python's XOR meaning, as it had under eval();
        # confirm whether users expect it to mean exponentiation.
        ast.BitXor: operator.xor,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    safe_calls = {"abs": abs, "round": round, "min": min, "max": max}
    max_power_bits = 100_000  # rough cap on the size of an integer power result

    def _evaluate(node):
        # Recursively evaluate a whitelisted AST node; reject everything else.
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left, right = _evaluate(node.left), _evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and left.bit_length() * right > max_power_bits
            ):
                # Guards against inputs such as 9 ** 9 ** 9 that would hang the kernel.
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_evaluate(node.operand))
        if (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id in safe_calls
            and not node.keywords
        ):
            return safe_calls[node.func.id](*[_evaluate(arg) for arg in node.args])
        raise ValueError("unsupported expression")

    try:
        expression = query.strip()
        try:
            tree = ast.parse(expression, mode="eval")
        except SyntaxError:
            # Callers may pass a whole sentence ("What is 2 + 2?"); fall back to the
            # first embedded fragment that contains both a digit and an operator.
            fragments = [
                piece.strip()
                for piece in re.findall(r"[\d\.\s\+\-\*/\^%\(\)]+", expression)
                if re.search(r"\d", piece) and re.search(r"[\+\-\*/\^%]", piece)
            ]
            if not fragments:
                raise
            tree = ast.parse(fragments[0], mode="eval")
        return f"The answer is: {_evaluate(tree.body)}"
    except Exception as e:
        return f"Error: {str(e)}"

def generate_response(query: str, system_prompt: str, memory: ConversationSummaryBufferMemory) -> str:
    """Answer ``query`` with retrieved context and conversation memory.

    Steps: fetch 10 candidate documents from ``vectorstore``, keep the 3 best via
    ``rerank``, build a system + user message pair, call ``llm``, then record the
    exchange in ``memory``.

    Args:
        query: The user's message.
        system_prompt: Base system instructions for the model.
        memory: Conversation memory that supplies prior history and receives the
            new exchange.

    Returns:
        The text content of the model's reply.
    """
    docs = vectorstore.similarity_search(query, k=10)
    reranked = rerank(query, [d.page_content for d in docs], top_k=3)
    context = "\n\n".join(reranked)
    memory_summary = memory.load_memory_variables({}).get("history", "")

    # NOTE(review): "Tone Guide" receives the raw sentiment label (e.g. "neutral"),
    # not an instruction from `tone_map` — confirm whether a mapping was intended.
    prompt = [
        {
            "role": "system",
            "content": f"{system_prompt}\n\nPrevious Conversation:\n{memory_summary}\n\nTone Guide: {needs_emotional_support(query)}"
        },
        {
            "role": "user",
            "content": f"Using the info below:\n{context}\n\nUser: {query}"
        }
    ]

    response = llm.invoke(prompt)

    # Record the turn through save_context rather than writing to chat_memory
    # directly: save_context is what triggers the summary buffer's pruning, so
    # without it max_token_limit is never enforced and history grows unbounded.
    memory.save_context({"input": query}, {"output": response.content})

    return response.content

@tool
def mental_health_tool(query: str) -> str:
    """Answer mental health-related questions using the emotion-focused system prompt."""
    # Docstring kept verbatim: the `tool` decorator may expose it as the tool
    # description (assumption — confirm against the LangChain docs).
    # Delegates to generate_response with the emotion prompt and the shared memory.
    reply = generate_response(query, SYSTEM_PROMPT_EMOTION, memory)
    return reply

@tool
def general_tool(query: str) -> str:
    """Answer general questions using the general system prompt."""
    # Docstring kept verbatim: the `tool` decorator may expose it as the tool
    # description (assumption — confirm against the LangChain docs).
    # Delegates to generate_response with the general prompt and the shared memory.
    reply = generate_response(query, SYSTEM_PROMPT_GENERAL, memory)
    return reply



  memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)


In [11]:
from langchain.agents import initialize_agent, AgentType

# Tools made available to the LangChain agent.
tools = [math_tool, mental_health_tool, general_tool]

# Zero-shot ReAct agent built over the tools above, sharing the module-level `memory`.
# NOTE(review): `agent_executor` is not referenced by the later visible cells, which
# drive the LangGraph workflow instead — confirm whether this agent is still needed.
agent_executor = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",# alternative considered: AgentType.OPENAI_FUNCTIONS
    verbose=True,
    return_intermediate_steps=False,
    memory=memory
)


  agent_executor = initialize_agent(


In [12]:
class AgentState(TypedDict):
    """State dictionary passed between the nodes of the LangGraph workflow."""
    # The user's message for this run of the graph.
    query: Annotated[str, "Input"]  
    # The reply produced by whichever handler node ran.
    output: Annotated[str, "Final response"]
    # Conversation messages; none of the visible nodes writes this key.
    chat_history: List[BaseMessage]
    
def route_tool(state: AgentState) -> str:
    """Pick the handler for an English query: "mental", "math" or "general".

    Mental-health keywords are checked first, because sending a distressed
    message to the calculator is the worst misroute. A query counts as math only
    if it contains the word "calculate" or a real arithmetic pattern (a number,
    an operator, then another number). A lone digit, hyphen, slash or
    parenthesis no longer triggers math, so messages such as
    "I have felt sad for 2 weeks" or "well-being" are routed sensibly.

    Args:
        state: Workflow state; only ``state["query"]`` is read.

    Returns:
        One of "mental", "math" or "general".
    """
    text = state["query"].lower()

    mental_keywords = [
        "sad", "depressed", "depression", "unhappy", "lonely", "anxiety", "anxious",
        "angry", "angriness", "feel lost", "feel empty", "crying", "want to cry",
        "i feel", "i'm feeling", "panic", "panic attack", "stress", "stressed",
        "overwhelmed", "helpless", "hopeless", "worthless", "not okay",
        "mental health", "emotional pain", "broken", "tired of life", "no one cares",
        # Both apostrophe styles: keyboards differ in which one they produce.
        "numb", "burnout", "burned out", "can’t cope", "can't cope", "i need help",
        "therapy", "therapist", "counseling", "support group", "feel better",
        "why do i feel", "i feel like", "i hate myself", "self harm", "hurt myself",
        "i want to talk", "i need someone", "help me feel better", "is something wrong with me",
        "i am not okay", "i'm not okay", "mental breakdown", "emotional support",
        "i feel anxious", "i feel sad", "i feel down"
    ]

    if any(keyword in text for keyword in mental_keywords):
        return "mental"

    # number, optional ")" / spaces, operator, optional "(" / spaces / sign, number
    arithmetic = re.search(r"\d[\s)]*[-+*/^][\s(]*-?\s*\d", text)
    if "calculate" in text or arithmetic:
        return "math"

    return "general"


def language_check_condition(state: AgentState) -> str:
    """Classify the query as "english", "non_english_math" or "non_english".

    Language detection uses ``detect``; a detection failure is treated the same
    as a non-English result. Non-English queries that ``is_math_expression``
    accepts are reported as "non_english_math".
    """
    query = state["query"]

    try:
        is_english = detect(query) == "en"
    except LangDetectException:
        # Detection failed (raised by langdetect); handle like non-English text.
        is_english = False

    if is_english:
        return "english"
    return "non_english_math" if is_math_expression(query) else "non_english"

def non_english_response(state: AgentState) -> AgentState:
    """Reply to a non-English query.

    Queries that ``is_math_expression`` accepts are still answered through
    ``math_tool``; everything else gets a fixed English-only notice.
    """
    query = state["query"]
    if is_math_expression(query):
        reply = math_tool.invoke(query)
    else:
        reply = "Sorry, this chatbot currently only supports English."
    return {"query": query, "output": reply}

def passthrough(state: AgentState) -> AgentState:
    """Return the state unchanged.

    Used as the body of graph nodes that exist only to host conditional edges
    (input_check, language_check, router).
    """
    return state

# Tool handlers
def handle_math(state: AgentState) -> AgentState:
    """Send the query to ``math_tool`` and return it with the tool's reply."""
    question = state["query"]
    answer = math_tool.invoke(question)
    return {"query": question, "output": answer}

def handle_mental(state: AgentState) -> AgentState:
    """Send the query to ``mental_health_tool`` and return it with the tool's reply."""
    question = state["query"]
    answer = mental_health_tool.invoke(question)
    return {"query": question, "output": answer}

def handle_general(state: AgentState) -> AgentState:
    """Send the query to ``general_tool`` and return it with the tool's reply."""
    question = state["query"]
    answer = general_tool.invoke(question)
    return {"query": question, "output": answer}

def error_fallback(state: AgentState) -> AgentState:
    """Return a generic failure message for the current query.

    The state schema and every other node use the key "query"; this node
    previously read and wrote "input", which raised KeyError when it ran.
    A missing query is tolerated so the fallback itself cannot fail.
    """
    return {
        "query": state.get("query", ""),
        "output": "Something went wrong. Please try again later.",
    }

def is_math_expression(text: str) -> bool:
    """Return True when ``text`` looks like a bare arithmetic expression.

    The whole string (ignoring surrounding whitespace) must consist only of
    digits, whitespace, ".", and the characters + - * / ^ = ( ), and it must
    contain at least one digit.

    The previous check searched for any single character from a class that
    included whitespace, so every multi-word sentence counted as math.
    """
    stripped = text.strip()
    if not re.search(r"\d", stripped):
        return False
    return re.fullmatch(r"[\d\s\.\+\-\*/\^=\(\)]+", stripped) is not None

def is_short_input(text: str) -> bool:
    """Return True for inputs with fewer than 5 words and no math-like characters.

    "Math-like" means any digit or one of + - * / = anywhere in ``text``.
    """
    word_count = len(text.strip().split())
    has_math_chars = re.search(r"[\d\+\-\*/=]", text) is not None
    return word_count < 5 and not has_math_chars

def input_check_condition(state: AgentState) -> str:
    """Return "short_input" for empty or short queries, otherwise "proceed"."""
    cleaned = state["query"].strip()
    # Empty text short-circuits before is_short_input is consulted.
    needs_more_detail = (not cleaned) or is_short_input(cleaned)
    return "short_input" if needs_more_detail else "proceed"

def short_input_handler(state: AgentState) -> AgentState:
    """Ask the user for more detail when the message is too short.

    The wording depends on the detected language: English gets a plain
    "please add context" request, anything else (including a detection
    failure) also explains that only English is supported.
    """
    text = state["query"].strip()

    try:
        detected = detect(text)
    except LangDetectException:
        detected = "unknown"

    if detected == "en":
        message = "Your message seems a bit short. Could you please provide more context or details so I can better understand and help you?"
    else:
        message = "It looks like your message is short and not in English. This assistant currently supports English only. Please try rephrasing your question in English with more context, and I’ll do my best to assist you!"

    return {"query": state["query"], "output": message}

def response_node(state: dict) -> dict:
    """Generate a reply from a state carrying its own prompt and memory.

    Reads "query", "system_prompt" and "memory" from ``state``, calls
    ``generate_response`` with them, and returns those three values plus the
    reply under "output".
    """
    question = state["query"]
    instructions = state["system_prompt"]
    conversation_memory = state["memory"]

    reply = generate_response(question, instructions, conversation_memory)

    return {
        "query": question,
        "system_prompt": instructions,
        "output": reply,
        "memory": conversation_memory,
    }

# Build the LangGraph workflow over AgentState:
#   input_check -> (short_input | language_check)
#   language_check -> (math | non_english | router)
#   router -> (math | mental | general)
# Every handler then flows into the terminal "end" node.
workflow = StateGraph(AgentState)

# Nodes. input_check / language_check / router are pass-throughs that exist only
# to host the conditional edges below.
workflow.add_node("input_check", passthrough)
workflow.add_node("short_input", short_input_handler)
workflow.add_node("language_check", passthrough)
workflow.add_node("non_english", non_english_response)
# NOTE(review): no edge in this cell leads into "error" — confirm it is reachable.
workflow.add_node("error", error_fallback)
workflow.add_node("router", passthrough)
workflow.add_node("math", handle_math)
workflow.add_node("mental", handle_mental)
workflow.add_node("general", handle_general)
workflow.add_node("end", lambda s: s)

workflow.set_entry_point("input_check")

# Short or empty input is answered immediately; everything else goes to the language check.
workflow.add_conditional_edges("input_check", input_check_condition, {
    "short_input": "short_input",
    "proceed": "language_check"
})
workflow.add_edge("short_input", "end")

# Language check: English goes to the router; non-English math still goes to the math handler.
workflow.add_conditional_edges("language_check", language_check_condition, {
    "non_english_math": "math",
    "non_english": "non_english",
    "english": "router"
})
workflow.add_edge("non_english", "end")

# Route English queries to one of the three handlers.
workflow.add_conditional_edges("router", route_tool, {
    "math": "math",
    "mental": "mental",
    "general": "general"
})

# All handlers terminate at "end".
workflow.add_edge("math", "end")
workflow.add_edge("mental", "end")
workflow.add_edge("general", "end")
workflow.add_edge("error", "end")
workflow.set_finish_point("end")

graph = workflow.compile()



In [14]:
# Demo run: split the user input into sentences, send each sentence through the
# graph separately, then merge the partial answers into one reply.
nlp = spacy.load("en_core_web_sm")
user_input = "I am sad today because I get bad results. Can you help me understand why I feel this way?"

# Sentence segmentation
doc = nlp(user_input)
results = []
# NOTE(review): chat_history is updated below but never passed back into graph.invoke.
chat_history = [] 

# Each sentence is routed independently, so different sentences may reach different handlers.
for sent in doc.sents:
    result = graph.invoke({
        "query": sent.text.strip(),
    })
    results.append(result["output"])
    chat_history = result.get("chat_history", chat_history)

# Prompt used to fuse several per-sentence answers into a single reply.
merge_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant who summarizes multiple answers into one elegant, coherent reply for a user."),
    ("user", "Here are partial answers:\n\n{answers}\n\nPlease summarize them smoothly.")
])

# A single answer is used as-is; multiple answers cost one extra LLM call to merge.
if len(results) == 1:
    final_output = results[0]
else:
    final_output = llm.invoke(
        merge_prompt.format_messages(answers="\n\n".join(results))
    ).content


print(final_output)


I'm so sorry to hear that you're feeling sad today. It takes a lot of courage to acknowledge and express your emotions, and I'm here to listen and support you. If you're willing, could you tell me more about what's been going on and how you're feeling? Sometimes talking through our emotions can help clarify things and gain a different perspective. What happened, and how did you feel about the results? Was it something you were hoping to do well on, or was it a surprise? I'm here to support you in exploring your emotions, and I want to help you process your feelings. By talking through what you're experiencing, including any feelings or sensations you've been having, we can work together to understand your emotions better.
