# RAG-based Mental Health Chatbot

This notebook implements a RAG (Retrieval-Augmented Generation) chatbot for mental health support using Google Gemini and LangChain.

In [None]:
%pip install -q -U langchain langchain-google-genai langchain-community pypdf faiss-cpu google-generativeai langchain-huggingface sentence-transformers chainlit

In [None]:
import getpass
import os

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings

# Set API keys
# Credentials must never be stored in the notebook. Use GOOGLE_API_KEY from the
# environment when it is already set; otherwise ask for it with a prompt that
# does not echo the value into the cell output.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")
# No Hugging Face token is configured: it is not strictly needed for local embeddings.

## 1. Load Data
Loading PDF documents from the `content/data` directory.

In [None]:
pdf_directory = 'content/data'


def list_pdf_paths(directory):
    """Return the paths of the PDF files directly inside `directory`.

    Args:
        directory: Path of the folder to scan (not searched recursively).

    Returns:
        A list of `directory`-joined file paths, sorted by file name. The
        extension check is case-insensitive, so `report.PDF` is included.

    Raises:
        OSError: If `directory` cannot be listed.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # loaded documents (and therefore of the chunks built from them) stable.
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith('.pdf')
    ]


documents = []

if os.path.isdir(pdf_directory):
    for file_path in list_pdf_paths(pdf_directory):
        print(f"Loading {file_path}...")
        # NOTE(review): PyPDFLoader is expected to yield one Document per page,
        # so the total below would count pages rather than files - confirm.
        documents.extend(PyPDFLoader(file_path).load())
    print(f"Total documents loaded: {len(documents)}")
else:
    print(f"Directory {pdf_directory} not found.")

## 2. Process Text
Split the loaded documents into smaller chunks for embedding.

In [None]:
# Chunking parameters: maximum chunk length and the overlap shared by
# neighbouring chunks, as passed to the splitter below.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)

# Split every loaded document into chunks ready for embedding.
texts = text_splitter.split_documents(documents)
print(f"Total text chunks: {len(texts)}")

## 3. Create Vector Store
Generate embeddings using HuggingFace's `all-MiniLM-L6-v2` model and store them in a FAISS index.

In [None]:
# Fail early with an actionable message: building a FAISS index from an empty
# chunk list otherwise surfaces as an opaque error inside the vector store.
if not texts:
    raise ValueError(
        "No text chunks to index. Check that the PDF directory exists and "
        "contains readable PDFs, then re-run the loading and splitting cells."
    )

# Sentence-transformers model used to embed every chunk.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create FAISS vector store
vector_store = FAISS.from_documents(texts, embeddings)

# Save locally so the index can be reloaded later without re-embedding.
vector_store.save_local("faiss_index_local")
print("Vector store created and saved.")

## 4. Setup Retrieval Chain
Configure the RAG chain using the Gemini 1.5 Flash model (`gemini-1.5-flash`).

In [None]:
# Gemini chat model used to generate the answers.
llm = ChatGoogleGenerativeAI(temperature=0.3, model="gemini-1.5-flash")

# Instructions wrapped around the retrieved context and the user's question.
# Each source line is one line of the prompt; the trailing space on the
# second line is intentional and preserved.
prompt_template = (
    "You are an empathetic and professional mental health chatbot assistant.\n"
    "Use the following pieces of context to answer the user's question. \n"
    "If the answer is not in the context, generally answer from your knowledge but mention that this specific information wasn't in the provided documents.\n"
    "Always maintain a supportive tone.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer:"
)

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Fetch the 5 best-matching chunks per query and pass them to the "stuff"
# chain together with the custom prompt.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
)

## 5. Test the Chatbot

In [None]:
# Smoke test: ask the chain about anxiety symptoms and show the exchange.
query = "What are some common signs of anxiety?"
result = qa_chain.invoke(query)
print(f"Q: {query}")
print(f"A: {result['result']}")

In [None]:
# Smoke test: ask the chain how to support a depressed friend and show the exchange.
query = "How can I help a friend who is depressed?"
result = qa_chain.invoke(query)
print(f"Q: {query}")
print(f"A: {result['result']}")

## 6. Create Chainlit App Interface
Run the following cell to create the `app.py` file. You can then run it from the terminal using:
`chainlit run app.py -w`

In [None]:
%%writefile app.py
import os
import chainlit as cl
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# API key: read it from the environment instead of embedding it in source.
# Export GOOGLE_API_KEY in the shell that launches the app; stop immediately
# with a clear message when it is missing.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it before running `chainlit run app.py`."
    )

# Embedding model; must be the same model that was used to build the index.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Reload the FAISS index that the notebook saved as 'faiss_index_local'.
# SECURITY: allow_dangerous_deserialization=True lets load_local unpickle the
# stored data. Only load an index you generated yourself, never an untrusted one.
vector_store = FAISS.load_local(
    "faiss_index_local",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Gemini chat model used to generate the answers.
llm = ChatGoogleGenerativeAI(temperature=0.3, model="gemini-1.5-flash")

# Instructions wrapped around the retrieved context and the user's question.
# Each source line is one line of the prompt; the trailing space on the
# second line is intentional and preserved.
prompt_template = (
    "You are an empathetic and professional mental health chatbot assistant.\n"
    "Use the following pieces of context to answer the user's question. \n"
    "If the answer is not in the context, generally answer from your knowledge but mention that this specific information wasn't in the provided documents.\n"
    "Always maintain a supportive tone.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer:"
)

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

@cl.on_chat_start
async def start():
    """Build a RetrievalQA chain for the new chat session and greet the user.

    The chain is stored in the Chainlit user session under the key "chain".
    """
    retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    session_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        chain_type_kwargs={"prompt": PROMPT},
    )
    cl.user_session.set("chain", session_chain)

    greeting = cl.Message(content="Hello! I'm here to support you. How are you feeling today?")
    await greeting.send()

@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming user message with the chain stored in the session.

    Args:
        message: The Chainlit message sent by the user; its `content` is
            passed to the chain as the query.
    """
    session_chain = cl.user_session.get("chain")

    # The full answer is awaited and sent as one message; nothing is streamed.
    response = await session_chain.ainvoke(message.content)

    reply = cl.Message(content=response["result"])
    await reply.send()
