In [3]:
# ✅ Step 1: Install Required Packages
!pip install -q langchain langchain-core langchain-community \
    langchain-groq \
    sentence-transformers \
    chromadb \
    pypdf \
    huggingface-hub


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m84.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m82.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.3/302.3 kB[0m [31m24.9 MB/s[0m eta [36m0:00:00[

In [None]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_core.documents import Document
from langchain_groq import ChatGroq
from langchain_community.vectorstores.utils import filter_complex_metadata
import os
import json

def convert_metadata(metadata):
    """Return a copy of ``metadata`` whose values are safe to store as scalars.

    Values that are already ``str``, ``int``, ``float`` or ``bool`` are kept
    unchanged, as is ``None``. Every other value (lists, dicts, tuples, sets,
    arbitrary objects) is replaced by its ``str()`` representation.

    Args:
        metadata: Mapping of metadata keys to values. ``None`` or an empty
            mapping yields an empty dict.

    Returns:
        A new dict; the input mapping is not modified.
    """
    if not metadata:
        return {}

    scalar_types = (str, int, float, bool)
    return {
        key: value if value is None or isinstance(value, scalar_types) else str(value)
        for key, value in metadata.items()
    }

def load_chatbot_json(file_path):
    """Load an intents-style chatbot JSON file as a list of Documents.

    The file may be either a top-level list of intent objects or a dict with
    an ``"intents"`` key holding that list. Any other layout yields an empty
    list. One Document is produced per pattern: the pattern text becomes the
    page content, and the intent's tag and responses go into the metadata
    together with ``"source": "chatbot_json"``.

    Args:
        file_path: Path to the JSON file.

    Returns:
        A list of Document objects (possibly empty).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if isinstance(data, list):
        intents = data
    elif isinstance(data, dict) and "intents" in data:
        intents = data["intents"]
    else:
        intents = []

    documents = []
    for intent in intents:
        patterns = intent.get("patterns", [])
        # A lone string would otherwise be iterated character by character.
        if isinstance(patterns, str):
            patterns = [patterns]

        # The metadata is identical for every pattern of one intent, so
        # build it once and hand each Document its own copy.
        intent_metadata = convert_metadata({
            "tag": intent.get("tag", "no_tag"),
            "responses": intent.get("responses", []),
            "source": "chatbot_json"
        })

        for pattern in patterns:
            documents.append(
                Document(page_content=pattern, metadata=dict(intent_metadata))
            )
    return documents

def load_context_response_json(file_path):
    """Load a context/response JSON file as a list of Documents.

    The file is expected to be a top-level list of objects with ``"Context"``
    and ``"Response"`` keys; any other top-level type yields an empty list.
    Each entry becomes one Document whose page content is the context text
    and whose metadata carries the response together with
    ``"source": "context_response_json"``. Missing keys default to ``""``.

    Args:
        file_path: Path to the JSON file.

    Returns:
        A list of Document objects (possibly empty).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    entries = data if isinstance(data, list) else []

    return [
        Document(
            page_content=entry.get("Context", ""),
            metadata=convert_metadata({
                "response": entry.get("Response", ""),
                "source": "context_response_json"
            })
        )
        for entry in entries
    ]

def load_pdf(file_path):
    """Read a PDF with PyPDFLoader and return its loaded pages.

    Each page's metadata is passed through ``convert_metadata`` and its
    ``"source"`` entry is set to the fixed label ``"pdf"``, replacing any
    value the loader put there.
    """
    loaded_pages = PyPDFLoader(file_path).load()
    for pdf_page in loaded_pages:
        tagged = dict(pdf_page.metadata, source="pdf")
        pdf_page.metadata = convert_metadata(tagged)
    return loaded_pages

def _load_json_documents(file_path):
    """Load a JSON file, trying the intents layout first, then context/response."""
    try:
        docs = load_chatbot_json(file_path)
    except (AttributeError, TypeError, KeyError):
        # Entries did not have the shape the intents loader expects.
        docs = []

    # The intents loader returns an empty list (rather than raising) for a
    # context/response file, so an empty result must also trigger the fallback.
    if not docs:
        docs = load_context_response_json(file_path)
    return docs


def create_vector_db():
    """Build the Chroma vector store from the files in ``./sample_data/``.

    ``.json`` files are loaded with the chatbot-intents loader, falling back
    to the context/response loader; ``.pdf`` files are loaded page by page.
    Other files are ignored. A file that fails to load is reported on stdout
    and skipped. The documents are split into 500-character chunks with a
    50-character overlap, embedded, and stored under ``./chroma_db``.

    Returns:
        The Chroma vector store that was created.
    """
    directory = "./sample_data/"
    all_documents = []

    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        lowered = filename.lower()

        try:
            if lowered.endswith(".json"):
                all_documents.extend(_load_json_documents(file_path))
            elif lowered.endswith(".pdf"):
                all_documents.extend(load_pdf(file_path))
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole build.
            print(f"Error loading {filename}: {str(e)}")
            continue

    if not all_documents:
        print(f"Warning: no documents were loaded from {directory}")

    # filter_complex_metadata takes the whole list of documents and returns
    # a list; it must not be called once per document.
    filtered_docs = filter_complex_metadata(all_documents)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False
    )
    texts = text_splitter.split_documents(filtered_docs)

    # Create embeddings and vector store.
    # NOTE(review): a BGE embedding wrapper is used with a MiniLM model name;
    # confirm this pairing is intended. main() must load the store with the
    # same embedding configuration.
    embeddings = HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    vector_db = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        persist_directory="./chroma_db"
    )
    vector_db.persist()
    print("Vector database created with all training data")
    return vector_db

def initialize_llm():
    """Create the Groq chat model used to generate replies.

    The API key is read from the ``GROQ_API_KEY`` environment variable so
    that no credential is stored in source control. Any key that was
    previously committed in this file should be treated as leaked and revoked.

    Returns:
        A ChatGroq instance using ``llama3-70b-8192`` at temperature 0.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset or blank.
    """
    api_key = os.environ.get("GROQ_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "export your Groq API key before starting the chatbot."
        )

    llm = ChatGroq(
        temperature=0,
        groq_api_key=api_key,
        model_name="llama3-70b-8192"
    )
    return llm

def setup_qa_chain(vector_db, llm):
    """Assemble the retrieval question-answering chain.

    The vector store's default retriever supplies the context, which is
    inserted into a fixed mental-health prompt together with the user's
    question ("stuff" chain type). The chain is configured to return the
    retrieved source documents alongside the answer.
    """
    template_text = """
    You are a compassionate mental health chatbot. Use the following context to respond thoughtfully:

    Context: {context}

    User Question: {question}

    If the context contains specific responses, you may use them directly when appropriate.
    Otherwise, generate a helpful response based on your mental health knowledge.

    Chatbot Response:"""

    chat_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template_text,
    )

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_db.as_retriever(),
        chain_type_kwargs={"prompt": chat_prompt},
        return_source_documents=True,
    )

def main():
    """Run the interactive console chatbot.

    Initializes the LLM, opens the persisted vector store in ``./chroma_db``
    (building it first when it is missing or empty), then answers user
    input in a loop until the user types ``exit`` or closes the input
    stream. After each answer, the distinct ``source`` labels of the
    retrieved documents are printed.
    """
    print("Initializing Mental Health Chatbot...")
    llm = initialize_llm()
    db_path = "./chroma_db"

    # Reuse the store only when it actually holds data. Creating the
    # directory up front would make a failed build look like a finished one,
    # and every later run would silently load an empty database.
    if os.path.isdir(db_path) and os.listdir(db_path):
        embeddings = HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
        vector_db = Chroma(persist_directory=db_path, embedding_function=embeddings)
    else:
        vector_db = create_vector_db()

    qa_chain = setup_qa_chain(vector_db, llm)

    print("\nChatbot ready. Type 'exit' to end the conversation.")
    while True:
        try:
            query = input("\nUser: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session without a traceback.
            print("\nChatbot: Take care of yourself. Goodbye!")
            break

        if not query:
            # Nothing typed: ask again instead of sending an empty prompt.
            continue
        if query.lower() == "exit":
            print("Chatbot: Take care of yourself. Goodbye!")
            break

        result = qa_chain.invoke({"query": query})
        response = result["result"]
        # Sorted so the source list prints in a stable order.
        sources = sorted({doc.metadata.get("source", "unknown") for doc in result["source_documents"]})

        print(f"\nChatbot: {response}")
        print(f"[Sources: {', '.join(sources)}]")

# Start the interactive chat loop only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Initializing Mental Health Chatbot...

Chatbot ready. Type 'exit' to end the conversation.

User: I have depression 

Chatbot: I'm so sorry to hear that you're struggling with depression. It takes a lot of courage to acknowledge and share about your struggles, and I'm here to support you.

Firstly, please know that you're not alone in this. Depression is a common mental health condition that affects many people, and it's not a sign of weakness or a personal failing. It's important to remember that depression is a treatable condition, and there is hope for recovery.

How have you been experiencing depression lately? What are some of the symptoms you've been struggling with, and how have they been affecting your daily life?

Remember, I'm here to listen without judgment, and everything shared with me is confidential. If you're comfortable, can you tell me a bit more about what you're going through?
[Sources: ]

User: how to  reduce stress

Chatbot: Reducing stress is such an important to