In [1]:
%pwd


'd:\\sih\\mental_health_platform\\research'

In [2]:
import os
os.chdir('../')

In [3]:
%pwd

'd:\\sih\\mental_health_platform'

In [4]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Extract text from PDF files
def load_pdf_files(data):
    loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )

    documents = loader.load()
    return documents

In [6]:
extracted_data = load_pdf_files("data")

In [7]:
extracted_data

[Document(metadata={'producer': 'ReportLab PDF Library - www.reportlab.com', 'creator': '(unspecified)', 'creationdate': '2025-09-20T06:05:40+00:00', 'author': '(anonymous)', 'keywords': '', 'moddate': '2025-09-20T06:05:40+00:00', 'subject': '(unspecified)', 'title': '(anonymous)', 'trapped': '/False', 'source': 'data\\mental_health_full_knowledge_base.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content='Anxiety Disorders\nDefinition: A group of conditions involving excessive fear, worry, or nervousness.\nSymptoms: Restlessness, racing heart, sweating, difficulty concentrating, sleep problems.\nRemedies/Management: CBT, relaxation techniques, SSRIs, mindfulness, exercise.\nAttention-Deficit/Hyperactivity Disorder (ADHD)\nDefinition: A neurodevelopmental disorder marked by inattention, hyperactivity, and impulsivity.\nSymptoms: Difficulty sustaining attention, fidgeting, impulsiveness, poor organization.\nRemedies/Management: Behavioral therapy, medications, structured r

In [8]:
len(extracted_data)

4

In [9]:
from typing import List
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """
    Given a list of Document objects, return a new list of Document objects
    containing only 'source' in metadata and the original page_content.
    """
    minimal_docs: List[Document] = []
    for doc in docs:
        src = doc.metadata.get("source")
        minimal_docs.append(
            Document(
                page_content=doc.page_content,
                metadata={"source": src}
            )
        )
    return minimal_docs

In [10]:
minimal_docs = filter_to_minimal_docs(extracted_data)

In [11]:
minimal_docs

[Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='Anxiety Disorders\nDefinition: A group of conditions involving excessive fear, worry, or nervousness.\nSymptoms: Restlessness, racing heart, sweating, difficulty concentrating, sleep problems.\nRemedies/Management: CBT, relaxation techniques, SSRIs, mindfulness, exercise.\nAttention-Deficit/Hyperactivity Disorder (ADHD)\nDefinition: A neurodevelopmental disorder marked by inattention, hyperactivity, and impulsivity.\nSymptoms: Difficulty sustaining attention, fidgeting, impulsiveness, poor organization.\nRemedies/Management: Behavioral therapy, medications, structured routines, lifestyle\nmanagement.\nAutism Spectrum Disorder (ASD)\nDefinition: A developmental disorder that affects communication, behavior, and social interaction.\nSymptoms: Difficulty with social cues, repetitive behaviors, intense focus on interests, sensory\nsensitivities.\nRemedies/Management: Behavioral therapy, speech/occup

In [12]:
# Split the documents into smaller chunks
def text_split(minimal_docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2,
        chunk_overlap=1,
    )
    texts_chunk = text_splitter.split_documents(minimal_docs)
    return texts_chunk

In [14]:
texts_chunk = text_split(minimal_docs)
print(f"Number of chunks: {len(texts_chunk)}")

Number of chunks: 5839


In [15]:
texts_chunk

[Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='An'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='nx'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='xi'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='ie'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='et'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='ty'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='D'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='Di'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='is'),
 Document(metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='so'),
 Document(metadata={'

In [16]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_embeddings():
    """
    Download and return the HuggingFace embeddings model.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name
    )
    return embeddings

embedding = download_embeddings()

  embeddings = HuggingFaceEmbeddings(


In [17]:
embedding

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [18]:
vector = embedding.embed_query("Hello world")
vector

[-0.03447727859020233,
 0.031023195013403893,
 0.0067350007593631744,
 0.02610897831618786,
 -0.03936203941702843,
 -0.16030246019363403,
 0.06692393869161606,
 -0.006441489793360233,
 -0.04745043069124222,
 0.014758850447833538,
 0.07087530940771103,
 0.05552756413817406,
 0.019193332642316818,
 -0.026251299306750298,
 -0.010109570808708668,
 -0.026940522715449333,
 0.022307388484477997,
 -0.022226659581065178,
 -0.1496925801038742,
 -0.017493056133389473,
 0.00767620699480176,
 0.05435231328010559,
 0.003254441311582923,
 0.03172589838504791,
 -0.08462132513523102,
 -0.02940601296722889,
 0.0515955425798893,
 0.048124007880687714,
 -0.003314810572192073,
 -0.05827917903661728,
 0.04196931794285774,
 0.022210605442523956,
 0.12818878889083862,
 -0.022338947281241417,
 -0.011656264774501324,
 0.06292837113142014,
 -0.03287631645798683,
 -0.09122605621814728,
 -0.031175334006547928,
 0.052699584513902664,
 0.04703482240438461,
 -0.0842030793428421,
 -0.030056186020374298,
 -0.0207448396

In [19]:
print( "Vector length:", len(vector))

Vector length: 384


In [20]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [22]:
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")


os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY

In [23]:
from pinecone import Pinecone 
pinecone_api_key = PINECONE_API_KEY

pc = Pinecone(api_key=pinecone_api_key)

In [24]:
pc

<pinecone.pinecone.Pinecone at 0x204c4ecc100>

In [25]:
from pinecone import ServerlessSpec 

index_name = "mindcare"

if not pc.has_index(index_name):
    pc.create_index(
        name = index_name,
        dimension=384,  # Dimension of the embeddings
        metric= "cosine",  # Cosine similarity
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )


index = pc.Index(index_name)

In [26]:
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents=texts_chunk,
    embedding=embedding,
    index_name=index_name
)

In [27]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [28]:
retrieved_docs = retriever.invoke("What is Stress?")
retrieved_docs

[Document(id='75c85d99-d7a5-453e-96fd-704bf6e3e24c', metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='fu'),
 Document(id='1ebe03e3-23e8-450f-b3e6-fe584bab3e6d', metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='fu'),
 Document(id='2156e9df-70e7-4520-b956-6f55ea357d03', metadata={'source': 'data\\mental_health_full_knowledge_base.pdf'}, page_content='fu')]

In [30]:
from langchain_google_genai import ChatGoogleGenerativeAI


chatModel = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    google_api_key=os.getenv("GEMINI_API_KEY")  # explicitly pass key
)

In [31]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [32]:
system_prompt = (
    "You are a supportive mental health companion. Your role is to engage users in empathetic, non-judgmental "
    "conversation. Do not give medical diagnoses or prescriptions. Instead, ask gentle, open-ended questions "
    "to help users reflect on their emotions, behaviors, sleep, energy, and daily life. "
    "If users do not directly say they are struggling, gradually explore their mood by asking caring questions "
    "and noticing patterns that may suggest stress, anxiety, or depression. "
    "Respond warmly, and suggest healthy coping strategies such as journaling, deep breathing, grounding "
    "exercises, or reaching out to trusted people. "
    "If the user expresses suicidal thoughts, self-harm, or crisis, respond with empathy, encourage them to reach "
    "out to someone they trust immediately, and provide crisis hotline information if possible. "
    "Always make it clear you are not a medical professional, and remind them that seeking professional help "
    "from a counselor or doctor is important for their well-being. "
    "Always maintain a compassionate, safe, and respectful tone. "
    "\n\n"
    "When starting a conversation, use gentle openers such as:\n"
    "- 'How have you been feeling these days?'\n"
    "- 'What’s been on your mind lately?'\n"
    "- 'If you had to describe your week in one word, what would it be?'\n"
    "- 'How has your sleep been recently? Do you feel rested when you wake up?'\n"
    "- 'Do you still enjoy the things you usually like to do?'\n"
    "- 'Have you noticed changes in your energy or motivation?'\n"
    "- 'Do you feel connected with friends and family, or more distant than before?'\n"
    "Based on their answers, continue with empathetic follow-up questions and gentle reflections."
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


In [33]:
question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [34]:
response = rag_chain.invoke({"input": "what is Stress?"})
print(response["answer"])

That's a really important question, and it's something many of us experience.

Generally, stress is our body's natural response to any kind of demand, challenge, or perceived threat. When we encounter something that requires us to adapt or react, our body releases hormones that prepare us to cope with the situation. This can be anything from a tight deadline at work to a difficult conversation, or even just a busy day.

It can show up in many ways, both physically and emotionally:

*   **Physically:** You might notice a racing heart, tense muscles, headaches, fatigue, difficulty sleeping, or stomach issues.
*   **Emotionally:** You might feel overwhelmed, anxious, irritable, sad, frustrated, or have difficulty concentrating.
*   **Mentally:** Your thoughts might race, or you might find it hard to focus or make decisions.

It's a very common human experience, and a little bit of stress can even be motivating. However, when stress becomes chronic or overwhelming, it can start to take a t