In [1]:
# Cell 1: Load and process PDF documents
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_pdf_file(data):
    loader = DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return text_splitter.split_documents(extracted_data)

# Load and split documents
extracted_data = load_pdf_file(data='C:/Users/Osei Tutu Dickson/Desktop/Gen AI/mental-health-chatbot-gen-ai/Data/')
text_chunks = text_split(extracted_data)

In [2]:
text_chunks = text_split(extracted_data)
print("The number of chunks are:", len(text_chunks))

The number of chunks are: 87


In [4]:
# Cell 2: Set up Pinecone vector store
from langchain.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
import os

load_dotenv()

# Initialize embeddings
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Pinecone setup
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "mentalbot"

# Create index if not exists
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

# Store documents in Pinecone
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings
)

  from .autonotebook import tqdm as notebook_tqdm
  embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


In [None]:
# Cell 3: Initialize RAG pipeline
from langchain_huggingface import HuggingFaceEndpoint
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# LLM setup
HUGGINGFACE_API_TOKEN = os.environ.get("AI_API_KEY")
llm = HuggingFaceEndpoint(
    repo_id="flan-t5-small",
    huggingfacehub_api_token=HUGGINGFACE_API_TOKEN,
    temperature=0.3,
    max_new_tokens=256,
    top_k=50,
    top_p=0.9
)

# Prompt template
system_prompt = """You are a knowledgeable AI assistant for mental health topics. 
When answering about sensitive subjects:
1. Use ONLY the provided context
2. Be factual but compassionate
3. If uncertain, say "I'm not certain but here's what I know..."
4. Never make up information

Context:
{context}"""

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])

# Initialize retriever from existing Pinecone index
retriever = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
).as_retriever(search_kwargs={"k": 3})

# Create chains
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [7]:
# Cell 4: Query the RAG model

# Example questions based on your generated mental health book content
query1 = "What is depression and what are its common symptoms?"
query2 = "Explain the difference between obsessions and compulsions in OCD."
query3 = "What does the book say about coping with anxiety?"
query4 = "When should someone seek professional help for mental health issues?"
query5 = "What are the common types of eating disorders?"
query6 = "What are positive symptoms of schizophrenia?"
query7 = "How do I deal with chronic stress?"
query8 = "Can you describe the main characteristics of Bipolar Disorder?"
query9 = "What defines a Substance Use Disorder and what are its signs?"

# Use the invoke method to get a response
# Ensure rag_chain is not None before attempting to invoke
if rag_chain is not None:
    try:
        response1 = rag_chain.invoke({"input": query1})
        print(f"Question: {query1}\nAnswer: {response1['answer']}\n")

        response2 = rag_chain.invoke({"input": query2})
        print(f"Question: {query2}\nAnswer: {response2['answer']}\n")

        response3 = rag_chain.invoke({"input": query3})
        print(f"Question: {query3}\nAnswer: {response3['answer']}\n")

        response4 = rag_chain.invoke({"input": query4})
        print(f"Question: {query4}\nAnswer: {response4['answer']}\n")

        response5 = rag_chain.invoke({"input": query5})
        print(f"Question: {query5}\nAnswer: {response5['answer']}\n")

        response6 = rag_chain.invoke({"input": query6})
        print(f"Question: {query6}\nAnswer: {response6['answer']}\n")

        response7 = rag_chain.invoke({"input": query7})
        print(f"Question: {query7}\nAnswer: {response7['answer']}\n")

        response8 = rag_chain.invoke({"input": query8})
        print(f"Question: {query8}\nAnswer: {response8['answer']}\n")

        response9 = rag_chain.invoke({"input": query9})
        print(f"Question: {query9}\nAnswer: {response9['answer']}\n")

        # You can also get the source documents that were used by the retriever
        # print("Source Documents for query1:", response1['context'])

    except Exception as e:
        print(f"An error occurred during query execution: {e}")
        import traceback
        traceback.print_exc()
else:
    print("RAG chain is not initialized. Please ensure Cell 3 ran successfully.")

An error occurred during query execution: 


Traceback (most recent call last):
  File "C:\Users\Osei Tutu Dickson\AppData\Local\Temp\ipykernel_13188\1494576166.py", line 18, in <module>
    response1 = rag_chain.invoke({"input": query1})
  File "c:\Users\Osei Tutu Dickson\miniconda3\envs\menbot\lib\site-packages\langchain_core\runnables\base.py", line 5431, in invoke
    return self.bound.invoke(
  File "c:\Users\Osei Tutu Dickson\miniconda3\envs\menbot\lib\site-packages\langchain_core\runnables\base.py", line 3047, in invoke
    input_ = context.run(step.invoke, input_, config)
  File "c:\Users\Osei Tutu Dickson\miniconda3\envs\menbot\lib\site-packages\langchain_core\runnables\passthrough.py", line 511, in invoke
    return self._call_with_config(self._invoke, input, config, **kwargs)
  File "c:\Users\Osei Tutu Dickson\miniconda3\envs\menbot\lib\site-packages\langchain_core\runnables\base.py", line 1940, in _call_with_config
    context.run(
  File "c:\Users\Osei Tutu Dickson\miniconda3\envs\menbot\lib\site-packages\langchain_c