In [1]:
%pwd

'c:\\Users\\ranji\\My Projects\\Projects\\EasePlease-GenAI\\research'

In [2]:
import os

# The notebook lives in the project's `research/` folder; step up to the
# project root so relative paths such as 'Data/' resolve.
# Guarded so that re-running this cell does not keep climbing one directory
# higher on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
%pwd

'c:\\Users\\ranji\\My Projects\\Projects\\EasePlease-GenAI'

In [4]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
#Extract Data From the PDF File
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; files matching the ``*.pdf``
            glob are read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for those files
        (the loaded documents).
    """
    pdf_loader = DirectoryLoader(data, loader_cls=PyPDFLoader, glob="*.pdf")
    return pdf_loader.load()

In [6]:
# Load the PDFs from the 'Data/' folder. The path is relative, so it depends
# on the working directory having been moved to the project root earlier.
extracted_data=load_pdf_file(data='Data/')

In [7]:
#extracted_data

In [8]:
#Split the Data into Text Chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split, e.g. the result of
            ``load_pdf_file``.
        chunk_size: Target maximum chunk size handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            previously hard-coded here.
        chunk_overlap: Overlap between neighbouring chunks, handed to the
            same splitter. Defaults to 20, the value previously hard-coded
            here.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [9]:
# Chunk everything that was loaded and report how many chunks came out.
text_chunks = text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 792


In [10]:
#text_chunks

In [11]:
from langchain.embeddings import HuggingFaceEmbeddings

#Download the Embeddings from Hugging Face
def download_hugging_face_embeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Build the Hugging Face embedding model used by this notebook.

    Args:
        model_name: Hugging Face model identifier passed to
            ``HuggingFaceEmbeddings``. Defaults to
            ``'sentence-transformers/all-MiniLM-L6-v2'``, the model that was
            previously hard-coded. The notebook's Pinecone index is created
            with ``dimension=384``, so any replacement model must produce
            vectors of that size.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [12]:
# Instantiate the embedding model once; the same object is reused for the
# test query and for the Pinecone vector store later in the notebook.
embeddings = download_hugging_face_embeddings()

  embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [13]:
# Smoke-test the embedding model on a short string and print the vector
# length; this number must agree with the `dimension` used when the Pinecone
# index is created.
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [14]:
#query_result

In [27]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment so the API
# keys can be read from os.environ below. Left as the last expression so the
# cell displays load_dotenv()'s return value.
load_dotenv()

True

In [29]:
# Read both API keys from the environment; each is None when the variable is
# not set.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [17]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "easeplease"

# Creating an index whose name is already taken fails, so only create it when
# it does not exist yet. This keeps the cell safe to re-run (Restart & Run All).
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the embedding length printed earlier (384)
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Display the index description whether it was just created or already existed.
pc.describe_index(index_name)

{
    "name": "easeplease",
    "metric": "cosine",
    "host": "easeplease-kdthsrr.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [31]:
import os

# os.environ only accepts strings, so a key that was not found (None) would
# otherwise surface as an opaque "str expected, not NoneType" TypeError.
# Fail early with a message that says what to fix.
if PINECONE_API_KEY is None or OPENAI_API_KEY is None:
    raise RuntimeError(
        "PINECONE_API_KEY and OPENAI_API_KEY must both be set "
        "(for example in a .env file) before running this cell."
    )

# Re-export the keys; presumably so clients constructed later without an
# explicit api_key argument can pick them up from the environment.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [19]:
# Embed every text chunk and upsert the resulting vectors into the Pinecone
# index named by `index_name`.
# NOTE(review): re-running this cell presumably uploads the same chunks again;
# confirm whether duplicates are created before executing it twice.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    embedding=embeddings,
    index_name=index_name,
    documents=text_chunks,
)

In [20]:
# Load the existing index

from langchain_pinecone import PineconeVectorStore
# Attach to the index that already exists under `index_name`, using the same
# embedding model, instead of building it from the text chunks.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [21]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1a0e6673dc0>

In [22]:
# Similarity-search retriever configured with k=3.
retriever = docsearch.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

In [23]:
# Exercise the retriever on its own, before any LLM is involved, so the
# retrieved documents can be inspected directly.
retrieved_docs = retriever.invoke("What is Sleep Disturbance?")

In [24]:
retrieved_docs

[Document(id='a4de39d3-42d1-4e10-9673-e76a5b4f33fd', metadata={'author': 'pam smith', 'creationdate': '2014-01-14T15:39:55-05:00', 'creator': 'Microsoft® Office Publisher 2007', 'gts_pdfxconformance': 'PDF/X-1a:2001', 'gts_pdfxversion': 'PDF/X-1:2001', 'moddate': '2014-01-14T16:06:51-05:00', 'page': 85.0, 'page_label': '86', 'producer': 'Acrobat Distiller 10.0.0 (Windows)', 'source': 'Data\\MHGuidebook-EBookDownload.pdf', 'title': 'merged.pdf', 'total_pages': 261.0, 'trapped': '/False'}, page_content='III: Conditions & Issues: Sleep Disturbance \n85 \n \nSLEEP DISTURBANCE \n \nSigns/characteristics \nA disturbance in sleep can occur as a part of or separately from a psy-\nchiatric condition.  Studies from western cultures have indicated that \ninsomnia (decreased ability to sleep) is common and can have many \ncauses (e.g. as a primary condition or due to a secondary cause such \nas medical illness, psychiatric conditions, medications, or drugs and'),
 Document(id='31c6e79b-8766-4014-b

In [32]:
from langchain_openai import OpenAI

# LLM used to write the answers: temperature 0.4, at most 500 tokens.
# No api_key is passed here; presumably it is read from OPENAI_API_KEY in the
# environment.
llm = OpenAI(max_tokens=500, temperature=0.4)

In [33]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the answering model: four sentences joined by single
# spaces, then a blank line, then the `{context}` placeholder for the prompt
# template.
system_prompt = "\n\n".join(
    [
        " ".join(
            [
                "You are an assistant for question-answering tasks.",
                "Use the following pieces of retrieved context to answer the question.",
                "If you don't know the answer, say that you don't know.",
                "Use three sentences maximum and keep the answer concise.",
            ]
        ),
        "{context}",
    ]
)


# Two-message chat template: the system instructions (which carry the
# `{context}` placeholder) followed by the user's question as `{input}`.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [34]:
# Chain that sends the prompt (with the retrieved documents) to the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG pipeline: retrieve with `retriever`, then answer with the chain
# above. It is invoked below with an "input" key and read via "answer".
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [35]:
# Ask a question that the indexed guidebook covers and print only the answer.
response = rag_chain.invoke(
    {"input": "What is Sleep Disturbance?"}
)
print(response["answer"])



Sleep disturbance is a common response to stress and can occur as a part of or separately from a psychiatric condition. It is characterized by difficulty falling or staying asleep and can have various causes, including medical illness, medications, and drugs and alcohol. Counseling interventions for sleep disturbance include setting a regular bedtime, avoiding napping during the day, and practicing relaxation exercises in the evening. Medication therapy may also be used as a treatment option.


In [36]:
# Ask something unrelated to the indexed material to check that the chain
# admits it does not know instead of inventing an answer.
response = rag_chain.invoke(
    {"input": "What is stats?"}
)
print(response["answer"])


I'm sorry, I don't know the answer to that question.


In [38]:
# Another in-domain question, this time about the grieving process.
response = rag_chain.invoke(
    {"input": "What are the general points may be useful in helping people through the grieving process?"}
)
print(response["answer"])



The following general points may be useful in helping people through the grieving process: ensuring that normal culturally appropriate mourning processes have been able to take place, providing reassurance that the grieving process is normal despite the painful feelings it causes, and encouraging finding simple ways to enjoy positive memories of the past.
