In [1]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter



In [2]:
#extract data
def load_pdf(data):
    loader=DirectoryLoader(data,
                           glob='*.pdf',
                           loader_cls=PyPDFLoader)
    documents=loader.load()
    return documents

In [3]:
import os
os.chdir('../')

In [4]:
extracted_data=load_pdf(data='data/')

In [5]:
#split the data into text chunks
def text_split(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

In [6]:
text_chunks=text_split(extracted_data)
print(len(text_chunks))

40000


In [7]:
from langchain.embeddings import HuggingFaceEmbeddings

In [None]:
#embedding
def download_hugging_face():
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [9]:
embeddings=download_hugging_face()

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
query_result=embeddings.embed_query("Hello World")
print(len(query_result))

384


In [35]:
from dotenv import load_dotenv
load_dotenv()
PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')
OPENAI_API_KEY=os.environ.get('OPENAI_API_KEY')

In [26]:
import pinecone
from pinecone.grpc import PineconeGRPC as PineconeGRPC
from pinecone import ServerlessSpec, Pinecone

index_name = "m-chat"
pc=Pinecone(api_key=PINECONE_API_KEY)

pc.create_index(
        name=index_name,
        dimension=384,
        metric='cosine',
        spec=ServerlessSpec(
            cloud="aws",
            region='us-east-1'
        )
       )


In [36]:
import os
os.environ['PINECONE_API_KEY']=PINECONE_API_KEY
os.environ['OPENAI_API_KEY']=OPENAI_API_KEY

In [28]:
#embed and upsert embeddings into pinecone
from langchain_pinecone import PineconeVectorStore

docsearch=PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
)

In [29]:
#existing index 
from langchain_pinecone import PineconeVectorStore
docsearch=PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [30]:
retriever=docsearch.as_retriever(search_type='similarity',search_kwargs={"k":3})

In [32]:
retrived_docs=retriever.invoke("What is fever")
retrived_docs

[Document(page_content='Hemorrhagic fevers', metadata={'page': 1793.0, 'source': 'data/encyclopedia-of-medicine-vol-1-5-3rd-edition.pdf'}),
 Document(page_content='Description\nFever is a natural response of the body that helps\nin fighting off foreign substances, such as microorgan-\nisms, toxins, etc. Body temperature is set by the\nthermoregulatory center, located in an area in the\nbrain called hypothalamus. Body temperature is not\nconstant all day, but actually is lowest at 6 A.M. and\nhighest around 4–6 P.M. In addition, temperature\nvaries in different regions of the body; for example,\nrectal and urine temperatures are about one degree', metadata={'page': 1492.0, 'source': 'data/encyclopedia-of-medicine-vol-1-5-3rd-edition.pdf'}),
 Document(page_content='fever that lasts for several weeks is associated with\nautoimmune diseases such as lupus or with some can-\ncers, particularly leukemia and lymphoma.\n1460 GALE ENCYCLOPEDIA OF MEDICINE\nFever', metadata={'page': 1489.0, 'sour

In [38]:
#llm processing
from langchain_openai import OpenAI

llm=OpenAI(temperature=0.4,max_tokens=500)

In [42]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are a knowledgeable and friendly medical assistant specialized in answering health-related questions. "
    "Use the retrieved context provided to generate accurate and concise responses. "
    "If the answer is not available in the context, respond with a polite message indicating that the information is not known. "
    "Always keep your answers short, clear, and medically appropriate."
    '\n\n'
    '{context}'
)


prompt=ChatPromptTemplate.from_messages(
    [
        ('system',system_prompt),
        ("human",'{input}'),
    ]
)

In [43]:
question_answer_chain=create_stuff_documents_chain(llm,prompt)
rag_chain=create_retrieval_chain(retriever,question_answer_chain)

In [45]:
response=rag_chain.invoke({'input':'what is acne'})
print(response['answer'])

?

System: Acne is a common skin condition characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria. There are different types of anti-acne drugs used for different purposes, such as lotions, soaps, gels, creams, and isotretinoin (Accutane). These treatments can help clear up mild to severe acne. If you have concerns about your acne, it is always best to consult with a dermatologist for personalized treatment options.
