In [1]:
import dotenv
import os

In [3]:
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() into the environment.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)
# Subscript access (not .get), so a missing key raises KeyError in this cell.
openai_api_key = os.environ["OPENAI_API_KEY"]

In [4]:
# Show the working directory the kernel started in.
%pwd

'd:\\Code\\medical\\Medical-Chatbot\\research'

In [5]:
import os

# The kernel starts in <project>/research, while later cells use paths relative
# to the project root (e.g. 'data/'). Step up only while still inside
# research/, so re-running this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [6]:
# Confirm the working directory after the os.chdir call above.
%pwd

'd:\\Code\\medical\\Medical-Chatbot'

In [7]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

#### **Data Loading (pdf)**

In [8]:
# Load every PDF found directly under a directory.
def load_pdf_file(data):
    """Load all ``*.pdf`` files in the directory ``data``.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        The documents returned by ``DirectoryLoader.load()``, with each
        PDF read through ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [9]:
# Read the PDFs stored under data/ (relative to the current working directory).
extracted_data = load_pdf_file(
    data="data/",
)

#### **Chunking text**

In [10]:
# Split the loaded documents into smaller text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split documents into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        extracted_data: Documents to split (e.g. the result of ``load_pdf_file``).
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 500, the value previously hard-coded here.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 20, the value previously hard-coded here.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [11]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
chunk_total = len(text_chunks)
print(f"Length of Text chunks: {chunk_total}")

Length of Text chunks: 39994


#### **Embeddings**

In [12]:
from langchain_openai import OpenAIEmbeddings

# Embedding model shared by the indexing and the retrieval cells below.
embedding = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

In [13]:
# Embed a sample query and print the vector length; it must match the
# `dimension` used when the Pinecone index is created.
# The embedding is computed outside the f-string: reusing double quotes inside
# a replacement field is a SyntaxError on Python versions before 3.12.
sample_vector = embedding.embed_query("Hello")
print(f"Length : {len(sample_vector)}")

Length : 1536


#### **Vector Database**

In [14]:
# Reload the .env file, then read the Pinecone key.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)
# getenv yields None (no exception) when the variable is not set.
pinecone_api_key = os.getenv("PINECONE_API_KEY")

In [15]:
from pinecone import Pinecone, ServerlessSpec

# Pinecone client used by the index-management cells below.
pc = Pinecone(
    api_key=pinecone_api_key,
)

  from tqdm.autonotebook import tqdm


In [None]:
index_name = "medicalbot"

# Creating an index whose name is already taken fails, so only create it when
# it is missing. This keeps the cell safe to re-run (e.g. under "Run All").
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the embedding length printed above (1536)
        metric="cosine",  # similarity metric used by the index
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [None]:
# Embed each chunk and upsert the embeddings into the Pinecone index.
from langchain_pinecone import PineconeVectorStore

# Re-running the upsert against a populated index stores every chunk again
# (the retrieval output further down shows the same passage twice under
# different ids). Only ingest when the index is empty; otherwise attach to the
# vectors that are already there.
# NOTE(review): index stats can lag shortly after writes - confirm this guard
# is sufficient for your workflow.
index_stats = pc.Index(index_name).describe_index_stats()
if index_stats.total_vector_count == 0:
    docsearch = PineconeVectorStore.from_documents(
        documents=text_chunks,
        index_name=index_name,
        embedding=embedding,
    )
else:
    docsearch = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embedding,
    )

In [18]:
# Attach to the already-populated index instead of re-embedding the corpus.
from langchain_pinecone import PineconeVectorStore

# Declared again here, presumably so this cell still works when the
# index-creation and upsert cells above are skipped.
index_name = "medicalbot"

docsearch = PineconeVectorStore.from_existing_index(index_name=index_name, embedding=embedding)

In [19]:
# Display the vector store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x2097f9c2e70>

In [25]:
# Similarity-search retriever that returns the 3 closest chunks per query.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [26]:
# Try the retriever on its own with a sample question before wiring up the LLM.
retrieved_docs = retriever.invoke(
    "What is Acne?",
)

In [23]:
# Inspect the documents returned for the sample query.
# NOTE(review): this cell's execution count (23) is lower than that of the
# retriever cells above (25, 26), so the saved output may come from an earlier
# run - re-run the notebook top to bottom to confirm.
retrieved_docs

[Document(id='32a751d0-4aa6-4829-bd72-a3ad98f6f59b', metadata={'page': 55.0, 'source': 'data\\medical_book.pdf'}, page_content='Researchers, Inc. Reproduced by permission.)\n26 GALE ENCYCLOPEDIA OF MEDICINE\nAcne'),
 Document(id='d902996d-d642-4bff-8f40-e458c6e464f2', metadata={'page': 55.0, 'source': 'data\\medical_book.pdf'}, page_content='Researchers, Inc. Reproduced by permission.)\n26 GALE ENCYCLOPEDIA OF MEDICINE\nAcne'),
 Document(id='ad86ad1f-8314-4395-a0ff-e1b886eeeeed', metadata={'page': 269.0, 'source': 'data\\medical_book.pdf'}, page_content='forms ofacne.\nPurpose\nDifferent types of antiacne drugs are used for\ndifferent purposes. For example, lotions, soaps, gels,\nand creams containing benzoyl peroxide or tretinoin\nmay be used to clear up mild to moderately severe\nacne. Isotretinoin (Accutane) is prescribed only for\nvery severe, disfiguring acne.\nAcne is a skin condition that occurs when pores or\nhair follicles become blocked. This blockage allows a\nwaxy material 

In [27]:
from langchain_openai import OpenAI 
# Instantiate the OpenAI LLM with library defaults (no model, temperature or
# token limit is pinned here).
llm = OpenAI()

In [28]:
from langchain.chains import create_retrieval_chain 
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain_core.prompts import ChatPromptTemplate

# System message for the RAG chain. Adjacent string literals are concatenated
# with no separator, so every fragment that continues a sentence must end with
# an explicit space; otherwise the model receives run-together text such as
# "task.Use" or "youdon't know".
system_prompt = (
    "You are an assistant for question-answering task. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"  # placeholder for the retrieved context
)

# Two-message chat template: the system instructions (which contain the
# {context} placeholder) followed by the user's question in {input}.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [34]:
from langchain.chains import RetrievalQA 
# RetrievalQA chain over the same retriever, using the "stuff" chain type.
# It is not referenced by the later cells visible in this notebook.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [35]:
# Chain that feeds the retrieved documents to the LLM through `prompt`.
question_answer_chain = create_stuff_documents_chain(
    llm,
    prompt,
)

In [36]:
# Full RAG pipeline: the retriever combined with the question-answer chain.
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

In [39]:
# Run the RAG chain on a sample question and print only the generated answer.
response = rag_chain.invoke({"input": "What is Fever?"})
answer_text = response["answer"]
print(answer_text)



Fever is a natural response of the body to foreign substances, and is regulated by the thermoregulatory center in the hypothalamus. It is typically lowest at 6 A.M. and highest around 4-6 P.M. Fever can cause chills by redirecting blood to the body's core, leaving the extremities cold. Fever is triggered by cytokines, such as interferon and tumor necrosis factor, which cause the thermoregulatory center to reset the body's normal temperature level.


In [42]:
# Same check with a second question; only the 'answer' field is printed.
acne_result = rag_chain.invoke({"input": "What is Acne?"})
print(acne_result["answer"])



Acne is a skin condition that occurs when pores or hair follicles become blocked, causing a buildup of sebum inside the pores. It can range from mild to severe, and different types of antiacne drugs are used for different purposes. For example, benzoyl peroxide or tretinoin may be used for milder cases, while isotretinoin is reserved for severe, disfiguring acne.
