In [1]:
!pip -q install sentence_transformers

In [2]:
%pwd

'd:\\HOPE\\Gen AI\\End-to-End-Medical-Chatbot-using-GenAI\\research'

In [3]:
import os
os.chdir("../")

In [4]:
%pwd

'd:\\HOPE\\Gen AI\\End-to-End-Medical-Chatbot-using-GenAI'

In [5]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
# Extract the data from pdf file
def load_pdf_file(data):
    loader = DirectoryLoader(data,
                            glob="*.pdf",
                            loader_cls=PyPDFLoader)

    documents = loader.load()
    
    return documents


extracted_data = load_pdf_file(data = 'Data/')

In [7]:
# extracted_data

In [7]:
# Split the data into text chuncks
def text_split(extracted_data):
        
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)
    
    return text_chunks

text_chunks = text_split(extracted_data)
print("Length of text chunks", len(text_chunks))

Length of text chunks 5860


In [8]:
# text_chunks

In [9]:
# pip install sentence-transformers

In [10]:
# Embeddings 

from langchain.embeddings import HuggingFaceEmbeddings

def download_hugging_face_embeddings():
    
    embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings


embeddings = download_hugging_face_embeddings()


  embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# for checking whether the embedding model is working or not

query_result = embeddings.embed_query("Hello")
print("Length: ",len(query_result))

Length:  384


In [12]:
from dotenv import load_dotenv
load_dotenv()

True

In [13]:
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [15]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medibot"

pc.create_index(
    name=index_name,
    dimension=384, # Replace with your model dimensions
    metric="cosine", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

In [16]:
from langchain.vectorstores import Pinecone

docsearch = Pinecone.from_documents(
    documents = text_chunks,
    index_name = index_name,
    embedding = embeddings
)

In [22]:
# docsearch

In [17]:
retriever = docsearch.as_retriever(search_type ="similarity", search_kwargs = {"k":3})

In [18]:
retriever_docs = retriever.invoke("What is Acne?")

In [19]:
retriever_docs

[Document(metadata={'page': 39.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(metadata={'page': 38.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed.(Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(metadata={'page': 37.0, 'source': 'Data\\Medical_book.pdf'}, page_content='Acidosis see Respiratory acidosis; Renal\ntubular acidosis; Metabolic acidosis\nAcne\nDefinition\nAcne is a common skin disease characterized by\npimples on the face, chest, and back. It occurs when the\npores of the skin become clogged with oil, dead skin\ncells, and bacteria.\nDescription\nAcne vulgaris, the medical term fo

In [20]:
from langchain_openai import OpenAI
llm = OpenAI(temperature=0, max_tokens= 800)


In [21]:
llm.invoke("What is Acne?")

'\n\nAcne is a common skin condition that occurs when hair follicles become clogged with oil and dead skin cells. This can result in the formation of pimples, blackheads, and whiteheads on the face, neck, chest, and back. Acne can range from mild to severe and can cause physical and emotional discomfort. It is most commonly associated with puberty, but can also affect people of all ages.'

In [31]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate


system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise. Give the answer in bulletin points to make user understandable"
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        ("human","{input}")
    ]
)


In [32]:
question_answer_chain = create_stuff_documents_chain(llm,prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [33]:
response = rag_chain.invoke({"input": "What is alzheimers?"})
print(response["answer"])


- Alzheimer's disease is a form of dementia that affects mental ability and daily activities
- It is characterized by a loss of mental ability lasting at least six months
- It is the most common form of dementia and is caused by neurologic changes in the brain


In [34]:
response = rag_chain.invoke({"input": "What is heart attack?"})
print(response["answer"])



- A heart attack, also known as a myocardial infarction, is a blockage in the blood supply to the heart.
- This blockage can result in damage to the heart muscle, known as the myocardium.
- Heart attacks can be caused by a variety of factors, including autoantibodies that attack the body's own cells or tissues.


In [35]:
response = rag_chain.invoke({"input": "What is heart attack and the treatment?"})
print(response["answer"])



- Heart attacks damage and weaken the heart muscle, and the damage continues even after a person recovers from the attack.
- ACE inhibitors are a type of medication that can help slow down further damage to the heart and may also be used to treat congestive heart failure.
- Some commonly used ACE inhibitors are available only with a physician's prescription and come in tablet, capsule, and injectable forms.


In [43]:
while True:
    user_query = input("Enter the query (or type 'q' or 'e' to exit): ")
    print(user_query)

    if user_query.lower() in ["q", "e"]:
        print("Exiting... Goodbye!")
        break

    try:
        # Invoke the RAG chain with the user query
        response = rag_chain.invoke({"input": user_query})
        
        # Extract and print the answer
        if "answer" in response:
            print(f"Answer: {response['answer']}")
        else:
            print("No 'answer' found in the response. Here's the full response:")
            print(response)

    except KeyError as e:
        print(f"KeyError: {e}")
    except AssertionError as e:
        print(f"AssertionError: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


heart attack
Answer:  can be caused by an antibody that attacks the body's own cells or tissues, resulting in a block in the blood supply to the heart. This can lead to angina, which is temporary pain in the heart muscle. ACE inhibitors can help slow down further damage to the heart and are commonly used to treat congestive heart failure.
headache
Answer: 

- Migraine headaches cause throbbing pain on one side of the head
- Other symptoms may include nausea, vomiting, dizziness, increased sensitivity to light and sound
- Attacks can last for several hours or days and may occur several times a week
stock
Answer:  photo
- The photo is from a corporation called GEM.
- The photo was taken on October 22, 2003 at 6:09 PM.
- The photo is of a medical condition and was reproduced with permission.
alzhiemers
Answer: 

- Alcoholism is a serious problem that often occurs during the teenage years.
- Teenagers who are at high risk for alcoholism should receive education about alcohol and its long-t