In [1]:
%pwd

'e:\\Gen Ai\\Projects\\Medical-Chatbot\\research'

In [2]:
import os

# Step up from research/ to the project root so that relative paths such as
# 'Data/' resolve. The move is guarded on the current folder name so that
# re-running this cell does not climb one directory higher every time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
%pwd

'e:\\Gen Ai\\Projects\\Medical-Chatbot'

In [4]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
def load_pdf_file(path):
    """Load the PDF files in a directory as documents.

    Args:
        path: Directory handed to ``DirectoryLoader``; files matching the
            glob ``*.pdf`` are read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for that directory.
    """
    pdf_loader = DirectoryLoader(path, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [6]:
extracted_data=load_pdf_file(path='Data/')

In [7]:
len(extracted_data)

637

In [8]:
type(extracted_data[0])

langchain_core.documents.base.Document

In [9]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        extracted_data: Documents to split, e.g. the list returned by
            ``load_pdf_file``.
        chunk_size: Maximum chunk length handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            this notebook has always used.
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter. Defaults to 20.

    Returns:
        The chunked documents produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [10]:
text_chunks=text_split(extracted_data)

In [11]:
len(text_chunks)

5860

In [12]:
type(text_chunks[0])

langchain_core.documents.base.Document

In [13]:
from langchain.embeddings import HuggingFaceEmbeddings

In [14]:
def download_hugging_face_embeddings():
    """Build the Hugging Face embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

In [15]:
embeddings=download_hugging_face_embeddings()

  embeddings=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [16]:
query_result = embeddings.embed_query("Hello world")

In [17]:
len(query_result)

384

In [18]:
from dotenv import load_dotenv
load_dotenv()

True

In [19]:
# API credentials read from the process environment; each name is None when
# the corresponding variable is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

In [20]:
# Pinecone's serverless spec helper and its gRPC client (imported as `Pinecone`)
from pinecone import ServerlessSpec
from pinecone.grpc import PineconeGRPC as Pinecone

# Client authenticated with the key loaded from the environment
pc = Pinecone(api_key=PINECONE_API_KEY)

# Index that will hold the chunk embeddings
index_name = "medical-chatbot"

# Only create the index when it does not exist yet, so re-running is harmless.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,  # same length as the embedding checked above (len(query_result) == 384)
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [43]:
# Make sure the Pinecone key is present in the process environment before the
# vector store is built. Assigning None into os.environ fails with an opaque
# "str expected, not NoneType" TypeError, so check first and say what is wrong.
_pinecone_api_key = os.getenv('PINECONE_API_KEY')
if not _pinecone_api_key:
    raise EnvironmentError(
        "PINECONE_API_KEY is not set; define it in .env or the environment "
        "before building the vector store."
    )
os.environ["PINECONE_API_KEY"] = _pinecone_api_key

In [21]:
# NOTE: this import rebinds `Pinecone` (the gRPC client class imported in the
# client cell) to LangChain's vector store class; `pc` keeps pointing at the
# client instance created earlier.
from langchain.vectorstores import Pinecone

# Embedding and upserting every chunk on each run would store duplicate
# vectors in the index, so ingest only when the index is still empty and
# otherwise attach to the vectors that are already there.
index_stats = pc.Index(index_name).describe_index_stats()
if index_stats.total_vector_count == 0:
    vectordb = Pinecone.from_documents(
        documents=text_chunks,
        index_name=index_name,
        embedding=embeddings,
    )
else:
    vectordb = Pinecone.from_existing_index(
        index_name=index_name,
        embedding=embeddings,
    )

In [22]:
vectordb

<langchain_community.vectorstores.pinecone.Pinecone at 0x200990c6b90>

In [23]:
# Similarity-search retriever over the vector store; k=3 requests three
# matching chunks per query (the 'What is Acne?' lookup below returns three).
retriever = vectordb.as_retriever(search_type='similarity',search_kwargs={'k':3})

In [24]:
retrieved_docs=retriever.invoke('What is Acne?')

In [25]:
retrieved_docs

[Document(metadata={'page': 39.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(metadata={'page': 38.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed.(Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(metadata={'page': 37.0, 'source': 'Data\\Medical_book.pdf'}, page_content='Acidosis see Respiratory acidosis; Renal\ntubular acidosis; Metabolic acidosis\nAcne\nDefinition\nAcne is a common skin disease characterized by\npimples on the face, chest, and back. It occurs when the\npores of the skin become clogged with oil, dead skin\ncells, and bacteria.\nDescription\nAcne vulgaris, the medical term fo

In [26]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model that writes the final answers for the RAG chain.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.4,
    max_tokens=500,
)

In [27]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate

# System message: the answering instructions, a blank line, then the
# {context} placeholder. The triple-quoted literal is sent as written,
# including its line breaks and leading indentation.
system_prompt = (
    """You are an assistant for question-answering task.
    Use the following pieces of retrieved context to answer
    the question.If you don't know the answer,say that you
    don't know.Use three sentences maximum and keep the
    answer concise."""
    "\n\n"
    # Presumably filled with the retrieved documents by the stuff-documents
    # chain built below -- confirm against the LangChain docs.
    "{context}"
)

# Two-message chat template: the system prompt above, followed by the user's
# question, which is supplied under the "input" key when the chain is invoked.
prompt=ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        ("human","{input}")
    ]
)

In [28]:
# Combine the LLM and the prompt into a document-answering chain.
question_answer_chain = create_stuff_documents_chain(llm,prompt)
# Put the retriever in front of it; the cells below invoke this with
# {"input": <question>} and read the reply from response['answer'].
rag_chain = create_retrieval_chain(retriever,question_answer_chain)

In [29]:
response = rag_chain.invoke({"input": "What is Acromegaly and gigantism"})
print(response['answer'])

Acromegaly and gigantism are disorders caused by an abnormal release of a chemical from the pituitary gland, leading to increased bone and soft tissue growth.  Gigantism occurs when this abnormality happens before bone growth stops, resulting in unusual height. Acromegaly occurs when the abnormality happens after bone growth stops.



In [30]:
response = rag_chain.invoke({"input": "What is stats?"})
print(response["answer"])

This question cannot be answered from the given context.  The provided text discusses blood counts and athletic heart syndrome, but does not define or mention "stats".



In [34]:
response = rag_chain.invoke({"input": "What is acne?"})
print(response["answer"])

Acne, also known as acne vulgaris, is a common skin disease.  It's characterized by pimples on the face, chest, and back due to clogged pores. These clogged pores result from a buildup of oil, dead skin cells, and bacteria.



In [33]:
response = rag_chain.invoke({"input": "What is Abdominal wall defects?"})
print(response["answer"])

Abdominal wall defects are birth defects that cause the stomach or intestines to protrude.  The cause is currently unknown, and symptoms in the mother are typically nondescript.  Diagnosis is made visually at birth or via ultrasound before birth.

