In [1]:
%pwd

'd:\\Viit\\Medical_Chatbot\\research'

In [2]:
import os


def move_to_project_root(notebook_dir="research"):
    """Step from the notebook folder up to the project root, at most once.

    The notebook is started from the ``research`` sub-folder, while the rest of
    the notebook uses paths relative to the project root (e.g. ``Data/``).
    An unconditional ``os.chdir("../")`` climbs one more level every time the
    cell is re-run; checking the current folder name first makes the cell safe
    to execute repeatedly.

    Args:
        notebook_dir: Name of the folder the notebook lives in. The directory
            is only changed while the current working directory has this name.
    """
    if os.path.basename(os.getcwd()) == notebook_dir:
        os.chdir(os.pardir)


move_to_project_root()

In [3]:
%pwd

'd:\\Viit\\Medical_Chatbot'

In [4]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
# Extract data from the pdf file

def load_pdf_file(data):
    """Load the PDF files of a directory into documents.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files are
            selected with the ``*.pdf`` glob and parsed by ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [6]:
# Read the PDFs under Data/ (relative to the current working directory).
extracted_data = load_pdf_file("Data/")

In [7]:
#extracted_data

In [8]:
# Number of documents returned by load_pdf_file.
len(extracted_data)

637

In [9]:
# Split the Data into Text Chunks

# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 20

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded documents into chunks for embedding.
text_chunks = text_splitter.split_documents(extracted_data)

In [10]:
#text_chunks

In [11]:
# Number of chunks produced by the text splitter.
len(text_chunks)

5860

In [12]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() into os.environ.
env_file = find_dotenv()
_ = load_dotenv(env_file)

# Index (rather than .get) so a missing key raises KeyError in this cell.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

In [13]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Identifier of the embedding model used for both indexing and querying.
EMBEDDING_MODEL = "models/embedding-001"
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Smoke test: embed a short string and report the vector length, which must
# match the dimension of the vector index.
query = embeddings.embed_query("Hello World")
vector_length = len(query)
print("Length", vector_length)

Length 768


In [15]:
#query

### Create Pinecone vector store database

In [16]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() into os.environ.
env_file = find_dotenv()
_ = load_dotenv(env_file)

# Index (rather than .get) so a missing key raises KeyError in this cell.
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]

In [17]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medical"

# create_index fails when an index with this name already exists, which made
# the cell crash on every run after the first. Only create the index when it
# is missing so the notebook survives Restart & Run All.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # must equal the embedding length printed above (768)
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [18]:
# Embed each chunk and insert the embeddings into the Pinecone index.
#
# The vector store class is imported under an alias: importing it as plain
# `Pinecone` rebinds the name used for the Pinecone client class earlier in
# this notebook. It is imported from langchain_community (where the class
# actually lives) instead of the deprecated `langchain.vectorstores` shim.
from langchain_community.vectorstores import Pinecone as PineconeVectorStore

# NOTE(review): every execution of this cell embeds and uploads all chunks
# again; run it once per index and use the "load existing index" cell afterwards.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
)

In [19]:
# Load existing index

# Imported under an alias so the name `Pinecone` is not rebound to the vector
# store class (it is also the name of the Pinecone client class used to create
# the index). langchain_community is the package that actually provides the
# class; `langchain.vectorstores` is only a deprecated re-export.
from langchain_community.vectorstores import Pinecone as PineconeVectorStore

# Connect to the already-populated index without re-embedding anything.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [20]:
# Display the vector store object bound to `docsearch`.
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x1e557b3bcd0>

In [21]:
# Number of most-similar chunks to fetch for each query.
TOP_K = 3
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": TOP_K},
)

In [22]:
# Try the retriever on its own before wiring it into a chain.
sample_question = "What is Acne?"
retrieved_docs = retriever.invoke(sample_question)

In [23]:
# Display the documents the retriever returned.
retrieved_docs

[Document(metadata={'page': 38.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed.(Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(metadata={'page': 239.0, 'source': 'Data\\Medical_book.pdf'}, page_content='Isotretinoin (Accutane) is prescribed only for very\nsevere, disfiguring acne.\nAcne is a skin condition that occurs when pores or\nhair follicles become blocked. This allows a waxy\nmaterial, sebum, to collect inside the pores or follicles.\nNormally, sebum flows out onto the skin and hair to\nform a protective coating, but when it cannot get out,\nsmall swellings develop on the skin surface. Bacteria\nand dead skin cells can also collect that can cause\ninflammation. Swellings that are small and no

In [25]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model used to generate the final answer from the retrieved context.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.3,
    max_tokens=500,
)

In [26]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model. `{context}` is a template
# placeholder that receives the retrieved document text at run time.
# (Fixes the "retrived" typo in the text that is sent to the model.)
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat template: the system instructions (with their {context}
# placeholder) followed by the user's question in the {input} placeholder.
system_message = ("system", system_prompt)
human_message = ("human", "{input}")
prompt = ChatPromptTemplate.from_messages([system_message, human_message])

In [27]:
# Chain that feeds the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve documents for the input, then answer from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [28]:
# In-scope question: ask the full chain and print only the generated answer.
acne_request = {"input": "What is Acne?"}
response = rag_chain.invoke(acne_request)
print(response["answer"])

Acne is a skin condition characterized by blocked pores or hair follicles.  This blockage traps sebum, leading to swellings on the skin surface.  Inflammation can occur due to trapped bacteria and dead skin cells.


In [29]:
# Out-of-scope check: the system prompt tells the model to say it does not
# know when the retrieved context has no answer. The reply is free-form text,
# so it is not guaranteed to be the literal words "I don't know".
off_topic_request = {"input": "What is Statistic?"}
response = rag_chain.invoke(off_topic_request)
print(response["answer"])

This document discusses medical tests like AST and NST, but doesn't define "statistic."  It describes how AST levels relate to cell damage and how NST/biophysical profiles assess fetal health.  Therefore, I cannot answer your question using the provided context.


In [31]:
# Another in-scope question, this time about preventive measures.
amnesia_request = {"input": "What precautions takes place for Amnesia?"}
response = rag_chain.invoke(amnesia_request)
print(response["answer"])

Preventive measures for amnesia focus on minimizing brain injury risks.  This includes wearing helmets during activities like bicycling or sports, using seatbelts, and avoiding excessive alcohol and drug use.  Prompt treatment of brain infections and seeking immediate medical attention for strokes and aneurysms are also crucial.
