In [1]:
from langchain_chroma import Chroma
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryStore
from langchain.retrievers import ParentDocumentRetriever
from api_key import GOOGLE_API_KEY
import re


In [2]:
pdf = "docs\medical_book.pdf"
loader = PyPDFLoader(pdf)
loaded_data = loader.load()
print(len(loaded_data))

759


In [3]:
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1000 * 4, chunk_overlap=200)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

In [4]:
# The vectorstore to use to index the child chunks
vectorstore = Chroma(
    collection_name="parent_document_splits1",
    embedding_function=FastEmbedEmbeddings(),
    persist_directory="db_PDR"
)
docstore = InMemoryStore()

  from .autonotebook import tqdm as notebook_tqdm
Fetching 5 files: 100%|██████████| 5/5 [00:00<?, ?it/s]


In [5]:
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore, 
    docstore=docstore,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
    search_type="mmr",
    search_kwargs={"k": 4}
)

In [6]:
retriever.add_documents(loaded_data)

In [7]:
num_parent_docs = len(retriever.docstore.store.items())
num_child_docs = len(set(retriever.vectorstore.get()['documents']))

print (f"You have {num_parent_docs} parent docs and {num_child_docs} child docs")

You have 1235 parent docs and 4086 child docs


In [8]:
question = "How to treat corneal ulcers?"

In [9]:
child_docs = retriever.vectorstore.similarity_search(question)
print (f"{len(child_docs)} child docs were found") 
child_docs[0]

4 child docs were found


Document(page_content='Germ —A disease-causing microorganism.\nInflammation —The body’s reaction to irritation.\nTopical corticosteroids —Cortisone and related\ndrugs used on the skin and in the eye, usually for\nallergic conditions.Treatment\nA corneal ulcer needs to be treated aggressively, as it\ncan result in loss of vision. The first step is to eliminate\ninfection. Broad spectrum antibiotics will be used\nbefore the lab results come back. Medications may then\nbe changed to more specifically target the cause of the\ninfection. A combination of medications may be neces-\nsary. Patients should return for their follow-up visits so\nthat the doctor can monitor the healing process. The\ncornea can heal from many insults, but if it remains\nscarred, corneal transplantation may be necessary to\nrestore vision. If the corneal ulcer is large, hospitaliza-\ntion may be necessary.\nPrognosis\nTreated early enough, corneal infections will usually\nresolve, perhaps even without the formation 

In [10]:
parents_doc_contents = []
for child_doc in child_docs:
    parents_doc_content = retriever.docstore.store.get(child_doc.metadata['doc_id']).page_content
    print(parents_doc_content + '\n\n')
    parents_doc_contents.append(parents_doc_content)

• Fecal bacteria are more likely to be able to infect the
cornea.
• A bacterium called Pseudomonas aeruginosa , which
can contaminate eyedrops, is particularly able to cause
corneal infection.
• A group of incomplete bacteria known as Chlamydia
can be transmitted to the eye directly by flies or dirty
hands. One form of chlamydial infection is the leading
cause of blindness in developing countries and is known
as Egyptian ophthalmia or trachoma . Another type of
Chlamydia causes a sexually transmitted disease.
• Other sexually transmitted diseases—for example,
syphilis—can affect the cornea.
The most common viruses to damage the cornea are
adenoviruses and herpes viruses. Viral and fungal infec-
tions are often caused by improper use of topical corti-
costeroids . If topical corticosteroids are used in a patient
with herpes simplex keratitis, the ulcer can get much
worse and blindness could result.
Symptoms are obvious. The cornea is intensely sen-
sitive, so corneal ulcers normally pro

In [11]:
from langchain.prompts import PromptTemplate
from langchain_google_genai.llms import GoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import LLMChain

In [12]:
prompt_template = """
Use the given information context to give appropriate answer for the user's question.
If you don't know the answer, just say that you know the answer, but don't make up an answer.
Context: {context}
Question: {question}
Only return the appropriate answer and nothing else.
Helpful answer:
"""

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [13]:
config = {'max_new_tokens': 512, 'temperature': 0.8}
llm = GoogleGenerativeAI(model="gemini-pro", google_api_key="AIzaSyAHaDoiTrgLzABBWcv8ocZGJNIb7F3xTSk", config=config)

In [14]:
chain = LLMChain(llm=llm, prompt=prompt, verbose=True)
output = chain.run(context=parents_doc_contents, question=question)
print(output)

  warn_deprecated(
  warn_deprecated(




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Use the given information context to give appropriate answer for the user's question.
If you don't know the answer, just say that you know the answer, but don't make up an answer.
Context: ['• Fecal bacteria are more likely to be able to infect the\ncornea.\n• A bacterium called Pseudomonas aeruginosa , which\ncan contaminate eyedrops, is particularly able to cause\ncorneal infection.\n• A group of incomplete bacteria known as Chlamydia\ncan be transmitted to the eye directly by flies or dirty\nhands. One form of chlamydial infection is the leading\ncause of blindness in developing countries and is known\nas Egyptian ophthalmia or trachoma . Another type of\nChlamydia causes a sexually transmitted disease.\n• Other sexually transmitted diseases—for example,\nsyphilis—can affect the cornea.\nThe most common viruses to damage the cornea are\nadenoviruses and herpes viruses. Viral and fungal infec-\ntions ar