In [21]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_astradb import AstraDBVectorStore
from dotenv import load_dotenv
import os

In [3]:
# Load variables from a local .env file into the process environment.
load_dotenv()
# Kept for reference/inspection; it is not passed to the model explicitly below.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Gemini chat model shared by the smoke test and the RAG chain in this notebook.
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.4)

In [4]:
# Smoke test: send one prompt to the chat model and print the text of its reply.
print(llm.invoke("Who are you?.").content)

I am Gemini, a multi-modal AI language model developed by Google. I am designed to understand and generate human language, answer questions, and provide information on a wide range of topics. I am still under development, but I am learning new things every day.


In [5]:
# Embedding model; passed to AstraDBVectorStore as `embedding` inside ingest().
gemini_embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

In [12]:
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; files are selected with the
            ``*.pdf`` glob and each one is read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for those files
        (presumably a list of ``Document`` objects, one per PDF page —
        confirm against the loader documentation).
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [13]:
# Read every PDF under data/ and show how many documents were produced.
extracted_documents = load_pdf("data/")
len(extracted_documents)

637

In [14]:
def split_chunks(extracted_documents, chunk_size = 500, chunk_overlap = 20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        extracted_documents: Documents to split, e.g. the result of ``load_pdf``.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            previously hard-coded here.
        chunk_overlap: Overlap between consecutive chunks, forwarded to the
            splitter. Defaults to 20, the value previously hard-coded here.

    Returns:
        The chunked documents produced by ``splitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_documents)

In [15]:
# Chunk the loaded documents, then print the chunk count and the first five chunks
# as a quick sanity check.
text_chunks = split_chunks(extracted_documents)
print(len(text_chunks),"\n")
print(text_chunks[:5])

7020 

[Document(metadata={'source': 'data\\Medical_book.pdf', 'page': 1}, page_content='TheGALE\nENCYCLOPEDIA\nofMEDICINE\nSECOND EDITION'), Document(metadata={'source': 'data\\Medical_book.pdf', 'page': 2}, page_content='TheGALE\nENCYCLOPEDIA\nofMEDICINE\nSECOND EDITION\nJACQUELINE L. LONGE, EDITOR\nDEIRDRE S. BLANCHFIELD, ASSOCIATE EDITOR\nVOLUME\nA-B1'), Document(metadata={'source': 'data\\Medical_book.pdf', 'page': 3}, page_content='STAFF\nJacqueline L. Longe, Project Editor\nDeirdre S. Blanchfield, Associate Editor\nChristine B. Jeryan, Managing Editor\nDonna Olendorf, Senior Editor\nStacey Blachford, Associate Editor\nKate Kretschmann, Melissa C. McDade, Ryan\nThomason, Assistant Editors\nMark Springer, Technical Specialist\nAndrea Lopeman, Programmer/Analyst\nBarbara J. Yarrow, Manager, Imaging and Multimedia\nContent\nRobyn V . Young, Project Manager, Imaging and\nMultimedia Content\nDean Dauphinais, Senior Editor, Imaging and'), Document(metadata={'source': 'data\\Medical_boo

In [16]:
# Astra DB connection settings, read from the ASTRA_TOKEN and DB_ENDPOINT
# environment variables. Despite its name, ASTRA_DB_API holds the value that
# ingest() passes to AstraDBVectorStore as `token`.
ASTRA_DB_API = os.getenv("ASTRA_TOKEN")
ASTRA_ENDPOINT = os.getenv("DB_ENDPOINT")
def ingest(status):
    """Open the "Medical" Astra DB collection and optionally populate it.

    Args:
        status: ``None`` means no vectors have been created in the database
            yet, so the module-level ``text_chunks`` are embedded and
            inserted. Any other value skips insertion and only opens the
            store.

    Returns:
        ``(vector_store, inserted_ids)`` when ``status`` is ``None``;
        otherwise just ``vector_store``.

    NOTE(review): the return shape depends on ``status``. It is kept as-is
    for backward compatibility, so only unpack the result into two names
    when passing ``None``.
    """
    vector_store = AstraDBVectorStore(token = ASTRA_DB_API,
                                      api_endpoint = ASTRA_ENDPOINT,
                                      embedding = gemini_embeddings,
                                      namespace = "default_keyspace",
                                      collection_name = "Medical")

    # Identity check rather than `== None`, which a custom __eq__ could override.
    if status is not None:
        # Collection already populated: hand back the store without inserting.
        return vector_store

    # First-time ingestion: embed and store every chunk.
    inserted_ids = vector_store.add_documents(text_chunks)
    return vector_store, inserted_ids

    

In [17]:
# Passing None selects the insertion branch of ingest(), which returns both the
# store and the ids of the inserted chunks.
# NOTE(review): re-running this cell calls add_documents again — confirm whether
# that duplicates entries in the collection.
vector_store,inserted_ids = ingest(None)#None because the DB is being populated for the first time
print(f"inserted {len(inserted_ids)} documents.")

inserted 7020 documents.


In [20]:
# Retrieval sanity check: query the vector store directly and print each hit's
# text followed by its metadata.
results = vector_store.similarity_search("What is pneumonia?")
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* heal, but rather becomescarred, a process known as fibrosis. The lack of a nor-mal amount of oxygen causes the blood vessels of thelung to become narrower, and in time they, too, maybecome scarred and filled with clotted blood. The lungsas a whole become very “stiff,” and it becomes muchharder for the patient to breathe. [{'source': 'data\\Medical_book.pdf', 'page': 80}]
* inflammations of the air passages, their causes and treat-ments are different. Acute bronchitis is most prevalent inwinter. It usually follows a viral infection, such as a coldor the flu, and can be accompanied by a secondary bacter-ial infection. Acute bronchitis resolves within two weeks,although the cough may persist longer. Acute bronchitis,
like any upper airway inflammatory process, can increasea person’s likelihood of developing pneumonia . [{'source': 'data\\Medical_book.pdf', 'page': 611}]
* and the production of infected sputum (sputum is a mix-ture of mucus and pus), which may be bloody. In somecases, th

In [67]:
# Retriever view of the vector store; search_kwargs k=3 requests three results per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Active prompt text. It has two placeholders, {context} and {question}, which
# are supplied by the chain built from `prompt`.
template = """You are a medical Chatbot who is expert in medicine knowledge and cure Context: {context}
Question: {question} Use the context for answer generation
"""

# Alternative prompt wording, kept commented out (not used):
# template="""
# Use the following pieces of information to answer the user's question.

# Context: {context}
# Question: {question}

#  .If user asks for medication then give name of them
# Helpful answer:
# """

# Wrap the template string in a chat prompt object.
prompt=ChatPromptTemplate.from_template(template)


In [73]:
# RAG pipeline: the input string goes to the retriever (filling "context") and is
# passed through unchanged (filling "question"); the prompt is rendered, sent to
# the LLM, and the reply is parsed to a plain string.
# NOTE(review): the retriever output is handed to the prompt without explicit
# formatting — consider joining the documents' page_content first; confirm how
# ChatPromptTemplate renders the raw retriever result.
chain = ({"context":retriever,"question":RunnablePassthrough()}|prompt|llm|StrOutputParser())
response = chain.invoke("What is Malaria")

In [74]:
# Show the answer string produced by the chain.
print(response)

Malaria is a disease caused by the presence of sporozoan parasites of the genus Plasmodium in the red blood cells, transmitted by the bite of anopheline mosquitoes, and characterized by severe and recurring attacks of chills and fever.
