In [28]:
import warnings
warnings.filterwarnings("ignore")

In [1]:
from pinecone import Pinecone, ServerlessSpec
import os
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI

  from tqdm.autonotebook import tqdm


In [14]:
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan. Only files matching the glob
            ``*.pdf`` are selected, and each one is read with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for the selected files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [31]:
# Read the source PDFs from the local "data/" folder (a relative path, so it is
# resolved against the kernel's current working directory).
extracted_data = load_pdf(data="data/")

In [15]:
def text_splitter(extracted_data, chunk_size=500, chunk_overlap=50):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split, e.g. the value returned by
            ``load_pdf``.
        chunk_size: Value forwarded as ``chunk_size`` to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that used to be hard-coded here.
        chunk_overlap: Value forwarded as ``chunk_overlap`` to
            ``RecursiveCharacterTextSplitter``. Defaults to 50, the value
            that used to be hard-coded here.

    Returns:
        The result of ``split_documents(extracted_data)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

In [33]:
# Chunk the loaded documents, then report how many chunks were produced.
text_chunks = text_splitter(extracted_data=extracted_data)
print(len(text_chunks))

7151


In [16]:
### Embedding model
def download_huggingface_embedding():
    """Create the embedding object used for both indexing and querying.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.

    Note:
        The Pinecone index in this notebook is created with ``dimension=384``;
        switching to another model presumably requires a matching index
        dimension (confirm before changing).
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [17]:
# Build the embedding object once; the vector-store cells below pass this same
# instance as their `embedding` argument.
embeddings = download_huggingface_embedding()

In [19]:
# The Pinecone key comes from the environment so it never appears in the notebook.
# os.getenv returns None when PINECONE_API_KEY is not set.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)

In [20]:
import time

index_name = "medical-chatbot"

# Vector size the index is created with. It has to match the output size of the
# embedding model used in this notebook (all-MiniLM-L6-v2 -> 384 dimensions).
EMBEDDING_DIMENSION = 384

# Maximum number of one-second readiness polls after creating the index.
# Without a bound, the wait would spin forever if the index never became ready.
INDEX_READY_MAX_POLLS = 300

existing_index = [index_info["name"] for index_info in pc.list_indexes()]

# Create the index only when it does not exist yet, so re-running the cell is safe.
if index_name not in existing_index:
    pc.create_index(
        name=index_name,
        dimension=EMBEDDING_DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Poll until Pinecone reports the new index as ready, giving up after
    # INDEX_READY_MAX_POLLS attempts instead of blocking the kernel indefinitely.
    for _ in range(INDEX_READY_MAX_POLLS):
        if pc.describe_index(index_name).status["ready"]:
            break
        time.sleep(1)
    else:
        raise TimeoutError(
            f"Pinecone index {index_name!r} was not ready after "
            f"{INDEX_READY_MAX_POLLS} polls"
        )

In [None]:
# Wrap the Pinecone index named by `index_name` as a vector store that uses
# `embeddings` as its embedding function.
# NOTE(review): this cell has no execution count and `docsearch` is reassigned in
# the following cell. No visible cell uploads `text_chunks` to the index — confirm
# that ingestion happens elsewhere, otherwise a freshly created index stays empty.
docsearch = PineconeVectorStore(index_name=index_name, embedding=embeddings)

In [22]:
# Attach to the existing index and run a quick retrieval sanity check.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

# Probe query: fetch the three most similar chunks and print them.
query = "cat"
docs = docsearch.similarity_search(query, k=3)

print(docs)

[Document(page_content='Cat bites are mostly found on the arms and hands.'), Document(page_content='CATS. Although cats are found in nearly a third of\nU.S. households, cat bites are far less common than dogbites. According to one study, cats inflict perhaps400,000 harmful bites in the United States each year. Thetissue damage caused by cat bites is usually limited, butthey carry a high risk of infection. Whereas the infectionrate for dog bite injuries is 15-20%, the infection rate forcat bites is 30-40%. A typical person who has been bittenis a young girl playing with a pet.'), Document(page_content='Sharp cat teeth typically leave behind a deep puncturewound that can reach muscles, tendons, and bones, whichare vulnerable to infection because of their comparativelypoor blood supply. This is why cat bites are much morelikely to become infected than dog bites. Also, people areless inclined to view cat bites as dangerous requiringimmediate attention; the risk that infection has set in by

In [23]:
# Prompt text for the answer-generation step. It has two placeholders:
#   {input}   - the user's question; same name as the "input" key passed to
#               rag_chain.invoke further down.
#   {context} - the retrieved documents; same name as the 'context' key that
#               appears in the chain's output.
# The model is told to admit when it does not know instead of inventing an answer.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

context: {context}
question: {input}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [24]:
# Turn the raw template string into a PromptTemplate object for the QA chain.
PROMPT = PromptTemplate.from_template(prompt_template)


In [43]:
# Gemini chat model that writes the final answer from the retrieved context.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.7,
    max_output_tokens=500,  # upper bound on the length of each generated answer
    safety_settings=None,
)

In [59]:
# Answer-generation step: the retrieved documents are inserted into PROMPT and
# sent to the LLM.
qa_chain = create_stuff_documents_chain(llm=llm, prompt=PROMPT)

# Retrieval step: similarity search returning the 5 closest chunks per question.
# The option must be spelled `search_kwargs`; it was previously misspelled
# `search_kwards`, so {'k': 5} was not being passed as the search arguments.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Full RAG pipeline: retrieve context for the "input" question, then answer it.
rag_chain = create_retrieval_chain(retriever, qa_chain)

In [62]:
# Ask a sample question. The displayed result is a dict that echoes 'input' and
# includes the retrieved documents under 'context'.
rag_chain.invoke({"input":"Rabies?"})

{'input': 'Rabies?',
 'context': [Document(page_content='1995): 479.\nPresutti, R. John. “Bite Wounds: Early Treatment and Prophy-\nlaxis Against Infectious Complications.” Postgraduate\nMedicine 101 (April 1997): 243-54.\nOTHER\n“Farm Animal Zoonotic and Reportable Diseases.” University\nof Arizona, University Animal Care. 2000. <http://www.ahsc.arizona.edu/uac/notes/classes/farmzoodisease 00/farmzoo2000.html>.\n“Rabies Situation and Trends.” World Health Organization.\n2001. <http://www.who.int/emc/diseases/zoo/ rabies.html>.\nJulia Barrett'),
  Document(page_content='undergoing chemotherapy . Common symptoms include\nan inflamed sore in the area of the bite or scratch,swollen lymph nodes, fever ,fatigue , and rash.\nRabies is caused by a virus that is transmitted through'),
  Document(page_content='ally recovers fully. There is no known cure for rabies oncesymptoms become evident and death is almost certain.WHO reports that 114 rabies deaths occurred in the Americ-as in 1997, with o