In [1]:
import os

from dotenv import load_dotenv
from langchain.document_loaders import PyPDFDirectoryLoader,PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [2]:
# Root folder that is walked (including sub-folders) for PDF files to index.
DOCS_DIR = "../medical-docs/"

In [3]:
# The cells below read GOOGLE_API_KEY and PINECONE_API_KEY from the
# environment. Load the project's .env here, before any client is built, so
# the notebook also works top-to-bottom on a fresh kernel.
# With its default settings load_dotenv leaves already-set variables untouched.
load_dotenv("../../.env")

os.environ["PINECONE_ENVIRONMENT"] = "gcp-starter"

In [4]:
# Embedding client shared by the vector store for indexing and querying.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [5]:
# Embed a throwaway sentence and display the length of the returned vector.
probe_vector = embedding.embed_query("Hi, How are you?")
len(probe_vector)

768

In [32]:
from dotenv import load_dotenv

# Read the key/value pairs from the .env file two directories up into
# os.environ; the call's return value is displayed as the cell output.
load_dotenv(dotenv_path="../../.env")

True

In [33]:
# Show only whether the key is set. Never echo the key itself: cell output is
# saved with the notebook and ends up in version control.
# NOTE(review): the key that was previously printed here was stored in this
# notebook's output and must be treated as compromised — rotate it.
bool(os.getenv("GOOGLE_API_KEY"))

True

In [6]:
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

  from tqdm.autonotebook import tqdm


In [7]:
# Show only whether the key is set. Never echo the key itself: cell output is
# saved with the notebook and ends up in version control.
# NOTE(review): the key that was previously printed here was stored in this
# notebook's output and must be treated as compromised — rotate it.
bool(os.getenv("PINECONE_API_KEY"))

True

In [8]:
# Pinecone client authenticated with the key taken from the environment.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

In [9]:
# Name of the Pinecone index used by this notebook, and a handle to it.
index_name = "medical-chatbot"
index = pc.Index(index_name)

In [10]:
# LangChain vector-store wrapper around the index, using the embedding client
# defined above.
vector_store = PineconeVectorStore(embedding=embedding, index=index)

In [11]:

# Load every PDF found under DOCS_DIR and split it into overlapping chunks
# (450 characters per chunk, 100 characters of overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=100)
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Absolute paths of all files ending in ".pdf", in os.walk order.
pdf_paths = [
    os.path.abspath(os.path.join(folder, name))
    for folder, _, names in os.walk(DOCS_DIR)
    for name in names
    if name.endswith(".pdf")
]

all_chunks = []
for pdf_path in pdf_paths:
    loaded_docs = PyPDFLoader(pdf_path).load()

    # Record the absolute file path on each document before splitting.
    for loaded_doc in loaded_docs:
        loaded_doc.metadata["source"] = pdf_path

    all_chunks.extend(text_splitter.split_documents(loaded_docs))


In [None]:
# all_chunks

In [13]:
# Embed every chunk and write it to the Pinecone index.
# from_documents is used instead of from_texts on page_content alone so that
# each chunk's metadata (including the "source" path recorded while loading)
# is stored alongside its vector rather than being discarded.
# NOTE(review): no explicit IDs are supplied, so re-running this cell
# presumably inserts the same chunks again under new IDs — confirm before
# re-indexing.
vector_store = PineconeVectorStore.from_documents(
    documents=all_chunks,
    embedding=embedding,
    index_name=index_name,
)
print(f"Successfully stored {len(all_chunks)} chunks in Pinecone.")

Successfully stored 2308 chunks in Pinecone.


In [14]:
# Sample user question for the retrieval chains below ("Hear" typo corrected).
query = "How does a person get Heart Attack ?"

In [15]:
# Sample user question sent to the retrieval chains below.
query = "How does a person get Heart Attack ?"

In [18]:
from langchain.chains import RetrievalQA

# Single-turn question answering: a Gemini chat model combined with the
# vector store's default retriever, using the "stuff" chain type.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
retriever = vector_store.as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")

In [20]:
# Run the single-turn chain on the sample question and display its response.
qa_response = qa.invoke(query)
qa_response

{'query': 'How does a person get Heart Attack ?',
 'result': 'The most common cause of a heart attack is coronary artery disease.  This happens when plaque builds up inside the coronary arteries, reducing the flow of oxygen-rich blood to the heart muscle.  A blood clot in a coronary artery with plaque buildup can also cause a heart attack.'}

In [21]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [24]:

# Buffer memory that keeps the conversation under the "chat_history" key as
# message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Conversational variant of the QA chain: same LLM and retriever, plus memory.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vector_store.as_retriever(),
    memory=memory,
)

In [26]:
# First conversational turn; the displayed result includes the chat history.
first_turn = chain.invoke({"question": query})
first_turn

{'question': 'How does a person get Heart Attack ?',
 'chat_history': [HumanMessage(content='How does a person get Heart Attack ?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The most common cause of a heart attack is coronary artery disease.  This happens when plaque builds up inside the coronary arteries, reducing the flow of oxygen-rich blood to the heart muscle.  A blood clot can then form on top of this plaque, further blocking blood flow and causing a heart attack.', additional_kwargs={}, response_metadata={})],
 'answer': 'The most common cause of a heart attack is coronary artery disease.  This happens when plaque builds up inside the coronary arteries, reducing the flow of oxygen-rich blood to the heart muscle.  A blood clot can then form on top of this plaque, further blocking blood flow and causing a heart attack.'}

In [27]:
# Follow-up question that only makes sense given the previous answer.
query2 = "So reduction in the flow of oxygen-rich blood to the heart muscle is the main reason huh"

In [28]:
# Second conversational turn: ask the follow-up question on the same chain.
follow_up_turn = chain.invoke({"question": query2})
follow_up_turn

{'question': 'So reduction in the flow of oxygen-rich blood to the heart muscle can cause Heart Attack ?',
 'chat_history': [HumanMessage(content='How does a person get Heart Attack ?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The most common cause of a heart attack is coronary artery disease.  This happens when plaque builds up inside the coronary arteries, reducing the flow of oxygen-rich blood to the heart muscle.  A blood clot can then form on top of this plaque, further blocking blood flow and causing a heart attack.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='So reduction in the flow of oxygen-rich blood to the heart muscle can cause Heart Attack ?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Yes, a heart attack, also known as a myocardial infarction, happens when the flow of blood that brings oxygen to a part of your heart muscle suddenly becomes blocked.  The heart can't get enough oxygen, and if blood f

In [29]:
# Ask the follow-up question and keep the chain's response for inspection.
# NOTE(review): this is a fresh chain call, so it presumably appends another
# question/answer pair to the conversation memory — confirm that is intended.
follow_up_input = {"question": query2}
answer = chain.invoke(follow_up_input)

In [30]:
# Display just the generated answer text (None if the key is absent).
answer.get("answer", None)

"The most common cause of a heart attack is coronary artery disease.  This occurs when coronary arteries can't supply enough oxygen-rich blood to the heart muscle.  This is often due to a buildup of plaque inside the arteries."