In [5]:
%pwd

'd:\\Medical Chatbot\\Medical-Chatbot-Generative-AI-\\research'

In [6]:
import os
# Move from the notebook's folder to the project root so that relative paths
# such as "Data/" resolve. A raw string keeps the Windows backslashes literal:
# in a normal string "\M" is an invalid escape sequence and triggers a warning.
# NOTE(review): this is a machine-specific absolute path; adjust it per machine.
os.chdir(r"D:\Medical Chatbot\Medical-Chatbot-Generative-AI-")
%pwd

'D:\\Medical Chatbot\\Medical-Chatbot-Generative-AI-'

In [8]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [9]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files are
            selected with the ``*.pdf`` glob and parsed with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [15]:
# Read the PDFs stored under the project's "Data/" folder.
extracted_data = load_pdf_file("Data/")

In [16]:
#Split the data into text chunks

def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller text chunks.

    Args:
        extracted_data: Documents to split, passed straight to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 500, the value previously hard-coded here.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 20, the value previously hard-coded here.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [17]:
# Chunk the loaded documents, then display how many chunks were produced.
text_chunks = text_split(extracted_data=extracted_data)
len(text_chunks)

5860

In [18]:
#Downloading the embeddings from Hugging Face

def download_embeddings():
    """Create the Hugging Face embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    # Imported inside the function, exactly as the original cell does.
    from langchain.embeddings.huggingface import HuggingFaceEmbeddings

    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


embeddings = download_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [53]:
import os

from dotenv import load_dotenv

# Load variables from a local .env file (if present) before checking, so the
# check also works when this cell runs on a fresh kernel.
load_dotenv()

# The previous version discarded the lookup result and printed "done"
# unconditionally. Report whether the key is really available, without
# echoing its value into the notebook output.
if os.environ.get("OPENAI_API_KEY"):
    print("done")
else:
    print("OPENAI_API_KEY is not set")

done


In [21]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

# Pinecone client authenticated with the key taken from the environment.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

index_name = "medibot"

# create_index raises when the index already exists, so only create it when
# it is missing. This keeps the cell safe to re-run.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # NOTE(review): presumably the output size of the embedding model
        # used in this notebook; verify if the model changes.
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )

In [22]:
# Embed each text chunk and store it in the Pinecone index.
from langchain.vectorstores import Pinecone

docsearch = Pinecone.from_documents(
    text_chunks, embedding=embeddings, index_name=index_name
)


In [23]:
# Load the existing index.
from langchain.vectorstores import Pinecone

docsearch = Pinecone.from_existing_index(embedding=embeddings, index_name=index_name)

In [24]:
# Similarity-based retriever over the vector store, configured with k=3.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [25]:
# Quick sanity check: run one query and display what comes back.
retrieved_data = retriever.invoke("What is a heart attack?")
retrieved_data

[Document(metadata={'page': 305.0, 'source': 'Data\\Medical_book.pdf'}, page_content='attacks a person’s own heart muscle, or myocardi-\num.\nAutoantibody—An antibody that attacks the body’s\nown cells or tissues.\nMyocardial infarction—A block in the blood sup-\nply to the heart, resulting in what is commonly\ncalled a heart attack.\nMyocardium —The muscular middle layer of the\nheart.\nTiter—A dilution of a substance with an exact\nknown amount of fluid. For example, one part of\nserum diluted with four parts of saline is a titer\nof 1:4.'),
 Document(metadata={'page': 207.0, 'source': 'Data\\Medical_book.pdf'}, page_content='to the heart muscle. An episode of angina is not an actual\nheart attack, but rather pain that results from the heart\nmuscle temporarily receiving too little blood. This tem-\nporary condition may be the result of demanding activi-\nties such as exercise and does not necessarily indicate\nthat the heart muscle is experiencing permanent damage.\nIn fact, episode

In [26]:
from dotenv import load_dotenv

# Read variables from a local .env file (if present) into the environment.
load_dotenv()

# Writing None into os.environ raises an opaque TypeError, and re-assigning a
# value that is already present changes nothing. Fail fast with a clear
# message instead when the key is missing.
if os.environ.get("OPENAI_API_KEY") is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the .env file or the environment."
    )

In [47]:
from langchain_openai import ChatOpenAI

# Set up the OpenAI chat model through LangChain.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # must be a chat model
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0.4,
    max_tokens=500,
)


In [34]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message for the QA chain; "{context}" is left as a template
# placeholder in the string.
system_prompt = "".join(
    [
        "You are an assistant for question-answering tasks. ",
        "Use the following pieces of retrieved context to answer the question. ",
        "Provide a concise and relevant answer. ",
        "If the answer is not directly available, say you don't know.",
        "\n\n{context}",
    ]
)

# Two-message chat template: the system instructions followed by the user's
# "{input}" placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [49]:
# Chain that passes the retrieved documents to the LLM through the prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full pipeline: the retriever feeds the question-answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [51]:
# Ask one question through the chain and print the "answer" entry, falling
# back to a fixed message if that key is absent.
response = rag_chain.invoke(
    {"input": "What is a heart disease?"}
)
answer = response.get("answer", "I don't know the answer.")
print(answer)

I don't know.
