In [1]:
import os

In [2]:
# Move up one directory so the relative paths used later (e.g. "Data/") resolve
# from the parent folder.
# NOTE(review): the target is relative to the current directory, so re-running
# this cell without restarting the kernel moves up one more level each time.
os.chdir('../')

In [3]:
%pwd

'd:\\GenAI\\Bappy\\LiveProject\\GenAI-MedicalChatbot'

In [4]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
def load_pdf_file(path):
    """Load the PDF files found under ``path`` as LangChain documents.

    Args:
        path: Directory handed to ``DirectoryLoader``; files matching
            ``*.pdf`` are read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    pdf_loader = DirectoryLoader(path, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [6]:
# Read the source PDFs; "Data/" is relative to the working directory set by
# the os.chdir cell at the top of the notebook.
data = load_pdf_file(path="Data/")

In [7]:
def text_spliter(data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        data: Documents to split, e.g. the value returned by
            ``load_pdf_file``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as its
            ``chunk_size``. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as its
            ``chunk_overlap``. Defaults to 20, the value previously hard-coded.

    Returns:
        The chunks produced by ``split_documents(data)``.
    """
    # Deliberately not named ``text_spliter``: the local used to shadow the
    # enclosing function's own name, which made the body confusing to read.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(data)

In [8]:
# Chunk the loaded PDF documents; these chunks are what gets embedded and
# uploaded to the vector store further down.
documents = text_spliter(data)

In [9]:
# Sanity check: number of chunks produced by the splitter.
len(documents)

5860

In [10]:
from langchain.embeddings import HuggingFaceEmbeddings

In [11]:
def load_huggingface_embeddings():
    """Build the embedding model used to vectorise the text chunks.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)

In [12]:
# Instantiate the embedding model once; it is reused for both indexing and
# querying the vector store below.
embeddings = load_huggingface_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [13]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment; the API keys
# read with os.getenv further down are expected to come from there.
load_dotenv()

True

In [14]:
# Read the Pinecone API key from the environment rather than hard-coding it.
# os.getenv returns None when the variable is unset, so a missing key is not
# reported here and only surfaces when the client is used.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

In [15]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

In [16]:
# Pinecone client (gRPC variant, per the import alias) used for index management.
pc = Pinecone(api_key=PINECONE_API_KEY)

In [17]:
# Name of the Pinecone index shared by the create, upload and query cells below.
index_name = "medicalbot"

In [None]:
# Create the serverless index only when it is missing, so this cell can be
# re-run (e.g. Restart Kernel -> Run All) without failing because the index
# name is already taken.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Must equal the vector size produced by `embeddings`
        # (384 presumably matches all-MiniLM-L6-v2 — confirm if the model changes).
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [19]:
from langchain_pinecone import PineconeVectorStore

In [20]:
# Embed every chunk in `documents` and upload the vectors to the index.
# NOTE(review): this runs again on every execution of the cell; since no ids
# are passed, re-runs presumably add duplicate vectors — confirm before re-running.
# NOTE(review): `docsearch` is not referenced again in this notebook; the
# retriever below is built from `docssearch` instead.
docsearch = PineconeVectorStore.from_documents(
    documents=documents,
    index_name = index_name,
    embedding=embeddings
)

In [21]:
# Connect to the already-populated index without uploading anything.
# NOTE(review): the name `docssearch` (double "s") differs by one letter from
# `docsearch` defined in the previous cell; this is the handle the retriever uses.
docssearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [22]:
# Similarity-search retriever configured with k=3, i.e. three matches per query.
retriever = docssearch.as_retriever(search_type = "similarity", search_kwargs = {"k":3})

In [23]:
# Smoke-test the retriever on its own, before any LLM is involved.
response = retriever.invoke("what is acne?")

In [24]:
# Inspect the retrieved documents (page content plus page/source metadata).
response

[Document(id='cb9ac409-c9cc-4775-85ff-c4b4e2ad2513', metadata={'page': 39.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='7c36f1f8-3b1b-4af5-81a5-1162ef863088', metadata={'page': 38.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed.(Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(id='d12cbe8f-55b6-457f-a918-b7090d9ec992', metadata={'page': 37.0, 'source': 'Data\\Medical_book.pdf'}, page_content='Acidosis see Respiratory acidosis; Renal\ntubular acidosis; Metabolic acidosis\nAcne\nDefinition\nAcne is a common skin disease characterized by\npimples on the face, chest, and back. It occurs when

In [25]:
from langchain_groq import ChatGroq

In [26]:
# Chat model that generates the final answers; the API key is read from the
# GROQ_API_KEY environment variable.
# NOTE(review): verify that this model id is still served by Groq.
llm = ChatGroq(
    model_name="llama3-8b-8192",
    temperature=0.5,
    max_tokens=500,
    groq_api_key=os.getenv('GROQ_API_KEY'),
)

In [27]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [28]:
# System instructions for the answering model. The adjacent string literals are
# concatenated into one string; `{context}` is a template placeholder that is
# expected to be filled with the retrieved documents by the chain built below.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

In [29]:
# Two-message chat template: the system instructions defined above, followed by
# the user's question supplied through the `{input}` variable.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ('human', '{input}')
    ]
)

In [30]:
# Answering step: combines `llm` with `prompt` (which carries the `{context}`
# and `{input}` placeholders).
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
# Full pipeline: `retriever` supplies the documents, then the answering step runs.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [31]:
# Run the full retrieval + generation pipeline.
# NOTE(review): this rebinds `response`, which earlier held the retriever's list
# of documents; from here on it is indexed by key (see the 'answer' lookup below).
response = rag_chain.invoke({"input":"what is acne"})

In [32]:
# Show only the generated answer text from the chain's result.
response['answer']

'Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria.'