In [1]:
import os
os.chdir("../")

In [2]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
# Extract Data from the PDF File
def load_pdf_file(data):
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [4]:
extracted_data = load_pdf_file(data='Data/')

In [5]:
# Split the Data into Text Chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [6]:
text_chunks = text_split(extracted_data)
print(f"Length of the Text Chunks: {len(text_chunks)}")

Length of the Text Chunks: 5860


In [7]:
from langchain.embeddings import HuggingFaceEmbeddings

In [8]:
# Download the Embeddings from Hugging Face
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [9]:
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [10]:
query_result = embeddings.embed_query("Hello World")
print(f"Length: {len(query_result)}")

Length: 384


In [13]:
from dotenv import load_dotenv
load_dotenv()

True

In [12]:
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
HUGGINGFACEHUB_API_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN')

In [18]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import  ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medichatbot"

pc.create_index(
    name=index_name,
    dimension=384,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

In [14]:
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
os.environ['HUGGINGFACEHUB_API_TOKEN'] = HUGGINGFACEHUB_API_TOKEN

In [19]:
# Embed each chunk and upsert the embeddings into Pinecone index
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
)

In [20]:
# Load Exisiting Index
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [21]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x21d396619c0>

In [22]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [23]:
retrived_docs = retriever.invoke("What is Acne?")

In [21]:
retrived_docs

[Document(id='f1d4fa3b-c46d-4e76-b14f-d66d890141d1', metadata={'page': 39.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='9767e2f4-f22d-47c3-8244-4f9ca9c0e148', metadata={'page': 38.0, 'source': 'Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed.(Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(id='a70fe37d-5f6e-46cb-9f6e-5d2dd5dc1633', metadata={'page': 37.0, 'source': 'Data\\Medical_book.pdf'}, page_content='Acidosis see Respiratory acidosis; Renal\ntubular acidosis; Metabolic acidosis\nAcne\nDefinition\nAcne is a common skin disease characterized by\npimples on the face, chest, and back. It occurs when

In [24]:
from langchain_huggingface import HuggingFaceEndpoint

In [None]:
# Initialize the Hugging Face model 
repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1"
llm = HuggingFaceEndpoint(repo_id=repo_id, max_length=128, temperature=0.7, token="HUGGINGFACEHUB_API_TOKEN")

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.


In [56]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Define the system prompt
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Keep the answer to a maximum of three sentences and ensure it is as concise as possible."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [57]:
# Create the question-answering chain with the new LLM
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [58]:
response = rag_chain.invoke({"input": "what is Acromegaly and gigantism?"})
print(response["answer"])


Assistant: Acromegaly and gigantism are disorders caused by the abnormal release of growth hormone from the pituitary gland in the brain. In children, before the closure of bone growth plates, this results in excessive height, known as gigantism. In adults, after bone growth stops, the condition is referred to as acromegaly, which is characterized by increased growth in bone and soft tissue, leading to distinctive physical features.


In [59]:
response = rag_chain.invoke({"input": "what is fever?"})
print(response["answer"])



Assistant: Fever, also known as pyrexia, is a temporary elevation of body temperature, often a sign of illness or infection. In children, fever is commonly caused by adenovirus types 3 and 7, and can present with symptoms such as inflammation of the eyelid, sore throat, runny nose, and inflammation of lymph glands in the neck. The fever typically ranges from 38°C to 40°C (100.4°F to 104°F).


In [60]:
response = rag_chain.invoke({"input": "I got a fever, what to do?"})
print(response["answer"])


ASSISTANT: If you have a fever, especially if it's 38°C or higher, you should contact a healthcare professional immediately. They may prescribe antibiotics if the fever is caused by an infection. Rest and stay hydrated in the meantime.
