# Importing Libraries

In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.vectorstores import Chroma

# Loading Dataset

In [2]:
# Function to extract data from PDF files
def load_pdf(data_path):
    """Load the PDF files found in a directory.

    Args:
        data_path: Directory handed to ``DirectoryLoader``; files are
            selected with the ``*.pdf`` glob and read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files.
    """
    return DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [3]:
# Directory that holds the source PDFs (machine-specific absolute path).
pdf_directory = r"E:\LLM_Project\Medical-Chatbot\data"
extracted_data = load_pdf(pdf_directory)

# Creating Text Chunks

In [4]:
#Create text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter. Defaults to
            500, the value this notebook previously hard-coded.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter. Defaults
            to 20, the value this notebook previously hard-coded.

    Returns:
        The chunks returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [5]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
chunk_total = len(text_chunks)
print("length of my chunk:", chunk_total)

length of my chunk: 7020


# Download the Hugging Face Embedding Model

In [6]:
#download embedding model
def download_hugging_face_embeddings():
    """Create the embedding wrapper used throughout this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_name)

In [7]:
# Instantiate the embedding model once; later cells reuse this object.
embeddings = download_hugging_face_embeddings()

  warn_deprecated(


In [8]:
# Display the embeddings object's repr to inspect its configuration.
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [9]:
# Sanity check: embed a short string and print the length of the returned vector.
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


# Creating the Vector Database

In [10]:
# Relative directory name passed to Chroma as its persist_directory.
persist_directory = 'db'

In [11]:
# Embed every chunk and store the vectors in a Chroma collection.
# NOTE(review): re-running this cell against an existing persist directory may
# add the same chunks again — confirm before re-executing.
vectordb = Chroma.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    persist_directory=persist_directory,
)

In [12]:
# Display the vector store object to confirm it was created.
vectordb

<langchain_community.vectorstores.chroma.Chroma at 0x2727b259550>

# Make a retriever

In [13]:
# Wrap the vector store in a retriever using its default search settings.
retriever = vectordb.as_retriever()

In [14]:
# Retrieve the chunks most relevant to a sample question.
docsearch = retriever.get_relevant_documents("What are Allergies")
# The following cells read the results through `docs`; bind that name here so
# the notebook runs on a fresh kernel instead of raising NameError.
docs = docsearch

  warn_deprecated(


In [15]:
# Print the text of the first retrieved document.
print(docs[0].page_content)

GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies
Allergic rhinitis is commonly triggered by
exposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.
The presence of an allergen causes the
body's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.
IgE molecules attach to mast
cells, which contain histamine.HistaminePollen grains
Lymphocyte
FIRST EXPOSURE


In [16]:
# Number of documents held in `docs`.
len(docs)

4

In [17]:
# Recreate the retriever with k=2 so each query returns two documents.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})

In [18]:
# Confirm the search settings carried by the new retriever.
retriever.search_kwargs

{'k': 2}

In [19]:
# Run the sample question again through the k=2 retriever.
docs2 = retriever.get_relevant_documents("What are Allergies?")

In [20]:
# Number of documents returned by the k=2 retriever.
len(docs2)

2

In [21]:
# Show the retrieved documents, including their metadata.
docs2

[Document(metadata={'page': 130, 'source': 'E:\\LLM_Project\\Medical-Chatbot\\data\\Medical_book.pdf'}, page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE"),
 Document(metadata={'page': 135, 'source': 'E:\\LLM_Project\\Medical-Chatbot\\data\\Medical_book.pdf'}, page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-')]

In [22]:
# Raw prompt text for the QA chain. The {context} and {question} placeholders
# are the two input variables declared when the PromptTemplate is built.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [23]:
# Wrap the raw template text and package it as the chain's prompt override.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [26]:
# Load the local Llama-2 chat model file through CTransformers.
llm_config = {"max_new_tokens": 512, "temperature": 0.8}
llm = CTransformers(
    model=r"E:\LLM_Project\Medical-Chatbot\model\llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config=llm_config,
)

In [29]:
# Assemble the retrieval QA chain from the LLM, the retriever and the custom
# prompt; source documents are returned alongside the answer.
qa_chain_options = dict(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)
qa = RetrievalQA.from_chain_type(**qa_chain_options)

In [30]:
# Ask one question interactively and print only the generated answer.
user_input = input("Input Prompt:")
# Use `invoke`; calling the chain object directly triggers LangChain's
# deprecation warning.
result = qa.invoke({"query": user_input})
print("Response : ", result["result"])

Input Prompt: What is Acne ?


  warn_deprecated(


Response :  Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria.


In [33]:
# Absolute path of a persisted Chroma directory (machine-specific).
# NOTE(review): presumably the same `db` folder written earlier — confirm.
db_path = r"E:\LLM_Project\Medical-Chatbot\db"

In [34]:
# Reopen the persisted store. Pass the same embedding model that built the
# index so queries are embedded consistently, not with whatever Chroma would
# fall back to when no embedding function is supplied.
vectordb = Chroma(persist_directory=db_path, embedding_function=embeddings)

# Rebuild the retriever and QA chain on top of the reloaded store; otherwise
# `qa` keeps querying the store created earlier in the session and the reload
# above has no effect on the answers.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

In [36]:
# Ask another question interactively and print only the generated answer.
user_input = input("Input Prompt:")
# Use `invoke`; calling the chain object directly is deprecated in LangChain.
result = qa.invoke({"query": user_input})
print("Response : ", result["result"])

Input Prompt: What is Cancer ?


Response :  Cancer is the term used to describe abnormal growth or tumors that can be benign (non-cancerous) or malignant (cancerous). Malignant cancer cells have the ability to invade normal tissue locally or spread throughout the body, a process called metastasis. Cancer can arise in any part of the body and can affect different types of cells, such as epithelial, connective, muscle, or nerve cells. Breast cancer specifically arises in the milk-producing glands or ducts of the breast.
