In [1]:
# Importing required libraries
import os
import warnings

from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore

  from tqdm.autonotebook import tqdm


In [2]:
# Pinecone configuration.
# The API key is a secret, so it is read from the PINECONE_API_KEY environment
# variable instead of being hard-coded in the notebook. Any key that was ever
# committed in plain text should be treated as compromised and rotated.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
PINECONE_INDEX_NAME = "chatbot-medicine"

if not PINECONE_API_KEY:
    # Warn early so a missing key is noticed here rather than as an
    # authentication error when the vector store is created.
    warnings.warn(
        "PINECONE_API_KEY is not set; export it before starting the kernel, "
        "otherwise the Pinecone calls will fail to authenticate."
    )

### Extract data and create embedding vectors

In [3]:
# Creating a loader to load pdf data
def load_pdf_data(data_path):
    """Load every top-level ``*.pdf`` file in ``data_path``.

    Args:
        data_path: Directory that contains the PDF files.

    Returns:
        The documents returned by ``DirectoryLoader.load()``.
    """
    loader_options = {
        "path": data_path,
        "glob": "*.pdf",
        "loader_cls": PyPDFLoader,
        "show_progress": True,
        "use_multithreading": True,
    }
    return DirectoryLoader(**loader_options).load()

In [4]:
# Load the PDF documents found in the local "data" directory.
pdf_docs = load_pdf_data(data_path="data")

100%|██████████| 1/1 [00:20<00:00, 20.93s/it]


In [6]:
# Peek at one loaded document (index 40) to sanity-check the extracted text.
pdf_docs[40]

Document(page_content='The symptoms of CO poisoning in order of increas-\ning severity include:\n• headache\n• shortness of breath\n• dizziness\n• fatigue• mental confusion and difficulty thinking\n• loss of fine hand-eye coordination\n• nausea and vomiting• rapid heart rate\n• hallucinations\n• inability to execute voluntary movements accurately• collapse\n• lowered body temperature ( hypothermia )\n• coma• convulsions• seriously low blood pressure\n• cardiac and respiratory failure\n• death\nIn some cases, the skin, mucous membranes, and\nnails of a person with CO poisoning are cherry red orbright pink. Because the color change doesn’t alwaysoccur, it is an unreliable symptom to rely on for diagnosis.\nAlthough most CO poisoning is acute, or sudden, it is\npossible to suffer from chronic CO poisoning. This condi-tion exists when a person is exposed to low levels of the gasover a period of days to months. Symptoms are often vagueand include (in order of frequency) fatigue, headache,di

In [7]:
# Splitting the data into chunks
def get_text_chunks(data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        data: Documents to split (for example the list returned by
            ``load_pdf_data``).
        chunk_size: Value forwarded to the splitter's ``chunk_size`` option.
            Defaults to 500.
        chunk_overlap: Value forwarded to the splitter's ``chunk_overlap``
            option. Defaults to 20.

    Returns:
        The chunked documents produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)

In [8]:
# Chunk all loaded documents, then display one chunk as a spot check.
doc_chunks = get_text_chunks(data=pdf_docs)
doc_chunks[14]

Document(page_content='The Gale Encyclopedia of Medicine 2 is a medical ref-', metadata={'source': 'data\\medical-book.pdf', 'page': 4})

In [9]:
# Initializing the embedding model
def get_hudding_face_embedding(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the HuggingFace embedding model.

    The function name is misspelled ("hudding"); it is kept unchanged so
    existing callers keep working.

    Args:
        model_name: Name of the embedding model to load. Defaults to
            ``sentence-transformers/all-MiniLM-L6-v2``.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)


# Correctly spelled alias; prefer this name in new code.
get_hugging_face_embedding = get_hudding_face_embedding

In [10]:
# Build the embedding model once; it is passed to the Pinecone vector store later.
minilm_embedding = get_hudding_face_embedding()

In [11]:
# Export the Pinecone API key through the process environment.
# NOTE(review): presumably the Pinecone client reads PINECONE_API_KEY from the
# environment when the vector store is created — confirm.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [12]:
# Embed the chunks and store them in the Pinecone index.
# NOTE(review): re-running this cell presumably uploads the same chunks
# again — confirm before executing it more than once.
vectorstore = PineconeVectorStore.from_documents(
    documents=doc_chunks,
    embedding=minilm_embedding,
    index_name=PINECONE_INDEX_NAME,
)

In [13]:
# Spot-check retrieval with a sample question and display the matches.
ex_query = "What is melanoma"
similar_search = vectorstore.similarity_search(ex_query)
similar_search

[Document(page_content='Intraocular melanoma is a rare cancer overall, yet it\nis the most common eye cancer seen in adults. It is whencancer cells are found in the uvea of the eye. The uveaincludes the iris (the colored portion of eye), the ciliarybody (an eye muscle that focuses the lens) and thechoroid (found in the back of the eye next to the retina).Intraocular cancer of the iris usually grows slowly', metadata={'page': 646.0, 'source': 'data\\medical-book.pdf'}),
 Document(page_content='layers of cells covering the body’s surface and lining theinternal organs and various glands). Ninety percent ofhuman cancers fall into this category. Carcinomas can besubdivided into two types: adenocarcinomas and squa-\nmous cell carcinomas. Adenocarcinomas are cancers thatdevelop in an organ or a gland, while squamous cell car-cinomas refer to cancers that originate in the skin.\n• Melanomas also originate in the skin, usually in the\npigment cells (melanocytes).', metadata={'page': 20.0, 'sour

In [14]:
# Prompt text for the LLM.
# {context} and {question} are the two placeholders filled in by the prompt
# template. The string must open with exactly three quotes: a fourth quote
# would become a literal '"' at the start of the prompt.
llm_prompt = """
Use the following information to answer the given question.
If the answer is unknown, mention that you don't know the answer to the particular question
and prompt the user to ask any other question. Do not make up any answer by yourself.

Context: {context}
Question: {question}

Provide only related helpful answers
Answer:
"""

In [15]:
# Wrap the prompt text in a PromptTemplate and package it under the "prompt"
# key so it can be handed to the chain as chain_type_kwargs.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=llm_prompt,
)
chain_type_kwargs = dict(prompt=prompt_template)


In [16]:
# Instantiate the local Llama-2 chat model through CTransformers.
# The model path is built with os.path.join so it works on every OS and does
# not depend on a backslash literal ("\l" is an invalid escape sequence that
# newer Python versions warn about).
llm = CTransformers(
    model=os.path.join("model", "llama-2-7b-chat.ggmlv3.q4_0.bin"),
    model_type="llama",
    config={
        "max_new_tokens": 500,
        "temperature": 0.8,
    },
)

In [20]:
# Build the retrieval QA chain: the vector store acts as the retriever and the
# custom prompt is supplied through chain_type_kwargs. Source documents are
# returned alongside the answer.
qa_obj = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

#### Note: `RetrievalQA.from_chain_type` raised several errors related to the retriever. They were resolved by upgrading langchain to version 0.1.10 (any version above 0.1.0 should suffice).

In [21]:
# Question answering session.
# Type "exit" or "quit" (or interrupt the kernel / press Ctrl+C) to stop.
try:
    while True:
        user_input = input("Ask your query related to general medicine and disease: ").strip()
        if not user_input:
            # Skip blank input instead of sending an empty query to the LLM.
            continue
        if user_input.lower() in ("exit", "quit"):
            break
        # Calling the chain object directly is deprecated in langchain 0.1.x;
        # invoke() is the supported entry point and returns the same dict.
        result = qa_obj.invoke({"query": user_input})
        print("Result: ", result['result'])
except (KeyboardInterrupt, EOFError):
    # An interrupt or a closed input stream is a normal way to end the session.
    pass
print("Query ended good bye !")

Result:  • What are there are adenucancerous for "I don'What are there are melan eye cancer, Cancervide the information about intrarednessfulfill in 
1.


The top, melan ophotitis possible treatment options to treatable to ask any other types of skin lesions: I cant cause | melan eye cancer, what is intraised by looking atypassume melan ophotoschestradiation.
• What are there is, Yes, How does intrarednessfulfill in the brain cancer eye, melan area around 1. Cancervaccording to treatments:









1. 
Inrared (3 experts





• What are,


KeyboardInterrupt: Interrupted by user