#### **HealthLlama - AI Health Assistant**

##### **1. Installing and Importing all dependencies**

In [None]:
# Install the notebook's dependencies (the first command runs quietly via -q).
!pip install -q transformers einops accelerate langchain bitsandbytes
# LangChain community integrations; the CTransformers LLM wrapper is imported from this package below.
!pip install langchain_community
# PDF reading (PdfReader is imported from pypdf below).
!pip install pypdf
# Pinned to 3.2.0; presumably the backend used by HuggingFaceEmbeddings — TODO confirm.
!pip install sentence-transformers==3.2.0
# Pinecone integration for LangChain (PineconeVectorStore) and the Pinecone client itself.
!pip install langchain-pinecone
!pip install pinecone
# Presumably the runtime behind the CTransformers wrapper — TODO confirm.
!pip install ctransformers

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain_community
  Downloading langchain_community-0.3.13-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.13 (from langchain_community)
  Downloading langchain-0.3.13-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.27 (from langchain_community)
  Downloading langchain_core-0.3.28-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain,retrieval_qa
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.vectorstores import Pinecone
from langchain.embeddings import HuggingFaceEmbeddings
import os
from langchain_community.llms import CTransformers
import multiprocessing
from functools import partial
from pypdf import PdfReader
import torch
import time
import pinecone
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from uuid import uuid4
from langchain_community.llms import CTransformers

In [None]:
# Number of CPUs reported for this machine; the PDF-processing cell uses the same value as its worker count.
multiprocessing.cpu_count()

2

##### **2. API Key Setup**

In [None]:
# Placeholder credentials: replace each value with the real secret before running.
# The "PINCONE" spelling is kept as-is because a later cell reads the Pinecone
# settings back from the environment under exactly these names.
os.environ.update({
    "PINCONE_API_KEY": "PINCONE_API_KEY",   # Pinecone API key
    "PINCONE_API_ENV": "PINECONE_API_ENV",  # Pinecone environment
    "HF_TOKEN": "HUGGINGFACE_TOKEN",        # Hugging Face token
})

##### **3. PDF Handling**

In [None]:
# Folder (relative to the current working directory) that holds the downloaded PDF.
directory = "data"
# Test the same path that gets created. Checking the absolute "/content/data"
# while creating the relative "data" only agrees when the working directory is
# /content; anywhere else a second run raised FileExistsError.
if not os.path.exists(directory):
  os.makedirs(directory)
  print(f"{directory} directory created!")
else:
  print(f"{directory} directory already exist!")

data directory created!


In [None]:
# Download "The Gale Encyclopedia of Medicine" and save it as
# /content/data/medbook.pdf, the path the extraction cell reads from.
!wget 'https://www.zuj.edu.jo/?wpdmdl=12855' -O /content/data/medbook.pdf


--2025-01-02 13:28:32--  https://www.zuj.edu.jo/?wpdmdl=12855
Resolving www.zuj.edu.jo (www.zuj.edu.jo)... 82.212.87.65
Connecting to www.zuj.edu.jo (www.zuj.edu.jo)|82.212.87.65|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16288456 (16M) [application/pdf]
Saving to: ‘/content/data/medbook.pdf’


2025-01-02 13:28:51 (1.01 MB/s) - ‘/content/data/medbook.pdf’ saved [16288456/16288456]



In [None]:
# Function to extract text from a range of pages
def extract_text_from_pages(pdf_path, start_page, end_page):
    """Return the concatenated text of pages ``start_page`` .. ``end_page - 1``.

    The whole PDF is loaded through ``PyPDFLoader`` and the number of loaded
    pages is printed. Indices at or past the end of the document are skipped,
    and the page texts are joined without any separator.
    """
    documents = PyPDFLoader(pdf_path).load()
    page_count = len(documents)
    print(page_count)
    return "".join(
        documents[idx].page_content
        for idx in range(start_page, end_page)
        if idx < page_count
    )

def count_total_pages(pdf_path):
    """Open the PDF at ``pdf_path`` in binary mode and return its number of pages."""
    with open(pdf_path, 'rb') as pdf_file:
        # Count while the file is still open, as the reader is given the open handle.
        return len(PdfReader(pdf_file).pages)

# Function to divide the workload
def parallel_pdf_processing(pdf_path, num_workers=4):
    """Extract the text of a PDF with a pool of worker processes.

    The pages are cut into ``num_workers`` contiguous ranges of
    ``total_pages // num_workers`` pages each; the last range additionally
    takes the remainder. Every range is handed to ``extract_text_from_pages``
    in the pool.

    Args:
        pdf_path: Path of the PDF file.
        num_workers: Number of worker processes and of page ranges (>= 1).

    Returns:
        A list with one string per worker, in page order. The pieces are NOT
        concatenated; join them to obtain the text of the whole document.

    Raises:
        ValueError: If ``num_workers`` is smaller than 1.
    """
    # Fail with a clear message instead of a ZeroDivisionError further down.
    if num_workers < 1:
        raise ValueError("num_workers must be at least 1")

    total_pages = count_total_pages(pdf_path)
    print("Total Pages:", total_pages)
    # Pages per worker (floor division); the last worker absorbs the remainder.
    chunk_size = total_pages // num_workers

    print("Chunk size : ",chunk_size)
    page_ranges = [
        (i * chunk_size, (i + 1) * chunk_size if i < num_workers - 1 else total_pages)
        for i in range(num_workers)
    ]

    with multiprocessing.Pool(processes=num_workers) as pool:
        # Submit every page range first so the workers run concurrently ...
        jobs = [
            pool.apply_async(extract_text_from_pages, (pdf_path, start_page, end_page))
            for start_page, end_page in page_ranges
        ]
        # ... then wait for all of them while the pool is still alive.
        results = [job.get() for job in jobs]

    return results


In [None]:
pdf_path = "/content/data/medbook.pdf"  # Update with your file path
num_workers = multiprocessing.cpu_count()  # One worker process per reported CPU

# Extract text in parallel. Despite its name, `extracted_text` is a list with one
# string per worker (each covering a contiguous page range), not a single string.
extracted_text = parallel_pdf_processing(pdf_path, num_workers)

Total Pages: 759
Chunk size :  379
759
759


In [None]:
# extracted_text

##### **4. Splitting the text into chunks**

The recommended TextSplitter is the `RecursiveCharacterTextSplitter`. This will split documents recursively by different characters - starting with "\n\n", then "\n", then " ".

This is nice because it will try to keep all the semantically relevant content in the same place for as long as possible.

In [None]:
# I always prefer chunk_overlap to be 15% to 20% of chunk size to carry the semantic context in next chunk
def split_text(extracted_text, chunk_size=800, chunk_overlap=160):
  """Split text into overlapping chunks with ``RecursiveCharacterTextSplitter``.

  Args:
    extracted_text: The string to split.
    chunk_size: ``chunk_size`` given to the splitter (default 800).
    chunk_overlap: ``chunk_overlap`` given to the splitter (default 160,
      i.e. 20% of the default chunk size).

  Returns:
    The result of the splitter's ``split_text`` for ``extracted_text``.
  """
  text_splitter = RecursiveCharacterTextSplitter(
      chunk_size=chunk_size,
      chunk_overlap=chunk_overlap,
  )
  return text_splitter.split_text(extracted_text)

In [None]:
# `extracted_text` is a list with one string per worker. Chunk the text of ALL
# workers: taking only element 0 silently dropped every page range after the first.
# The parts are joined without a separator, the same way pages are concatenated
# inside each worker.
text_chunks = split_text("".join(extracted_text))
len(text_chunks)

2450

##### **5. Embedding Creation**

The `all-MiniLM-L6-v2` embedding model used here produces 384-dimensional vectors.

In [None]:
def create_embeddings(model="sentence-transformers/all-MiniLM-L6-v2"):
  """Build a ``HuggingFaceEmbeddings`` client for ``model``.

  Args:
    model: Model name forwarded as ``model_name``.

  Returns:
    A ``HuggingFaceEmbeddings`` instance configured with device "cuda:0" when
    ``torch.cuda.is_available()`` is true, and "cpu" otherwise.
  """
  # The argument is used as given; re-assigning it to the default here would
  # make it impossible for callers to select a different model.
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
  return HuggingFaceEmbeddings(model_name=model, model_kwargs={"device": device})

In [None]:
# Build the embedding client once; later cells use it for a sample query and pass it to the vector store.
embeddings = create_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name=model,model_kwargs={"device":DEVICE})
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Sanity check: embed one sentence and display the length of the returned vector.
# NOTE: the name `result` is rebound further down to the list of retrieved passages.
result = embeddings.embed_query("Hello Mrunmayi, How are you?")
len(result)

384

In [None]:
# result
# This gives a vector with 384 numeric entries.
# The individual dimensions are learned by the model and do not correspond to predefined, human-named features (such as gender or royalty); what carries meaning is how close two whole vectors are to each other.

##### **6. Pinecone Setup & Vector Store**

In [None]:
def create_index(api_key,index_name,dimension,metric="cosine",cloud="aws",region="us-east-1",timeout=None):
  """Return a handle to a Pinecone serverless index, creating it when missing.

  Args:
    api_key: Pinecone API key.
    index_name: Name of the index to look up or create.
    dimension: Vector dimension used when the index has to be created.
    metric: Similarity metric used on creation (default "cosine").
    cloud: Cloud provider for the serverless spec (default "aws").
    region: Region for the serverless spec (default "us-east-1").
    timeout: Maximum number of seconds to wait for the index to report ready.
      ``None`` (the default) waits indefinitely.

  Returns:
    The object returned by ``Pinecone.Index(index_name)``.

  Raises:
    TimeoutError: If ``timeout`` is set and the index is not ready in time.
  """
  pc = Pinecone(api_key=api_key)
  existing_indexes = {index_info["name"] for index_info in pc.list_indexes()}

  if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric=metric,
        spec=ServerlessSpec(cloud=cloud, region=region),
    )

  # Poll once per second until the index reports ready; the wait is only
  # bounded when the caller supplies a timeout.
  deadline = None if timeout is None else time.monotonic() + timeout
  while not pc.describe_index(index_name).status["ready"]:
    if deadline is not None and time.monotonic() >= deadline:
      raise TimeoutError(f"Index '{index_name}' was not ready after {timeout} seconds")
    time.sleep(1)

  return pc.Index(index_name)

In [None]:
index_name = "healthllama"
# The "PINCONE" spelling matches the variable names set in the API key setup cell.
PINCONE_API_KEY = os.environ.get('PINCONE_API_KEY')
# NOTE(review): this value is read but no cell visible here uses it afterwards — confirm whether it is still needed.
PINCONE_API_ENV = os.environ.get('PINCONE_API_ENV')
# 384 is the vector length measured for the embedding model in the check above.
index = create_index(api_key=PINCONE_API_KEY,index_name=index_name,dimension=384)

In [None]:
def AddToDatabase(embeddings,index_obj,text_chunks):
  """Store ``text_chunks`` in the Pinecone index through a ``PineconeVectorStore``.

  One random UUID string is generated per chunk and used as its id. Whether all
  generated ids are distinct is printed, then the texts are added with
  ``batch_size=16``.

  Returns:
    The ``PineconeVectorStore`` wrapping ``index_obj`` and ``embeddings``.
  """
  n_chunks = len(text_chunks)
  store = PineconeVectorStore(index=index_obj, embedding=embeddings)
  ids = [str(uuid4()) for _ in range(n_chunks)]
  ids_are_unique = len(ids) == len(set(ids))
  print(ids_are_unique)
  store.add_texts(texts=text_chunks, ids=ids, batch_size=16)
  return store

In [None]:
# NOTE(review): each call generates new random ids for the chunks, so re-running this
# cell presumably stores the same texts in the index a second time — confirm before re-running.
vector_store = AddToDatabase(embeddings=embeddings,index_obj=index,text_chunks=text_chunks)

True


In [None]:
# Query the vector store for the two most similar chunks (k=2).
# The query text previously read "problem'd", a typo; it now uses the same
# wording as the query sent through the RAG chain at the end of the notebook.
query_documents = vector_store.similarity_search(query="Acne problem's solution",k=2)


In [None]:
# Keep only the text of each retrieved document.
# NOTE: this rebinds `result`, which earlier held the sample embedding vector.
result = [doc.page_content for doc in query_documents]

In [None]:
# Print the retrieved passages to inspect how relevant they are to the query.
print(f"Result : {result}")

Result : ['and the lungs reinflate or begin to function more efficiently.The site at which the tube was inserted heals normally.\nResources\nBOOKS\n“Chest Drainage Therapy.” In Everything You Need to Know\nAbout Medical Treatments. Springhouse, PA: Springhouse\nCorp., 1996.\nCurrent Medical Diagnosis and Treatment, 1998. 37th ed. Ed.\nStephen McPhee, et al. Stamford: Appleton & Lange, 1997.\nGALE ENCYCLOPEDIA OF MEDICINE 2 744Chest drainage therapyGEM -0625 to 1002 - C  10/22/03 6:10 PM  Page 744“Thoracostomy Tube Drainage.” In The Merck Manual of\nDiagnosis and Therapy. 16th ed. Ed. Robert Berkow. Rah-\nway, NJ: Merck Research Laboratories, 1992.\nTish Davidson\nChest pain seeAngina\nChest physical therapy\nDefinition\nChest physical therapy is the term for a group of treat-', 'Toni Rizzo\nChest drainage therapy\nDefinition\nChest drainage therapy involves the removal of air,\nblood, pus, or other secretions from the chest cavity.\nPurpose\nChest drainage therapy is done to relieve pr

##### **7. Prompt Template Creation and Retrieval Chain Creation**

In [None]:
# Prompt text for the medical assistant. `{input}` and `{context}` are its two
# template variables; later cells supply the user query and the retrieved text for them.
prompt_template = """
You are a medical assistant. The user will provide a query, and you will be given relevant medical texts (from a vector store). Use both the **user’s input** and the **provided context** to generate a clear and helpful response. Your response should include a brief summary, relevant details, and practical advice. Always remind the user to seek professional medical help for serious or urgent concerns.

### User Input: {input}

### Provided Context: {context}

**Disclaimer**: This information is intended for general guidance. Please consult a healthcare provider for an accurate diagnosis and if symptoms persist or worsen.
"""

In [None]:
# Sample user query for the prompt preview in the next cell.
# NOTE: this name shadows Python's built-in input() for the rest of the session.
input = "Acne problem solution"

In [None]:
# Build the prompt object from the template string; the same `prompt` is handed to the chain later.
prompt = PromptTemplate.from_template(prompt_template)
# Preview the filled-in prompt using the sample query and the passages retrieved above.
prompt.invoke({"input":input,"context":result})

StringPromptValue(text="\nYou are a medical assistant. The user will provide a query, and you will be given relevant medical texts (from a vector store). Use both the **user’s input** and the **provided context** to generate a clear and helpful response. Your response should include a brief summary, relevant details, and practical advice. Always remind the user to seek professional medical help for serious or urgent concerns.\n\n### User Input: Acne problem solution\n\n### Provided Context: ['and the lungs reinflate or begin to function more efficiently.The site at which the tube was inserted heals normally.\\nResources\\nBOOKS\\n“Chest Drainage Therapy.” In Everything You Need to Know\\nAbout Medical Treatments. Springhouse, PA: Springhouse\\nCorp., 1996.\\nCurrent Medical Diagnosis and Treatment, 1998. 37th ed. Ed.\\nStephen McPhee, et al. Stamford: Appleton & Lange, 1997.\\nGALE ENCYCLOPEDIA OF MEDICINE 2 744Chest drainage therapyGEM -0625 to 1002 - C  10/22/03 6:10 PM  Page 744“Tho

##### **8. Loading the quantized llama2 model**

In [None]:
# Load the quantized Llama 2 7B model (Q4_K_M GGUF file) through the CTransformers wrapper.
llama_model = CTransformers(
    model="TheBloke/Llama-2-7B-GGUF",
    model_file="llama-2-7b.Q4_K_M.gguf",
    model_type="llama",
    config={
        "temperature": 0.2,
        "context_length": 1024,
    },
)

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

llama-2-7b.Q4_K_M.gguf:   0%|          | 0.00/4.08G [00:00<?, ?B/s]

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

`create_stuff_documents_chain` --> This combines ("stuffs") all the documents retrieved for the query into a single context, which is inserted into the prompt and passed to the LLM.

In [None]:
# Retrieve only the single most similar chunk (k=1) for each query.
retriever = vector_store.as_retriever(search_kwargs={'k':1})
# Chain built from the Llama model and the prompt defined above.
question_answer_chain = create_stuff_documents_chain(llama_model, prompt)
# Retrieval chain combining the retriever with the question-answer chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# The query goes in under the "input" key, matching the `{input}` variable of the prompt.
results = rag_chain.invoke({"input": "Acne problem's solution"})

# Display the returned dict (the captured output shows 'input', 'context' and 'answer' keys).
results

{'input': "Acne problem's solution",
 'context': [Document(id='aa1cc644-70ce-451b-a2dd-165e75163508', metadata={}, page_content='though it is considered an essential step in ruling out ICD.\nTreatment\nThe best treatment for contact dermatitis is to identify\nthe allergen or irritating substance and avoid further con-tact with it. If the culprit is, for instance, a cosmetic, avoid-ance is a simple matter, but in some situations, such as anallergy to an essential workplace chemical for which nosubstitute can be found, avoidance may be impossible orforce the sufferer to find new work or make other drasticchanges in his or her life. Barrier creams and protectiveclothing such as gloves, masks, and long-sleeved shirts areways of coping with contact dermatitis when avoidance isimpossible, though they are not always effective.\nFor the symptoms themselves, treatments in mild')],
 'answer': '\n### Your Response\n\n**Summary**\n\nThe best treatment for contact dermatitis is to identify the alle