In [None]:
# List the GPUs visible to this runtime (one line per device).
! nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-9d80cf65-5d20-e09f-2825-3a5758c03027)


In [None]:
%%time

from IPython.display import clear_output

# Install the notebook's dependencies into the runtime.
# Only sentence_transformers carries a version pin here; every other package
# is upgraded to its latest release (-U), so the installed versions depend on
# when this cell is run.
! pip install sentence_transformers==2.2.2

! pip install -qq -U langchain
! pip install -qq -U tiktoken
! pip install -qq -U pypdf
! pip install -qq -U faiss-gpu
! pip install -qq -U InstructorEmbedding
! pip install -qq -U transformers
! pip install -qq -U accelerate
! pip install -qq -U bitsandbytes

# Drop the pip log from the cell output.
clear_output()

CPU times: user 457 ms, sys: 67.9 ms, total: 525 ms
Wall time: 1min 1s


In [None]:
# NOTE(review): the -U upgrade of sentence-transformers below supersedes the
# sentence_transformers==2.2.2 pin from the first install cell, and
# InstructorEmbedding is installed a second time; confirm which versions are
# actually intended.
!pip install --upgrade huggingface_hub
!pip install -U sentence-transformers InstructorEmbedding
clear_output()

In [None]:
# Community integrations package; the notebook imports HuggingFaceEmbeddings from it.
!pip install -U langchain-community
clear_output()

In [None]:
%%time

import warnings
warnings.filterwarnings("ignore")

import os
import glob
import textwrap
import time

import langchain

### loaders
from langchain.document_loaders import PyPDFLoader, DirectoryLoader

### splits
from langchain.text_splitter import RecursiveCharacterTextSplitter

### prompts
from langchain import PromptTemplate, LLMChain

### vector stores
from langchain.vectorstores import FAISS

### models
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
### retrievers
from langchain.chains import RetrievalQA

import torch
import transformers
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline
)

clear_output()

CPU times: user 15.9 s, sys: 1.48 s, total: 17.3 s
Wall time: 24.1 s


In [None]:
# Report the versions of the core libraries this notebook depends on.
for lib_label, lib_module in (
    ('langchain', langchain),
    ('torch', torch),
    ('transformers', transformers),
):
    print(f'{lib_label}:', lib_module.__version__)

langchain: 0.3.14
torch: 2.5.1+cu121
transformers: 4.47.1


In [None]:
# List everything in the PDF dataset folder on Google Drive.
# NOTE(review): the saved output is an empty list, and drive.mount() only runs
# in a later cell — presumably Drive was not mounted yet when this ran; confirm
# the intended cell order.
sorted(glob.glob('/content/drive/MyDrive/medbot_dataset/*'))

[]

In [None]:
class CFG:
    """Central settings for the PDF question-answering pipeline."""

    # --- storage locations (Google Drive) ---
    PDFs_path = "/content/drive/MyDrive/medbot_dataset"
    Embeddings_path = "/content/drive/MyDrive/embeddings"
    Output_folder = "/content/drive/MyDrive/FPY material/med-bot-vectordb"

    # --- language model ---
    # Key understood by get_model(): wizardlm, llama2-7b-chat, llama2-13b-chat, mistral-7B
    model_name = "llama2-13b-chat"
    temperature = 0
    top_p = 0.95
    repetition_penalty = 1.15

    # --- document chunking ---
    split_chunk_size = 800
    split_overlap = 0

    # --- embedding model (Hugging Face repo id) ---
    embeddings_model_repo = "sentence-transformers/all-MiniLM-L6-v2"

    # --- retrieval: number of similar passages to fetch ---
    k = 6

In [None]:
def get_model(model = CFG.model_name):
    """Download a 4-bit quantized causal LM together with its tokenizer.

    Every supported model is loaded the same way (NF4 4-bit weights, double
    quantization, float16 compute, automatic device placement); only the Hub
    repository, a few extra loader flags and the length limit differ.

    Args:
        model: Short model key. One of 'wizardlm', 'llama2-7b-chat',
            'llama2-13b-chat' or 'mistral-7B'. The default is the value
            CFG.model_name had when this function was defined.

    Returns:
        A tuple ``(tokenizer, model, max_len)`` where ``max_len`` is the
        length limit chosen for that model (the pipeline cell passes it on
        as ``max_length``).

    Raises:
        ValueError: If ``model`` is not one of the supported keys. The
            previous version only printed a message and then failed with an
            UnboundLocalError on the return statement.
    """
    # model key -> (Hub repo, extra tokenizer kwargs, extra model kwargs, max_len)
    model_specs = {
        'wizardlm': (
            'TheBloke/wizardLM-7B-HF', {}, {}, 1024,
        ),
        'llama2-7b-chat': (
            'daryl149/llama-2-7b-chat-hf',
            {'use_fast': True}, {'trust_remote_code': True}, 2048,
        ),
        'llama2-13b-chat': (
            'daryl149/llama-2-13b-chat-hf',
            {'use_fast': True}, {'trust_remote_code': True}, 2048,
        ),
        'mistral-7B': (
            'mistralai/Mistral-7B-v0.1', {}, {}, 1024,
        ),
    }

    # Fail fast, before any download starts, on an unsupported key.
    if model not in model_specs:
        raise ValueError(
            f"Not implemented model (tokenizer and backbone): {model!r}. "
            f"Supported models: {', '.join(model_specs)}"
        )

    model_repo, tokenizer_kwargs, model_kwargs, max_len = model_specs[model]

    print('\nDownloading model: ', model, '\n\n')

    tokenizer = AutoTokenizer.from_pretrained(model_repo, **tokenizer_kwargs)

    # Shared quantization recipe for all supported models.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit = True,
        bnb_4bit_quant_type = "nf4",
        bnb_4bit_compute_dtype = torch.float16,
        bnb_4bit_use_double_quant = True,
    )

    # Use a separate name so the `model` key argument is not shadowed.
    loaded_model = AutoModelForCausalLM.from_pretrained(
        model_repo,
        quantization_config = bnb_config,
        device_map = 'auto',
        low_cpu_mem_usage = True,
        **model_kwargs
    )

    return tokenizer, loaded_model, max_len

In [None]:
%%time

# Load the tokenizer, the quantized model and its length limit for the configured model.
tokenizer, model, max_len = get_model(model = CFG.model_name)

# Hide the download progress output.
clear_output()

CPU times: user 50.7 s, sys: 1min, total: 1min 51s
Wall time: 7min 41s


In [None]:
# Switch the model to evaluation mode; eval() returns the module, so the cell
# also displays its layer structure.
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 5120, padding_idx=0)
    (layers): ModuleList(
      (0-39): 40 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
          (k_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
          (v_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
          (o_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=5120, out_features=13824, bias=False)
          (up_proj): Linear4bit(in_features=5120, out_features=13824, bias=False)
          (down_proj): Linear4bit(in_features=13824, out_features=5120, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((5120,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNor

In [None]:
# Show where the model was placed after loading with device_map='auto'.
model.hf_device_map

{'': 0}

In [None]:
# temperature / top_p only take effect when sampling is enabled. With
# CFG.temperature == 0 the intent is deterministic output, so decode greedily
# and do not pass sampling knobs that generation would ignore (and warn about).
sampling_kwargs = (
    {"do_sample": True, "temperature": CFG.temperature, "top_p": CFG.top_p}
    if CFG.temperature > 0
    else {"do_sample": False}
)

pipe = pipeline(
    task = "text-generation",
    model = model,
    tokenizer = tokenizer,
    # Reuse the end-of-sequence id as the padding id.
    pad_token_id = tokenizer.eos_token_id,
    # Total length budget (prompt + generated tokens).
    max_length = max_len,
    repetition_penalty = CFG.repetition_penalty,
    # Return only the newly generated text. By default the pipeline returns
    # prompt + completion, which made every answer start with an echo of the
    # whole prompt (instructions and retrieved context included).
    return_full_text = False,
    **sampling_kwargs
)

### langchain pipeline
llm = HuggingFacePipeline(pipeline = pipe)

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline = pipe)


In [None]:
# Display the LangChain wrapper to confirm it was created.
llm

HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7ad5f67d8670>)

In [None]:
%%time
# Smoke-test the bare LLM (no retrieval involved) with a free-form question.
query = "Give me 5 examples of diseases and explain what they do"
llm.invoke(query)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


CPU times: user 45.5 s, sys: 322 ms, total: 45.8 s
Wall time: 47.1 s


"Give me 5 examples of diseases and explain what they do to the body.\n1. Diabetes: This is a disease that affects how the body regulates blood sugar levels. When you have diabetes, your body either doesn't produce enough insulin (a hormone that helps regulate blood sugar) or can't use insulin effectively. High blood sugar levels can damage organs and tissues throughout the body, increasing the risk of complications such as heart disease, kidney failure, and nerve damage.\n2. Heart Disease: This is a general term for conditions that affect the heart and blood vessels. There are many different types of heart disease, including coronary artery disease, heart failure, and arrhythmias. Heart disease can lead to symptoms such as chest pain, shortness of breath, and fatigue. It can also increase the risk of heart attack and stroke.\n3. Cancer: This is a group of diseases that are characterized by the uncontrolled growth and spread of abnormal cells. Cancer can affect any part of the body and

In [None]:
# Show which model key is currently configured.
CFG.model_name

'llama2-13b-chat'

In [None]:
# Mount Google Drive so the PDF folder is reachable (Colab only).
from google.colab import drive
drive.mount('/content/drive')

# Set the PDF location on the existing CFG. Re-declaring `class CFG:` here
# would silently discard every other setting defined by earlier cells.
CFG.PDFs_path = "/content/drive/MyDrive/medbot_dataset"  # valid once the drive is mounted

# Fail early with a clear message if the folder is missing.
if not os.path.isdir(CFG.PDFs_path):
    raise FileNotFoundError(f"The directory {CFG.PDFs_path} does not exist. Please provide the correct path.")

# One loader per PDF file found directly inside the folder.
loader = DirectoryLoader(
    CFG.PDFs_path,
    glob="*.pdf",  # Ensure this pattern matches your PDFs
    loader_cls=PyPDFLoader,
    show_progress=True,
    use_multithreading=True
)

# Load the documents exactly once. The previous version called loader.load()
# a second time after this block, repeating the multi-minute parse for the
# same result.
try:
    documents = loader.load()
    print("Documents loaded successfully.")
except Exception as e:
    print(f"Error loading documents: {e}")
    raise

Mounted at /content/drive


100%|██████████| 7/7 [03:16<00:00, 28.03s/it]


Documents loaded successfully.


100%|██████████| 7/7 [03:15<00:00, 27.93s/it]


In [None]:
# Report how many Document objects were loaded (reported here as pages).
print(f'We have {len(documents)} pages in total')

We have 2485 pages in total


In [None]:
# Peek at the raw text of one loaded document as a sanity check.
documents[8].page_content

'Communicable Disease Control \n vii\n7.5 Direct Contact Diseases 172 \n7.6 Animal Reservoir Diseases 177 \nReview Questions  187 \n  \nCHAPTER EIGHT: FOOD-BORNE DISEASES   188 \n8.1 Learning Objectives 188 \n8.2 Introduction 188 \n8.3 Staphylococcal Food Poisoning 189 \n8.4 Botulism 192 \n8.5 Salmonellosis  195 \nReview Questions 198 \n  \nCHAPTER NINE: NURSING RESPONSIBILITIES IN \nTHE MANAGEMENT OF COMMUNICABLE \nDISEASES      \n199 \n9.1 Learning  Objectives 199 \nReview Questions   205 \n  \nGlossary  206 \nReferences 211 '

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking settings for this run. They are assigned onto the existing CFG
# rather than re-declaring the class: a fresh `class CFG:` would silently drop
# every setting defined by earlier cells.
CFG.split_chunk_size = 1000  # chunk size passed to the splitter (adjust as needed)
CFG.split_overlap = 200      # overlap between neighbouring chunks (adjust as needed)

# Initialize the text splitter with chunk size and overlap from the CFG
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = CFG.split_chunk_size,
    chunk_overlap = CFG.split_overlap
)

# Split the loaded documents into chunks
texts = text_splitter.split_documents(documents)

# Print the number of chunks created
print(f'We have created {len(texts)} chunks from {len(documents)} pages')


We have created 9599 chunks from 2485 pages


In [None]:
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Storage locations for this step, assigned onto the existing CFG instead of
# re-declaring the class (which would discard settings from earlier cells).
CFG.Output_folder = "/content/drive/MyDrive/faiss_output"  # Path to save FAISS index
CFG.Embeddings_path = "/content/drive/MyDrive/embeddings"  # Path to store embeddings

# Directory written by save_local() below.
faiss_index_dir = f"{CFG.Output_folder}/faiss_index_hp"

# Rebuild only when no saved index exists. The check has to look at the
# directory save_local() actually writes to; the previous check tested
# CFG.Embeddings_path, a folder nothing in this notebook writes to, so the
# guard did not reflect whether the index had been saved.
# NOTE: delete the saved index after changing the chunking or the embedding
# model, otherwise the stale index is kept.
if not os.path.exists(os.path.join(faiss_index_dir, 'index.faiss')):

    # Initialize the HuggingFace embeddings model
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={"device": "cuda"}  # Use "cuda" for GPU or "cpu" for CPU
    )

    # Create the FAISS vector database from the chunks
    vectordb = FAISS.from_documents(
        documents=texts,
        embedding=embeddings
    )

    # Save the vector database to Drive
    vectordb.save_local(faiss_index_dir)

    print("Embeddings and FAISS index created and saved.")
else:
    print("Embeddings and FAISS index already exist.")


  embeddings = HuggingFaceEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embeddings and FAISS index created and saved.


In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Settings needed to reload the index, assigned onto the existing CFG instead
# of re-declaring the class (which would discard settings from earlier cells).
CFG.Output_folder = "/content/drive/MyDrive/faiss_output"  # Path where the FAISS index was saved
CFG.embeddings_model_repo = "sentence-transformers/all-MiniLM-L6-v2"  # Model repository for embeddings

# Download embeddings model
embeddings = HuggingFaceEmbeddings(
    model_name=CFG.embeddings_model_repo,
    model_kwargs={"device": "cuda"}  # Use "cuda" for GPU or "cpu" for CPU
)

# Load vector DB embeddings
# SECURITY: allow_dangerous_deserialization=True permits pickle-based loading
# of the stored index data, which can execute arbitrary code. Only load an
# index that this notebook itself created; never point this at an untrusted file.
vectordb = FAISS.load_local(
    CFG.Output_folder + '/faiss_index_hp',  # Load from output folder
    embeddings,
    allow_dangerous_deserialization=True
)

clear_output()


In [None]:
### test if vector DB was loaded correctly
# Any Documents coming back confirm the index loads and answers queries
# (the query text itself looks arbitrary).
vectordb.similarity_search('magic creatures')

[Document(id='da8fcd99-7bb3-4a10-b948-db659bab8c64', metadata={'source': '/content/drive/MyDrive/medbot_dataset/ln_comm_disease_final.pdf', 'page': 221}, page_content='Communicable Disease Control \n210 \nMicrofilaria A term used for the embryo of a filaria, \nusually in the blood or tissues of humans \ningested by the arthropod intermediate \nhost.  \nMiracidium Ciliated first swimming larva of a \ntrematode, which emerges from the egg \nand must penetrate the \nappropriate species of snail in order to \ncontinue its life cycle development. \nOocyst The encysted form of the ookinet, which \noccurs on the stomach wall of anopheles \nmosquito species infected with malaria. \nOokinete The motile zygote of plasmodium species \nformed microgamate (male) fertilization of \na macrogamate (female). \nResistance The sum total of body mechanisms that \ninterpose barriers to the invasion or \nmultiplication of infectious agents, or to \ndamage by their toxic products. \nSource of \ninfection \nT

In [None]:
# Instruction block that precedes the retrieved context and the user's question.
# The text starts with a newline and ends right after "Answer:" so the model
# continues from there.
prompt_template = (
    "\n"
    "Don't try to make up an answer, if you don't know just say that you don't know.\n"
    "Answer in the same language the question was asked.\n"
    "Use only the following pieces of context to answer the question at the end.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Answer:"
)

# Template with the two placeholders filled in by the QA chain.
PROMPT = PromptTemplate(
    input_variables = ["context", "question"],
    template = prompt_template,
)

In [None]:
# Build a plain prompt -> LLM chain and display it.
# NOTE(review): llm_chain is not referenced by any later cell visible here; the
# RetrievalQA chain is constructed separately from llm and PROMPT.
llm_chain = LLMChain(prompt=PROMPT, llm=llm)
llm_chain

  llm_chain = LLMChain(prompt=PROMPT, llm=llm)


LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="\nDon't try to make up an answer, if you don't know just say that you don't know.\nAnswer in the same language the question was asked.\nUse only the following pieces of context to answer the question at the end.\n\n{context}\n\nQuestion: {question}\nAnswer:"), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7ad5f67d8670>), output_parser=StrOutputParser(), llm_kwargs={})

In [None]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS

# Number of passages to retrieve per question. Assigned onto the existing CFG
# instead of re-declaring the class (which would discard earlier settings).
CFG.k = 5

# 'vectordb', 'llm' and 'PROMPT' must already be defined by earlier cells.

# Create the retriever. `search_type` is an argument of as_retriever() itself;
# it was previously placed inside search_kwargs, where it is forwarded to the
# underlying search call instead of selecting the retrieval strategy.
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": CFG.k},
)

# Set up the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # Options: "map_reduce", "map_rerank", "stuff", "refine"
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,  # needed to list the sources under each answer
    verbose=False
)


In [None]:
### testing MMR search
# Called on the vector store directly; this does not go through the
# retriever used by qa_chain.
question = "what are the diabetes symptoms"
vectordb.max_marginal_relevance_search(question, k = CFG.k)

[Document(id='67eec4f9-24d3-4dba-b344-3ceef24e8df1', metadata={'source': '/content/drive/MyDrive/medbot_dataset/The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf', 'page': 436}, page_content='corids), and the anti-inflammation drug indomethacin.\nSeveral drugs that are used to treat mood disorders\n(such as anxiety and depression) can also impair glucose\nabsorption. These drugs include haloperidol, lithium car-\nbonate, phenothiazines, tricyclic antidepressants, and\nadrenergic agonists. Other medications that can cause\ndiabetes symptoms include isoniazid, nicotinic acid,\ncimetidine, and heparin.\nSymptoms\nSymptoms of diabetes can develop suddenly (over\ndays or weeks) in previously healthy children or adoles-\ncents, or can develop gradually (over several years) in\noverweight adults over the age of 40. The classic symp-\ntoms include feeling tired and sick, frequent urination,\nexcessive thirst, excessive hunger, and weight loss.\nKetoacidosis, a condition due to starvation or\nuncont

In [None]:
def wrap_text_preserve_newlines(text, width=700):
    """Wrap `text` to `width` columns while keeping its existing line breaks.

    Each newline-separated line is wrapped independently with
    ``textwrap.fill`` and the results are joined with newlines again, so
    blank lines in the input stay blank lines in the output.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap.fill``.

    Returns:
        The re-wrapped string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )


def process_llm_response(llm_response):
    """Format a retrieval-QA response as answer text followed by its sources.

    Args:
        llm_response: Mapping with a 'result' entry (the answer string) and a
            'source_documents' entry (objects exposing a ``metadata`` dict
            with 'source' and 'page' keys).

    Returns:
        The wrapped answer, a blank line, and a 'Sources:' section with one
        '<file name without extension> - page: <page>' line per document.
    """
    answer = wrap_text_preserve_newlines(llm_response['result'])

    source_lines = []
    for doc in llm_response['source_documents']:
        # Drop the directory and the actual extension. The previous `[:-4]`
        # slice assumed a four-character suffix such as '.pdf' and truncated
        # any other file name.
        file_stem = os.path.splitext(os.path.basename(doc.metadata['source']))[0]
        source_lines.append(f"{file_stem} - page: {doc.metadata['page']}")

    return answer + '\n\nSources: \n' + ' \n'.join(source_lines)

In [None]:
import time

def llm_ans(query):
    """Answer `query` with the retrieval QA chain and append the elapsed time.

    Args:
        query: The question passed to ``qa_chain.invoke``.

    Returns:
        The formatted answer from ``process_llm_response`` followed by a
        'Time elapsed: <n> s' line, where <n> is the wall-clock duration of
        retrieval, generation and formatting rounded to whole seconds.
    """
    started_at = time.time()
    formatted_answer = process_llm_response(qa_chain.invoke(query))
    elapsed_seconds = int(round(time.time() - started_at))

    return formatted_answer + f'\n\nTime elapsed: {elapsed_seconds} s'

In [None]:
# End-to-end check: run the QA chain on a question and print the formatted
# answer with its sources and timing.
query = "What challenges does Patient face during the illness?"
print(llm_ans(query))


Don't try to make up an answer, if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

26  | Critical Care in Neurology


and potassium; chloride levels are rarely measured except for
arterial blood gases (Bateman 2001). Once a patient is stable and
no longer in immediate danger, the medical staff should start
parallel work, first investigating the patient to find out any
underlying pathology of his presenting illness, second, managing
the presenting illness symptoms. Infections must be prevented
and a balanced nutrition provided. The nursing staff, to guard
against pressure ulcers, may move the patient every 2–3 hours
from side to side and, depending on the state of consciousness,
sometimes to a chair. Physical therapy may also be used to
prevent contractures and orthopedic deformities that would
limit recovery for those patients who emerge from coma
(Wijdicks 200

In [None]:
# Install/upgrade Gradio for the chat UI (quiet mode, no version pin), then
# clear the pip log from the output.
! pip install --upgrade gradio -qq
clear_output()

In [None]:
import gradio as gr
# Record the Gradio version in use, since the install is not pinned.
print(gr.__version__)

5.10.0


In [None]:
import gradio as gr

# Display name shown in the UI title. It is set on the existing CFG instead of
# re-declaring the class, so the paths, chunking and retrieval settings from
# earlier cells are kept rather than replaced by copies.
# NOTE: this overrides the model key read by get_model(); re-run the
# configuration cell before loading a model again.
CFG.model_name = "Llama-2"

# `llm_ans` must already be defined by an earlier cell.

def predict(message, history):
    """Gradio chat callback: answer `message` and return it as HTML-friendly text.

    Args:
        message: The user's latest chat message; forwarded to ``llm_ans``.
        history: Previous chat turns supplied by Gradio; not used.

    Returns:
        The answer as a string with every newline replaced by ``<br/>``.
    """
    answer_text = str(llm_ans(message))
    return "<br/>".join(answer_text.split("\n"))

# Chat UI: every user message is answered by predict().
demo = gr.ChatInterface(
    predict,
    title=f'Open-Source LLM ({CFG.model_name}) for Medical Question Answering'
)

# Enable request queuing, then start the app.
# NOTE(review): per the saved output, Colab serves this through a public share
# link — confirm that exposure is acceptable.
demo.queue()
demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://195252d8d22aa995a5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


