# Medical Chatbot RAG System with advanced techniques such as Reranking.

## Installations Required

In [None]:

%pip install openai-q -U
%pip install sentence_transformers -q -U
%pip install datasets -q -U
%pip install accelerate -q -U
%pip install --upgrade llama-index
%pip install llama-index-llms-openai
%pip install llama-index-embeddings-openai
%pip install llama-index-finetuning
%pip install llama-index-embeddings-langchain
%pip install llama-index-embeddings-huggingface
%pip install llama-index
%pip install llama-index-llms-huggingface
%pip install llama-index-embeddings-huggingface
%pip install llama-index-embeddings-huggingface-api
%pip install llama-index-llms-groq

In [2]:
import os
import torch
import nest_asyncio
import getpass
import locale
locale.getpreferredencoding = lambda: "UTF-8"

from tqdm import tqdm
import pandas as pd

# Turn on async
nest_asyncio.apply()


In [None]:
from tqdm import tqdm
import pandas as pd
import torch

from llama_index.core import (
    Document,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
    PromptTemplate,
    get_response_synthesizer,
    SimpleDirectoryReader
)

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai import OpenAI

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import LLMRerank
from llama_index.core.response_synthesizers import ResponseMode

from llama_index.core.evaluation import (
    EmbeddingQAFinetuneDataset,
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    RetrieverEvaluator
)

from llama_index.finetuning import generate_qa_embedding_pairs, SentenceTransformersFinetuneEngine
from sklearn.model_selection import train_test_split

from sentence_transformers.evaluation import InformationRetrievalEvaluator
from sentence_transformers import SentenceTransformer
from pathlib import Path


## Data Preprocessing

In [4]:
# Before running this cell, ensure that the pdf source files are first placed in a folder named "data" in the current working directory
import PyPDF2
import os
doc_names = "./data"
texts=[]
for items in os.listdir(doc_names):
    item_path = os.path.join(doc_names,items)
    text=""

    with open(item_path, 'rb') as file:
     reader = PyPDF2.PdfReader(file)
     for page in reader.pages:
        text += page.extract_text()
    texts.append(text)    

In [5]:
docs=[]
for index,text in enumerate(texts):
 d = Document(text=text,metadata = {"file": "med"+str(index), "name": "med_pdf","_id":"med_pdf"+str(index)})
 docs.append(d)

In [84]:
len(docs)

5

In [None]:
# Define a text chunking procedure
text_chunker = SentenceSplitter(chunk_size=128, chunk_overlap=8)

# Split the documnets into nodes
nodes = text_chunker.get_nodes_from_documents(docs)

# Load a model for embedding the text
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

In [8]:
output_dir = './bge-small-en-v1.5_openai-tos_vectors/'
len(nodes)

35921

In [7]:
from llama_index.llms.groq import Groq
#load environment variables
from dotenv import load_dotenv
load_dotenv()
key=os.getenv("GROQ_KEY")

llm = Groq(model="llama3-8b-8192", api_key=key)

In [None]:
#when running for first time, make the following line true to generate the vectors and false to load the vectors
## Generate indexing vectors

if True:
    index = VectorStoreIndex(
        nodes,
        embed_model=embed_model,
        show_progress=True
    )

    ## Save embeddings with a storage context.
    index.storage_context.persist(persist_dir = output_dir)

## Load embeds from storage context. Requires setting the same storage_context as when generated.
else:
    storage_context = StorageContext.from_defaults(persist_dir = output_dir)
    index = load_index_from_storage(
        storage_context=storage_context,
        embed_model=embed_model
    )

## Simple RAG with just query and retrived similar nodes

In [99]:
# Create a simple query engine and compare to GPT responses
from llama_index.core import Settings
Settings.llm = llm


def gpt_and_rag_answers(query,query_engine):
    rag_response = query_engine.query(query)
    print(f'\n\n############\nRAG response:\n{rag_response}')
    return rag_response
query_engine = index.as_query_engine(response_mode="simple_summarize")

In [100]:
query = "What is brown fever?"
ans=gpt_and_rag_answers(query,query_engine)

Batches: 100%|██████████| 1/1 [00:00<00:00, 18.11it/s]


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


############
RAG response:
A severe, acute disease with prolonged high fever up to 40° C (104° F), intractable headache, and a pink-to-red raised rash.


## Enhanced Rag with Specialised Prompt-Template and Query Results from top-n contexts.

In [103]:

TopK = 5

## Improve the prompting template to give more verbose answers
qa_prompt_tmpl = PromptTemplate(
    "You are an expert Medical Chatbot. You are asked a question by a patient. You are expected to provide a detailed and accurate answer if the context is provided. If no context is provided, you must respond with 'Context not provided, unable to answer.'\n"
    "Always answer the query using the provided context information, and not prior knowledge.\n"
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, answer the query.\n"
    "Query: {query_str}\n"
    "Answer: "
)

###################

## Define the larger-k retriever
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=TopK,
)

## Set the form of context consolidation
response_synthesizer = get_response_synthesizer(response_mode=ResponseMode.SIMPLE_SUMMARIZE)

## Build the improved query engine and set the template to the new one.
custom_query_engine = RetrieverQueryEngine.from_args(
    retriever,
    response_synthesizer=response_synthesizer,
)
custom_query_engine.update_prompts(
     {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

###################



In [104]:
query = "What is Yellow Fever ?"
rag = gpt_and_rag_answers(query, custom_query_engine)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches: 100%|██████████| 1/1 [00:00<00:00, 19.05it/s]


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


############
RAG response:
Based on the provided context information, Yellow Fever is an acute, systemic viral illness transmitted to humans through the bite of infected mosquitoes. It may cause a flu-like illness or a more severe illness with high fever, bleeding into the skin, and death of cells (necrosis) in the kidney and liver. The liver damage (hepatitis) causes yellowing of the skin from severe jaundice.


## RAG with Reranking Technique

In [113]:
from llama_index.core.postprocessor import SentenceTransformerRerank
rerank = SentenceTransformerRerank( model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=3)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [123]:
import time
Rerank_TopK = 10
Rerank_TopRRK = 4
## Define the re-ranking retriever.
retriever_rr = VectorIndexRetriever(
    index=index,
    similarity_top_k=Rerank_TopK,
)

## Set the form of context consolidation
response_synthesizer_rr = get_response_synthesizer(response_mode=ResponseMode.COMPACT_ACCUMULATE)
custom_query_engine_rerank = RetrieverQueryEngine.from_args(
    retriever_rr,
    response_synthesizer=response_synthesizer_rr,
    node_postprocessors=[rerank]
)
query_engine_rerank = index.as_query_engine(similarity_top_k=10, node_postprocessors=[rerank] )

In [127]:
query = 'what to treat Yellow Fever?'
gpt_and_rag_answers(query, custom_query_engine_rerank)

Batches: 100%|██████████| 1/1 [00:00<00:00, 68.08it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 59.21it/s]


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


############
RAG response:
Response 1: Nonaspirin pain relievers, rest, and rehydration with fluids.


Response(response='Response 1: Nonaspirin pain relievers, rest, and rehydration with fluids.', source_nodes=[NodeWithScore(node=TextNode(id_='b510f528-4d62-4d4c-a1d3-403f29e51fe9', embedding=None, metadata={'file': 'med0', 'name': 'med_pdf', '_id': 'med_pdf0'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='d780ea28-b04d-487a-afdd-d99c423a2c89', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file': 'med0', 'name': 'med_pdf', '_id': 'med_pdf0'}, hash='d92db4e931c2a68ec5e231c250503a23a6b17378f3509cf8b14fe323ab469aad'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='a43211fd-8032-45ff-9f97-f9e3b1c44b9a', node_type=<ObjectType.TEXT: '1'>, metadata={'file': 'med0', 'name': 'med_pdf', '_id': 'med_pdf0'}, hash='acb1cf5753bf84c70e7d96c7f0438cdd39357ee3aef43bab62eddaf6731ea703'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='3946c720-33c6-4863-8df1-f8788b6f63de', node_type=<ObjectType

In [None]:
import gradio as gr

def greet(query):
    text=gpt_and_rag_answers(query, custom_query_engine_rerank)
    return text

demo = gr.Interface(
    fn=greet,
    inputs=["text"],
    outputs=["text"],
)

demo.launch()
