In [None]:
!pip install minsearch

In [1]:
import minsearch
import json
from tqdm import tqdm

## Data Ingestion

In [2]:
# Load dataset documents with id from JSON.
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default (JSON files are expected to be UTF-8).
with open("./../dataset/medical_qa_documents_with_id.json", "r", encoding="utf-8") as f:
    docs_raw = json.load(f)

In [3]:
# Peek at the first top-level entry to sanity-check the loaded structure.
first_entry = docs_raw[0]
documents = first_entry["documents"]  # the list of Q&A documents in that entry

print("Total documents loaded:", len(documents))
# NOTE: these are the keys of the top-level entry, not of a single Q&A document.
print("Keys in document:", first_entry.keys())


Total documents loaded: 14443
Keys in document: dict_keys(['document_info', 'documents'])


In [4]:
# Flatten the "documents" list of every top-level entry into one flat list,
# preserving the original order.
documents = [
    record
    for entry in docs_raw
    for record in entry['documents']
]

In [5]:
# Create the minsearch index: "question" and "answer" are registered as text
# fields and "qtype" as a keyword field (it is passed in filter_dict by the
# searches further down).
index = minsearch.Index(
    text_fields=["question", "answer"],
    keyword_fields=["qtype"]
)

In [6]:
# Fit the index on the flattened document list. The call returns the Index
# object, which is why its repr is echoed as the cell output.
index.fit(documents)

<minsearch.minsearch.Index at 0x71a3eeb73460>

In [7]:
# Sample question reused by the ad-hoc search and the direct LLM call below.
q = "what is malaria?"

In [8]:
# Boost factor for the "question" text field, passed to the search as boost_dict.
boost = {'question': 3.0}

# Ad-hoc search for the sample question: restricted to qtype == 'information'
# via filter_dict and limited to five results. The same parameters are wrapped
# in the search() helper later in the notebook.
results = index.search(
        query=q,
        filter_dict={'qtype': 'information'},
        boost_dict=boost,
        num_results=5
    )

In [9]:
# Display the records returned for the sample question.
results

[{'question': 'What is (are) Malaria?',
  'answer': 'Malaria is a serious disease caused by a parasite. You get it when an infected mosquito bites you. Malaria is a major cause of death worldwide, but it is almost wiped out in the United States. The disease is mostly a problem in developing countries with warm climates. If you travel to these countries, you are at risk. There are four different types of malaria caused by four related parasites. The most deadly type occurs in Africa south of the Sahara Desert. Malaria symptoms include chills, flu-like symptoms, fever, vomiting, diarrhea, and jaundice. A blood test can diagnose it. It can be life-threatening. However, you can treat malaria with drugs. The type of drug depends on which kind of malaria you have and where you were infected. Malaria can be prevented. When traveling to areas where malaria is found - See your doctor for medicines that protect you - Wear insect repellent with DEET - Cover up - Sleep under mosquito netting Cente

In [10]:
import os
from getpass import getpass

# Never hardcode API keys in a notebook: they end up in version control and in
# shared copies. Use GROQ_API_KEY from the environment when it is already set,
# and only otherwise prompt for it (input is not echoed).
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your GROQ_API_KEY: ")

In [11]:
from groq import Groq

# No key is passed explicitly; presumably the client picks up GROQ_API_KEY from
# the environment set in the previous cell — confirm against the Groq SDK docs.
client = Groq()

### Testing the Groq API

In [12]:
# Smoke test of the API: send the raw sample question as a single user message,
# without any retrieved context.
response = client.chat.completions.create(
    model="openai/gpt-oss-20b",
    messages=[
      {
        "role": "user",
        "content": q
      }
    ]
)

In [13]:
# Show the message text of the first returned choice.
response.choices[0].message.content

'**Malaria** is an infectious disease caused by a tiny parasite called *Plasmodium* that is transmitted to humans through the bite of infected female *Anopheles* mosquitoes.  \n\n| What it is | How it spreads | What you feel | Where it’s common | How to stay safe |\n|------------|----------------|---------------|--------------------|------------------|\n| **Parasite** – 5 species infect humans; *P. falciparum* is the most dangerous. | **Mosquito bite** – a mosquito that has fed on an infected person (or animal) carries the parasite into a new host. | **Fever, chills, headache, sweats, muscle aches**. If untreated it can cause anemia, organ failure, seizures, and death. | Tropical and subtropical regions (sub-Saharan Africa, South Asia, parts of Central & South America, and Southeast Asia). | • Long‑lasting insecticide‑treated bed nets. <br>• Indoor residual spraying of insecticide. <br>• Avoid outdoor activity during peak mosquito hours (dusk/dawn). <br>• Use insect repellants and wear

## RAG Flow with minsearch

In [14]:
def search(query, num_results=5, qtype='information', boost=None):
    """Retrieve the best-matching Q&A records for ``query`` from ``index``.

    Called as ``search(query)`` this behaves exactly as before: five results,
    filtered to ``qtype == 'information'``, with the ``question`` field
    boosted by 3.0.

    Args:
        query: Free-text question to search for.
        num_results: Number of results to request from the index.
        qtype: Value required for the ``qtype`` keyword field. Pass ``None``
            to send an empty ``filter_dict`` (no qtype constraint).
        boost: Optional mapping of text-field name to boost factor. When
            ``None``, ``{'question': 3.0}`` is used.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Build the dicts per call so no mutable default is shared between calls.
    boost_dict = {'question': 3.0} if boost is None else boost
    filter_dict = {} if qtype is None else {'qtype': qtype}

    return index.search(
        query=query,
        filter_dict=filter_dict,
        boost_dict=boost_dict,
        num_results=num_results,
    )

In [15]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt for a question and its retrieved Q&A records.

    Args:
        query: The user's question; substituted for QUESTION in the prompt.
        search_results: Iterable of dicts, each with 'question' and 'answer'
            keys, rendered in order as the CONTEXT section.

    Returns:
        The filled-in prompt string with surrounding whitespace stripped.
    """
    # Render each record as a question/answer pair followed by a blank line.
    context_block = "".join(
        f"question: {doc['question']}\nanswer: {doc['answer']}\n\n"
        for doc in search_results
    )

    template = """
You are a professional medical assistant.
Answer the QUESTION using only the CONTEXT provided from verified medical sources.
If the answer is not available in the CONTEXT, say "I'm not sure based on the available information."

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # The final strip() drops the blank line left after the last record.
    return template.format(question=query, context=context_block).strip()

In [16]:
def llm(prompt, model="openai/gpt-oss-20b"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the content of one "user" message.
        model: Model identifier passed to the chat-completions call. Defaults
            to the model previously hard-coded in this function.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [17]:
# Sample question used to exercise the pipeline in the next cell.
query = "what is malaria?"

def rag(query):
    """Answer ``query`` by chaining retrieval, prompt building and the LLM call.

    Args:
        query: The user's question.

    Returns:
        The value returned by ``llm`` for the prompt built from the search
        results for ``query``.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [18]:
# Run the full pipeline (search, prompt building, LLM call) on the sample question.
rag(query)

'Malaria is a serious disease caused by a parasite that is transmitted when an infected mosquito bites you. It is a major cause of death worldwide, especially in developing countries with warm climates, and it is almost wiped out in the United States. Four related parasites cause malaria, with the most deadly type found in Africa south of the Sahara Desert. Symptoms include chills, flu‑like symptoms, fever, vomiting, diarrhea, and jaundice. A blood test can diagnose it, and it can be life‑threatening, but it is treatable with drugs that depend on the type of malaria and the region of infection. Prevention measures include taking prescribed antimalarial medicines before traveling to endemic areas, using insect repellent with DEET, wearing protective clothing, and sleeping under mosquito nets. (Centers for Disease Control and Prevention)'