In [1]:
import json
from tqdm import tqdm
import pickle

### Data Ingestion in Qdrant

In [2]:

# Load dataset from JSON
with open("./../dataset/medical_qa_documents_with_id.json", "r") as f:
    docs_raw = json.load(f)

In [3]:
documents = docs_raw[0]["documents"]  # get the list of Q&A documents

print("Total documents loaded:", len(documents))
print("Keys in document:", docs_raw[0].keys())


Total documents loaded: 14443
Keys in document: dict_keys(['document_info', 'documents'])


In [4]:
documents = []

for docs_info in docs_raw:
    for doc in docs_info['documents']:
        documents.append(doc)

In [5]:
documents[0]

{'question': 'Who is at risk for Lymphocytic Choriomeningitis (LCM)?',
 'answer': 'LCMV infections can occur after exposure to fresh urine, droppings, saliva, or nesting materials from infected rodents. Transmission may also occur when these materials are directly introduced into broken skin, the nose, the eyes, or the mouth, or presumably, via the bite of an infected rodent. Person-to-person transmission has not been reported, with the exception of vertical transmission from infected mother to fetus, and rarely, through organ transplantation.',
 'qtype': 'susceptibility',
 'id': 'f72c0d85'}

In [6]:
from qdrant_client import QdrantClient, models

In [8]:
qd_client = QdrantClient(
    url="http://localhost:6333",
    timeout=60  # seconds (1 minutes)
)

collection_name = "medical-faq"

In [9]:
# delete collection name if already created in qdrant
qd_client.delete_collection(collection_name=collection_name)

True

#### generating embedding vector

In [10]:
from sentence_transformers import SentenceTransformer

EMBEDDING_DIMENSIONALITY = 384  # for multi-qa-MiniLM-L6-cos-v1

model_name = 'multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)

### Generate embedding vectors

In [None]:
# no need to run embedding is already generated and locally saved
vectors = []

for doc in tqdm(documents):
    question = doc['question']
    answer = doc['answer']
    vector = model.encode(question + ' ' + answer)
    vectors.append(vector)

### Load the embedding vector if already generated

In [11]:
with open("./../dataset/documents-vectors-multi-qa-MiniLM-L6-cos-v1.pkl", "rb") as f:
    vectors = pickle.load(f)

print(f"Loaded {len(vectors)} vectors and {len(documents)} documents")

Loaded 14443 vectors and 14443 documents


In [34]:
qd_client.recreate_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)

  qd_client.recreate_collection(


True

In [None]:
qd_client.create_payload_index(
    collection_name=collection_name,
    field_name="qtype",
    field_schema="keyword"
)

In [35]:
from tqdm.auto import tqdm
import psutil, os, gc, time

process = psutil.Process(os.getpid())
BATCH_SIZE = 100

for start in tqdm(range(0, len(vectors), BATCH_SIZE), desc="Uploading to Qdrant"):
    end = start + BATCH_SIZE
    batch_vectors = vectors[start:end]
    batch_docs = documents[start:end]

    points = []
    for i, (vec, doc) in enumerate(zip(batch_vectors, batch_docs)):
        point = models.PointStruct(
            id=start + i,
            vector=vec.tolist(),
            payload=doc
        )
        points.append(point)

    qd_client.upsert(collection_name=collection_name, points=points)
    gc.collect()

    mem = process.memory_info().rss / 1e6
    print(f"Uploaded {end}/{len(vectors)} | Memory: {mem:.2f} MB")
    time.sleep(0.1)


Uploading to Qdrant:   0%|          | 0/150 [00:00<?, ?it/s]

Uploaded 100/14979 | Memory: 659.52 MB
Uploaded 200/14979 | Memory: 662.53 MB
Uploaded 300/14979 | Memory: 662.53 MB
Uploaded 400/14979 | Memory: 662.53 MB
Uploaded 500/14979 | Memory: 662.53 MB
Uploaded 600/14979 | Memory: 662.53 MB
Uploaded 700/14979 | Memory: 662.53 MB
Uploaded 800/14979 | Memory: 662.53 MB
Uploaded 900/14979 | Memory: 662.53 MB
Uploaded 1000/14979 | Memory: 662.53 MB
Uploaded 1100/14979 | Memory: 662.53 MB
Uploaded 1200/14979 | Memory: 662.53 MB
Uploaded 1300/14979 | Memory: 662.53 MB
Uploaded 1400/14979 | Memory: 662.53 MB
Uploaded 1500/14979 | Memory: 662.53 MB
Uploaded 1600/14979 | Memory: 662.53 MB
Uploaded 1700/14979 | Memory: 662.53 MB
Uploaded 1800/14979 | Memory: 662.53 MB
Uploaded 1900/14979 | Memory: 662.53 MB
Uploaded 2000/14979 | Memory: 662.53 MB
Uploaded 2100/14979 | Memory: 662.53 MB
Uploaded 2200/14979 | Memory: 662.53 MB
Uploaded 2300/14979 | Memory: 662.53 MB
Uploaded 2400/14979 | Memory: 662.53 MB
Uploaded 2500/14979 | Memory: 662.53 MB
Uploaded 

## RAG with Vector Search


In [13]:
import os
os.environ["GROQ_API_KEY"] = "gsk_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

In [14]:
from groq import Groq

client = Groq()

In [15]:
def build_prompt(query, search_results):

    prompt_template = """
You are a professional medical assistant.
Answer the QUESTION using only the CONTEXT provided from verified medical sources.
If the answer is not available in the CONTEXT, say "I'm not sure based on the available information."

QUESTION: {question}

CONTEXT:
{context}
""".strip()
    
    context = ""
    
    for doc in search_results:
        context = context + f"question: {doc['question']}\nanswer: {doc['answer']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [16]:
def llm(prompt):
    response = client.chat.completions.create(
        model="openai/gpt-oss-20b",
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [17]:
def vector_search(query, qtype=None, limit=5):
    query_vector = model.encode([query])[0].tolist()

    query_filter = None
    if qtype:
        query_filter = models.Filter(
            must=[models.FieldCondition(key="qtype", match=models.MatchValue(value=qtype))]
        )

    results = qd_client.query_points(
        collection_name=collection_name,
        query=query_vector,
        query_filter=query_filter,
        limit=limit,
        with_payload=True
    )

    return [p.payload for p in results.points]


In [18]:
def rag(query, qtype):
    search_results = vector_search(query, qtype)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [19]:
query = "what is malaria?"

qtype = 'information'
rag(query, qtype)

'Malaria is a serious disease caused by a parasite that you acquire when an infected mosquito bites you. It is a major cause of death worldwide and is most common in developing countries with warm climates. The disease is caused by four related parasites, with the most deadly type occurring in sub‑Saharan Africa. Symptoms include chills, fever, flu‑like illness, vomiting, diarrhea, and jaundice. A blood test can diagnose it, and it can be life‑threatening if untreated, but it can be treated with antimalarial drugs. Prevention involves antimalarial medication, insect repellents (e.g., DEET), protective clothing, and sleeping under mosquito nets.'