In this notebook, a RAG (retrieval-augmented generation) pipeline is built over summarized patient data generated by Gemini.

In [8]:
import json
import uuid
from typing import List, Dict
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
from transformers import pipeline

In [9]:
#Removes NaN, 0, 0.0, and empty string values from a dictionary
def clean_nan_and_zero_values(record: Dict) -> Dict:
    """Return a copy of ``record`` without NaN, 0, 0.0, and empty-string values.

    Scalar values are dropped when ``pd.notna`` reports them as missing
    (e.g. ``None`` or ``NaN``) or when they compare equal to ``0``, ``0.0``
    or ``""``. Because ``False == 0`` in Python, ``False`` is dropped too.

    Non-scalar values (lists, dicts, arrays) are kept when they are non-empty.
    They are handled separately because ``pd.notna`` answers element-wise for
    list-like input, and using that array result in a boolean context raises
    ``ValueError`` as soon as it holds more than one element.

    Args:
        record: Mapping of field name to value.

    Returns:
        A new dict with only the informative entries, in their original order.
    """
    def _is_informative(value) -> bool:
        if not pd.api.types.is_scalar(value):
            try:
                return len(value) > 0
            except TypeError:
                # Unsized objects have no notion of "empty"; keep them.
                return True
        # bool() pins the result to a plain Python bool before the `and`.
        return bool(pd.notna(value)) and value not in (0, 0.0, "")

    return {key: value for key, value in record.items() if _is_informative(value)}

In [10]:
#Converts a patient JSON object into a list of chunks
def create_patient_chunks(patient_json: Dict) -> List[Dict]:
    """Convert one patient JSON object into a list of RAG-ready chunks.

    Chunks are emitted in this order:

    1. One chunk per non-blank paragraph of ``Summary`` (split on blank lines).
    2. One chunk for ``PatientInfo``.
    3. One chunk per record in ``VitalSigns``, ``Appointments`` and
       ``DoctorOrders``, dated with that record's ``CreatedOn``,
       ``AppointmentDate`` or ``ActualOrderDate`` field respectively.

    Every record is passed through ``clean_nan_and_zero_values`` first, and a
    record that is empty after cleaning produces no chunk. Sections that are
    missing, ``None`` (JSON ``null``) or, for ``Summary``, not a string are
    skipped instead of raising.

    Args:
        patient_json: Patient object; must contain ``"PatientID"``.

    Returns:
        A list of dicts with keys ``chunk_id`` (random UUID4 string), ``text``
        and ``metadata`` (``PatientID``, ``Type``, ``Date``).

    Raises:
        KeyError: If ``"PatientID"`` is absent.
    """
    pid = patient_json["PatientID"]
    chunks = []

    def add_chunk(text: str, chunk_type: str, date: str = None):
        """Append one chunk carrying the shared patient metadata."""
        chunks.append({
            "chunk_id": str(uuid.uuid4()),
            "text": text,
            "metadata": {
                "PatientID": pid,
                "Type": chunk_type,
                "Date": date
            }
        })

    def format_fields(fields: Dict) -> str:
        """Render a record as 'key: value, key: value'."""
        return ", ".join(f"{k}: {v}" for k, v in fields.items())

    # Summary: one chunk per paragraph. The isinstance check skips null or
    # NaN summaries, which have no .split().
    summary = patient_json.get("Summary")
    if isinstance(summary, str):
        for paragraph in summary.split("\n\n"):
            cleaned = paragraph.strip()
            if cleaned:
                add_chunk(f"Patient Summary Section: {cleaned}", "SummarySection")

    # PatientInfo: a single chunk. `or {}` treats a null section as empty.
    info = clean_nan_and_zero_values(patient_json.get("PatientInfo") or {})
    if info:
        add_chunk("Patient Info: " + format_fields(info), "PatientInfo")

    # Record sections: (JSON key, text prefix, chunk type, date field).
    record_sections = (
        ("VitalSigns", "Vital Signs: ", "VitalSigns", "CreatedOn"),
        ("Appointments", "Appointment: ", "Appointment", "AppointmentDate"),
        ("DoctorOrders", "Doctor Order: ", "DoctorOrders", "ActualOrderDate"),
    )
    for section_key, prefix, chunk_type, date_field in record_sections:
        for record in patient_json.get(section_key) or []:
            record = clean_nan_and_zero_values(record)
            if record:
                # The date is read after cleaning, so a blank date becomes None.
                add_chunk(prefix + format_fields(record), chunk_type, record.get(date_field))

    return chunks

 

In [11]:
# STEP 1: Load the JSON file and build chunks for a single patient
target_id = 2677554
# NOTE(review): machine-specific absolute path; point this at your local copy of the data.
data_path = "C:/Users/reema.alhenaki/Desktop/llama3_Data/data/json/patient_summaries_GEMINI2.json"

# Explicit UTF-8 so decoding does not depend on the platform's default encoding.
with open(data_path, "r", encoding="utf-8") as f:
    all_patients = json.load(f)

# Find patient with the matching ID
target_patient = next((p for p in all_patients if p.get("PatientID") == target_id), None)

if target_patient is None:
    # Stop here: the following cells need chunks/texts/metas and would
    # otherwise fail with a NameError or silently reuse stale kernel state.
    raise ValueError(f"Patient with ID {target_id} not found.")

chunks = create_patient_chunks(target_patient)
texts = [c["text"] for c in chunks]
metas = [c["metadata"] for c in chunks]

In [12]:
# STEP 2: Embed the chunk texts and build the FAISS index
if not texts:
    # Without any chunks the cell would fail further down with a far less
    # helpful error, so report the real cause up front.
    raise ValueError("No chunks to index: `texts` is empty.")

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(texts, show_progress_bar=True)

# One embedding per chunk; the length of a single embedding sets the index width.
dimension = embeddings[0].shape[0]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

Batches: 100%|███████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s]


In [13]:
# Text-generation model that answers questions over the retrieved context.
rag_pipeline = pipeline(
    task="text-generation",
    model="tiiuae/falcon-rw-1b",
    device=0,  # or smaller model on CPU
)

Device set to use cpu


In [9]:
# STEP 3: Define RAG Query Function
# Build the generator only if no earlier cell has created it already, so the
# model weights are not loaded a second time.
if "rag_pipeline" not in globals():
    rag_pipeline = pipeline("text-generation", model="tiiuae/falcon-rw-1b", device=0)  # or smaller model on CPU; TODO: try temperature 0
 
def query_rag(question: str, top_k: int = 3, exclude_types=("Summary",)):
    """Answer ``question`` with the generator, grounded on retrieved chunks.

    The question is embedded with the module-level ``model``, the ``top_k``
    nearest chunks are looked up in ``index``, and their texts (minus any
    excluded chunk types) are placed in the prompt sent to ``rag_pipeline``.

    Args:
        question: Natural-language question about the indexed patient.
        top_k: Number of nearest chunks requested from the index.
        exclude_types: Chunk ``Type`` values to leave out of the context.
            NOTE(review): ``create_patient_chunks`` tags summary paragraphs
            as ``"SummarySection"``, so the default ``("Summary",)`` does not
            exclude them. Pass ``("SummarySection",)`` if they should be
            dropped; confirm the intended behaviour.

    Returns:
        The pipeline's ``generated_text`` for the prompt, unmodified.
        Generation samples (``do_sample=True``), so repeated calls can differ.
    """
    q_embed = model.encode([question])
    _distances, neighbor_ids = index.search(q_embed, top_k)

    retrieved_texts = []
    for raw_id in neighbor_ids[0]:
        chunk_pos = int(raw_id)
        # FAISS pads the result with -1 when the index holds fewer than top_k
        # vectors; as a Python index, -1 would silently select the last chunk.
        if chunk_pos < 0:
            continue
        if metas[chunk_pos]["Type"] not in exclude_types:
            retrieved_texts.append(texts[chunk_pos])
    context = "\n".join(retrieved_texts)
    prompt = f"""You are a medical assistant AI. Use the context to answer the question.
 
Context:
{context}
 
Question:
{question}
 
Answer:"""
    result = rag_pipeline(prompt, max_new_tokens=50, do_sample=True)[0]['generated_text']
    return result

Device set to use cpu


In [17]:
def query_rag(question: str, top_k: int = 3, exclude_types=("Summary",), verbose: bool = True):
    """Answer ``question`` from retrieved chunks and return only the answer text.

    Uses the module-level ``model`` (query embedding), ``index`` (nearest
    neighbour search), ``texts``/``metas`` (chunk store) and ``rag_pipeline``
    (text generation, greedy decoding via ``do_sample=False``).

    Args:
        question: Natural-language question about the indexed patient.
        top_k: Number of nearest chunks requested from the index.
        exclude_types: Chunk ``Type`` values to leave out of the context.
            NOTE(review): ``create_patient_chunks`` tags summary paragraphs
            as ``"SummarySection"``, so the default ``("Summary",)`` does not
            exclude them. Pass ``("SummarySection",)`` if they should be
            dropped; confirm the intended behaviour.
        verbose: When True (default) the full prompt is printed. The prompt
            embeds the retrieved patient records, so turn this off before
            sharing notebook output.

    Returns:
        The generated answer, stripped, cut at the first ``"Answer:"`` or
        ``"Question:"`` marker the model emits after the prompt.
    """
    # Step 1: Embed the query
    q_embed = model.encode([question])

    # Step 2: Retrieve the nearest chunks and assemble the context
    _distances, neighbor_ids = index.search(q_embed, top_k)
    retrieved_texts = []
    for raw_id in neighbor_ids[0]:
        chunk_pos = int(raw_id)
        # FAISS pads the result with -1 when the index holds fewer than top_k
        # vectors; as a Python index, -1 would silently select the last chunk.
        if chunk_pos < 0:
            continue
        if metas[chunk_pos]["Type"] not in exclude_types:
            retrieved_texts.append(texts[chunk_pos])
    context = "\n".join(retrieved_texts)

    # Step 3: Create a structured, instruction-driven prompt
    prompt = f"""
You are a clinical assistant AI. Answer the user's question strictly using the information provided in the context.
Context:
{context}

Question:
{question}

Instructions:
- Respond only with the relevant values asked in the question
- Do not restate the full context
- Do not include unrelated medical details that are not mentioned in the question
- Only use facts present in the context
- Do not guess or hallucinate any values that is not clearly stated
- Do not repeat the same values
- Be concise and accurate
- Only include relevant data that is mentioned in the question do not mention extra data that was not asked
 
Answer:"""

    if verbose:
        print("\n🧾 PROMPT SENT TO LLM:")
        print(prompt)

    # Step 4: Generate the answer
    result = rag_pipeline(prompt, max_new_tokens=80, do_sample=False)[0]["generated_text"]

    # Step 5: Extract only the answer portion
    if result.startswith(prompt):
        # Drop the echoed prompt by length rather than by searching for
        # "Answer:", which may also occur inside the context or the question.
        answer_part = result[len(prompt):].split("Answer:")[0]
    elif "Answer:" in result:
        answer_part = result.split("Answer:")[1]
    else:
        return result.strip()
    # The model may keep going and invent a follow-up question; cut it off.
    return answer_part.strip().split("Question:")[0].strip()

In [26]:
 #  EXAMPLE QUERY 
# Run one sample question through the pipeline and show the extracted answer.
sample_answer = query_rag("What is the patient's name?")
print(sample_answer)

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



🧾 PROMPT SENT TO LLM:

You are a clinical assistant AI. Answer the user's question strictly using the information provided in the context.
Context:
Patient Info: RegistrationDate: 30/10/2017, FirstName: Yusuf, MiddleName: Abdullah, LastName: Mubarak, Gender: 1, DateofBirth: 30/10/1963 0:00, NationalityID: SAU, FirstVisit: 30/10/2017 9:42, LastVisit: 1/8/2019 13:05, NoOfVisit: 189, MobileNumber: 555333541, EmailAddress: yusuf@mail.com, BloodGroup: 4, RHFactor: 1, RegisteredDoctor: 152141, EmergencyContactName: AHMAD, EmergencyContactNo: 555333542
Vital Signs: PatientID: 2677554, WeightKg: 103.0, HeightCm: 176.0, PulseBeatPerMinute: 85, RespirationBeatPerMinute: 18, BloodPressureLower: 103, BloodPressureHigher: 187, SAO2: 98, CreatedOn: 2018-05-16 14:09:00
Patient Summary Section: He has a future appointment (Appointment No. 17107657) scheduled for June 24, 2025, from 1:00 PM to 1:15 PM with Doctor ID 149425 at Clinic ID 50.

Question:
What is the patient's name?

Instructions:
- Respon