In [1]:
"""
NOTEBOOK 07: INTERACTIVE PATIENT Q&A DEMO
Test the complete RAG system with an interactive chatbot interface
"""

# Download required libraries
!pip install -q chromadb

# Import libraries
import os
import json
from datetime import datetime
from sentence_transformers import SentenceTransformer
import chromadb
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Mount Drive
from google.colab import drive
drive.mount('/content/drive')

# Paths
PROJECT_ROOT = "/content/drive/MyDrive/Colab_Notebooks/LLMs/clinical_notes_qa_project"
VECTOR_STORE_DIR = f"{PROJECT_ROOT}/03_knowledge_base/outputs/vector_store"
OUTPUT_DIR = f"{PROJECT_ROOT}/07_inference_demo/outputs"
CONVERSATION_LOGS_DIR = f"{OUTPUT_DIR}/conversation_logs"

os.makedirs(CONVERSATION_LOGS_DIR, exist_ok=True)

# Load patient metadata
with open(f"{PROJECT_ROOT}/01_data_generation/outputs/patient_metadata.json", 'r') as f:
    metadata = json.load(f)

patients = metadata['patients']

print("✅ Setup complete")
print(f"📊 Available patients: {len(patients)}")
print(f"📂 Conversation logs will be saved to: {CONVERSATION_LOGS_DIR}")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.1/21.1 MB[0m [31m48.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m42.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.5/72.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.6/132.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━



Mounted at /content/drive
✅ Setup complete
📊 Available patients: 10
📂 Conversation logs will be saved to: /content/drive/MyDrive/Colab_Notebooks/LLMs/clinical_notes_qa_project/07_inference_demo/outputs/conversation_logs


In [2]:
print("📥 Loading models (this takes 2-3 minutes)...\n")

# 1. Load embedding model
print("1/3 Loading embedding model...")
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(MODEL_NAME)
print("✅ Embedding model loaded")

# 2. Load vector store
print("2/3 Loading vector store...")
client = chromadb.PersistentClient(path=VECTOR_STORE_DIR)
collection = client.get_collection("clinical_notes")
print(f"✅ Vector store loaded ({collection.count()} chunks)")

# 3. Load LLM
print("3/3 Loading TinyLlama (this is the slow part)...")
llm_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model = AutoModelForCausalLM.from_pretrained(
    llm_model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    device_map="cpu"
)
print("✅ LLM loaded")

print("\n" + "="*70)
print("🎉 ALL MODELS LOADED - SYSTEM READY!")
print("="*70)

📥 Loading models (this takes 2-3 minutes)...

1/3 Loading embedding model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Embedding model loaded
2/3 Loading vector store...
✅ Vector store loaded (288 chunks)
3/3 Loading TinyLlama (this is the slow part)...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

✅ LLM loaded

🎉 ALL MODELS LOADED - SYSTEM READY!


In [3]:
def rag_qa_pipeline(query, patient_id, top_k=3, similarity_threshold=0.0, verbose=False):
    """
    Complete RAG pipeline: Retrieve → Generate → Cite
    """

    # Step 1: Retrieve relevant chunks
    query_embedding = embedding_model.encode(query)

    results = collection.query(
        query_embeddings=[query_embedding.tolist()],
        n_results=top_k,
        where={"patient_id": patient_id}
    )

    # Step 2: Filter by similarity threshold
    retrieved_chunks = []
    for doc, metadata, distance in zip(
        results['documents'][0],
        results['metadatas'][0],
        results['distances'][0]
    ):
        similarity = 1 - distance
        if similarity >= similarity_threshold:
            retrieved_chunks.append({
                'text': doc,
                'visit_date': metadata['visit_date'],
                'section': metadata['section'],
                'similarity': round(similarity, 3)
            })

    # Step 3: Check if we have relevant information
    if not retrieved_chunks:
        return {
            'answer': "I don't have enough information in your clinical notes to answer this question.",
            'chunks_used': 0,
            'citations': [],
            'chunks': []
        }

    # Step 4: Create prompt
    context_parts = []
    for i, chunk in enumerate(retrieved_chunks, 1):
        context_parts.append(
            f"Note {i} - {chunk['visit_date']} ({chunk['section']}):\n{chunk['text'][:400]}"
        )

    context = "\n\n".join(context_parts)

    simple_prompt = f"""You are helping a patient understand their medical records.

CLINICAL NOTES:
{context}

PATIENT QUESTION: {query}

INSTRUCTIONS:
1. Answer the question in a clear, conversational way
2. Combine information from multiple visits if needed
3. Always cite the visit date like this: [Visit: 2024-05-01]
4. Explain what the medications/results mean if relevant
5. Do NOT just copy the notes - explain them

Your answer:"""

    try:
        # Format for TinyLlama
        formatted_prompt = f"<|user|>\n{simple_prompt}</s>\n<|assistant|>\n"

        # Tokenize
        inputs = tokenizer(formatted_prompt, return_tensors="pt", max_length=1500, truncation=True)

        if verbose:
            print("⏳ Generating answer (30-60 seconds on CPU)...")

        # Generate
        with torch.no_grad():
            outputs = llm_model.generate(
                **inputs,
                max_new_tokens=300,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.2,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode
        answer = tokenizer.decode(
            outputs[0][inputs['input_ids'].shape[1]:],
            skip_special_tokens=True
        ).strip()

        # Clean up
        answer = answer.split("PATIENT QUESTION:")[0]
        answer = answer.split("CLINICAL NOTES:")[0]
        answer = answer.strip()

    except Exception as e:
        answer = f"Error generating answer: {str(e)}"

    # Step 5: Extract citations
    citations = []
    for chunk in retrieved_chunks:
        citation_str = f"[Visit: {chunk['visit_date']}"
        if citation_str in answer:
            citations.append(f"[Visit: {chunk['visit_date']}, Section: {chunk['section']}]")

    return {
        'answer': answer,
        'chunks_used': len(retrieved_chunks),
        'citations': citations,
        'chunks': retrieved_chunks
    }

print("✅ RAG pipeline ready")

✅ RAG pipeline ready


In [4]:
# Show all available patients
print("="*70)
print("AVAILABLE PATIENTS")
print("="*70)

for i, patient in enumerate(patients, 1):
    print(f"\n{i}. Patient ID: {patient['patient_id']}")
    print(f"   Age: {patient['age']}, Gender: {patient['gender']}")
    print(f"   Conditions: {', '.join(patient['conditions'])}")
    print(f"   Visits: {len(patient['visit_dates'])} appointments")
    print(f"   Visit dates: {', '.join(patient['visit_dates'][:3])}...")

print("\n" + "="*70)

AVAILABLE PATIENTS

1. Patient ID: patient_001
   Age: 69, Gender: Male
   Conditions: Type 2 Diabetes, Hyperlipidemia, Asthma
   Visits: 4 appointments
   Visit dates: 2024-05-01, 2024-06-08, 2024-09-02...

2. Patient ID: patient_002
   Age: 73, Gender: Male
   Conditions: Chronic Kidney Disease, Type 2 Diabetes, Hyperlipidemia
   Visits: 3 appointments
   Visit dates: 2024-08-28, 2024-09-12, 2025-06-18...

3. Patient ID: patient_003
   Age: 62, Gender: Female
   Conditions: Hypertension, Chronic Kidney Disease, Type 2 Diabetes
   Visits: 4 appointments
   Visit dates: 2024-01-24, 2024-06-29, 2024-10-28...

4. Patient ID: patient_004
   Age: 57, Gender: Female
   Conditions: Hypertension, Hyperlipidemia
   Visits: 4 appointments
   Visit dates: 2024-04-21, 2024-04-26, 2024-05-01...

5. Patient ID: patient_005
   Age: 70, Gender: Female
   Conditions: Asthma, Type 2 Diabetes, Hyperlipidemia
   Visits: 4 appointments
   Visit dates: 2024-04-07, 2024-05-24, 2025-02-08...

6. Patient ID: 

In [5]:
# Install Gradio for web interface
!pip install -q gradio

import gradio as gr

print("✅ Gradio installed")

✅ Gradio installed


In [6]:
# Create single-patient chatbot with file upload
import tempfile
import shutil

def process_clinical_notes(notes_text, progress=gr.Progress()):
    """
    Process uploaded clinical notes for a single patient
    """
    if not notes_text or not notes_text.strip():
        return "⚠️ Please paste your clinical notes", None, gr.update(visible=False)

    progress(0, desc="Starting processing...")

    try:
        # Step 1: De-identify the notes
        progress(0.2, desc="De-identifying notes (removing personal info)...")

        # Simple de-identification (you can enhance this)
        import re
        deid_text = notes_text

        # Remove potential names (basic pattern)
        deid_text = re.sub(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]', deid_text)
        # Remove dates
        deid_text = re.sub(r'\d{4}-\d{2}-\d{2}', '[DATE]', deid_text)
        deid_text = re.sub(r'\d{1,2}/\d{1,2}/\d{2,4}', '[DATE]', deid_text)
        # Remove potential IDs
        deid_text = re.sub(r'\b(ID|MRN|SSN):\s*\S+', r'\1: [REDACTED]', deid_text)

        progress(0.4, desc="Chunking notes into sections...")

        # Step 2: Chunk the text
        def simple_chunk(text, chunk_size=500):
            """Simple chunking by paragraphs and size"""
            paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
            chunks = []
            current_chunk = ""

            for para in paragraphs:
                if len(current_chunk) + len(para) < chunk_size:
                    current_chunk += para + "\n\n"
                else:
                    if current_chunk:
                        chunks.append(current_chunk.strip())
                    current_chunk = para + "\n\n"

            if current_chunk:
                chunks.append(current_chunk.strip())

            return chunks if chunks else [text]

        chunks = simple_chunk(deid_text)

        progress(0.6, desc=f"Generating embeddings for {len(chunks)} chunks...")

        # Step 3: Generate embeddings
        chunk_embeddings = embedding_model.encode(chunks, show_progress_bar=False)

        progress(0.8, desc="Indexing in vector store...")

        # Step 4: Create temporary collection for this user
        user_collection_name = f"user_session_{datetime.now().strftime('%Y%m%d%H%M%S')}"

        # Create new collection
        user_collection = client.create_collection(
            name=user_collection_name,
            metadata={"type": "user_upload"}
        )

        # Add chunks to collection
        user_collection.add(
            ids=[f"chunk_{i}" for i in range(len(chunks))],
            embeddings=chunk_embeddings.tolist(),
            documents=chunks,
            metadatas=[{"chunk_index": i, "source": "user_upload"} for i in range(len(chunks))]
        )

        progress(1.0, desc="✅ Processing complete!")

        status_msg = f"""
        ✅ **Processing Complete!**

        📊 **Summary:**
        - Original notes: {len(notes_text)} characters
        - De-identified: {notes_text.count('[NAME]') + notes_text.count('[DATE]')} items redacted
        - Created: {len(chunks)} searchable chunks
        - Embeddings: Generated and indexed

        🎯 **You can now ask questions about your clinical notes!**

        Try questions like:
        - What medications am I taking?
        - What are my lab results?
        - What did the doctor recommend?
        """

        return status_msg, user_collection_name, gr.update(visible=True)

    except Exception as e:
        return f"❌ Error processing notes: {str(e)}", None, gr.update(visible=False)

def extract_direct_answer(question, chunks):
    """Rule-based extraction for common questions (no LLM hallucination)"""
    question_lower = question.lower()
    full_text = "\n".join([chunk['text'] for chunk in chunks])

    # Pattern 1: Medications
    if any(word in question_lower for word in ['medication', 'medicine', 'drug', 'prescription', 'taking', 'pills']):
        meds = []
        for line in full_text.split('\n'):
            if '•' in line and ('mg' in line.lower() or 'daily' in line.lower()):
                clean_line = line.strip().replace('•', '').strip()
                if clean_line and len(clean_line) < 100:
                    meds.append(clean_line)

        if meds:
            unique_meds = list(dict.fromkeys(meds))
            answer = "**Based on your clinical notes, you are currently taking:**\n\n"
            for med in unique_meds[:10]:
                answer += f"• {med}\n"
            return answer

    # Pattern 2: Lab results
    if any(word in question_lower for word in ['lab', 'result', 'test', 'hba1c', 'glucose', 'cholesterol']):
        labs = []
        for line in full_text.split('\n'):
            if any(marker in line for marker in ['%', 'mg/dL', 'ABNORMAL', 'NORMAL', 'IMPROVED']):
                clean_line = line.strip().replace('•', '').strip()
                if clean_line and 5 < len(clean_line) < 150:
                    labs.append(clean_line)

        if labs:
            unique_labs = list(dict.fromkeys(labs))
            answer = "**Based on your clinical notes, here are your lab results:**\n\n"
            for lab in unique_labs[:15]:
                answer += f"• {lab}\n"
            return answer

    # Pattern 3: Diagnosis
    if any(word in question_lower for word in ['diagnosis', 'condition', 'disease', 'problem', 'treating']):
        conditions = []
        for line in full_text.split('\n'):
            if any(keyword in line.lower() for keyword in ['diabetes', 'hypertension', 'assessment', 'diagnosis']):
                clean_line = line.strip().replace('•', '').replace('1.', '').replace('2.', '').replace('3.', '').strip()
                if clean_line and 10 < len(clean_line) < 200 and ':' not in clean_line[:5]:
                    conditions.append(clean_line)

        if conditions:
            unique_conditions = list(dict.fromkeys(conditions))
            answer = "**Based on your clinical notes, you are being treated for:**\n\n"
            for condition in unique_conditions[:5]:
                answer += f"• {condition}\n"
            return answer

    # Pattern 4: NOT in notes (food, activities, etc.)
    if any(word in question_lower for word in ['food', 'eat', 'ate', 'meal', 'breakfast', 'lunch', 'dinner']):
        return "❌ **This information is not available in your clinical notes.**\n\nYour clinical notes contain medical information (diagnoses, medications, lab results, treatment plans) but not details about daily meals or activities."

    return None  # No pattern matched, use LLM


def answer_question(question, collection_name, chat_history):
    """Answer questions about uploaded clinical notes"""
    if not collection_name:
        chat_history.append([question, "⚠️ Please upload and process your clinical notes first!"])
        return chat_history, ""

    if not question.strip():
        return chat_history, ""

    chat_history.append([question, None])

    try:
        user_collection = client.get_collection(collection_name)
        query_embedding = embedding_model.encode(question)

        results = user_collection.query(
            query_embeddings=[query_embedding.tolist()],
            n_results=5
        )

        retrieved_chunks = []
        for doc, metadata, distance in zip(
            results['documents'][0],
            results['metadatas'][0],
            results['distances'][0]
        ):
            similarity = 1 - distance
            retrieved_chunks.append({
                'text': doc,
                'similarity': round(similarity, 3),
                'chunk_index': metadata.get('chunk_index', 0)
            })

        if not retrieved_chunks:
            answer = "❌ I don't have enough information to answer this question."
        else:
            # TRY PATTERN MATCHING FIRST (no hallucination)
            pattern_answer = extract_direct_answer(question, retrieved_chunks)

            if pattern_answer:
                answer = pattern_answer
                answer += f"\n\n---\n✅ **Direct extraction from {len(retrieved_chunks)} note sections** (No AI interpretation)"
            else:
                # Fall back to LLM (with hallucination risk)
                context = "\n\n".join([chunk['text'][:500] for chunk in retrieved_chunks])

                simple_prompt = f"""Based on these clinical notes, answer the patient's question.

Clinical Notes:
{context}

Question: {question}

Answer:"""

                formatted_prompt = f"<|user|>\n{simple_prompt}</s>\n<|assistant|>\n"
                inputs = tokenizer(formatted_prompt, return_tensors="pt", max_length=2000, truncation=True)

                with torch.no_grad():
                    outputs = llm_model.generate(
                        **inputs,
                        max_new_tokens=200,
                        do_sample=True,
                        temperature=0.2,
                        top_p=0.7,
                        repetition_penalty=1.1,
                        pad_token_id=tokenizer.eos_token_id,
                        eos_token_id=tokenizer.eos_token_id
                    )

                full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
                answer = full_output.split("<|assistant|>")[-1].strip()
                answer = answer.split("Question:")[0].strip()
                answer = answer.split("Clinical Notes:")[0].strip()
                answer = answer.split("<|user|>")[0].strip()

                if len(answer) < 20:
                    answer = "Based on your clinical notes:\n\n" + context[:800]

                answer += f"\n\n---\n🤖 **AI-generated answer from {len(retrieved_chunks)} sections** (May contain interpretation)"

        chat_history[-1][1] = answer

    except Exception as e:
        chat_history[-1][1] = f"❌ Error: {str(e)}"

    return chat_history, ""


def reset_session():
    """Reset the session"""
    return "", "", None, [], gr.update(visible=False)


# Build Gradio interface
with gr.Blocks(title="Personal Clinical Notes Q&A", theme=gr.themes.Soft()) as demo:

    # Hidden state to store collection name
    collection_state = gr.State(None)

    gr.Markdown(
        """
        # 🏥 Personal Clinical Notes Q&A Assistant
        ### Upload your clinical notes and ask questions - 100% Private & Local

        **Privacy First:**
        - ✅ All processing happens locally (no data sent to external servers)
        - ✅ Personal information is automatically de-identified
        - ✅ Your notes are only accessible to you in this session
        - ✅ Data is deleted when you close this page

        ---
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Step 1: Upload Your Notes")

            notes_input = gr.Textbox(
                label="Paste Your Clinical Notes Here",
                placeholder="""Example:

CLINICAL NOTE
Patient: John Smith
Date: 2024-05-15
Age: 58 | Gender: Male

CHIEF COMPLAINT:
Follow-up for Type 2 Diabetes management.

CURRENT MEDICATIONS:
  • Metformin 500mg twice daily
  • Lisinopril 10mg once daily

LAB RESULTS:
  • HbA1c: 8.2% [ABNORMAL]
  • Fasting Glucose: 145 mg/dL [ABNORMAL]

ASSESSMENT:
Type 2 Diabetes - Suboptimal control

PLAN:
  1. Increase Metformin to 1000mg twice daily
  2. Repeat labs in 3 months
  3. Follow-up in 1 month
                """,
                lines=15,
                max_lines=20
            )

            process_btn = gr.Button("🔄 Process Notes", variant="primary", size="lg")

            status_output = gr.Markdown("")

            reset_btn = gr.Button("🗑️ Clear & Start Over", variant="secondary")

        with gr.Column(scale=1):
            gr.Markdown("### 💬 Step 2: Ask Questions")

            chatbot = gr.Chatbot(
                label="Your Medical Q&A",
                height=400,
                show_label=True,
                avatar_images=(None, "🤖"),
                visible=False
            )

            with gr.Row():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="What medications am I taking?",
                    lines=2,
                    scale=4
                )
                submit_btn = gr.Button("Ask 📤", variant="primary", scale=1)

            gr.Markdown(
                """
                ### 💡 Try These Questions:
                - What medications am I taking?
                - What are my recent lab results?
                - Are any of my values abnormal?
                - What did the doctor recommend?
                - What is my diagnosis?

                <div style='text-align: center; color: gray; font-size: 12px; margin-top: 20px;'>
                ⚠️ Answers take 30-60 seconds on CPU
                </div>
                """
            )

    # Event handlers
    process_btn.click(
        fn=process_clinical_notes,
        inputs=[notes_input],
        outputs=[status_output, collection_state, chatbot]
    )

    submit_btn.click(
        fn=answer_question,
        inputs=[question_input, collection_state, chatbot],
        outputs=[chatbot, question_input]
    )

    question_input.submit(
        fn=answer_question,
        inputs=[question_input, collection_state, chatbot],
        outputs=[chatbot, question_input]
    )

    reset_btn.click(
        fn=reset_session,
        inputs=None,
        outputs=[notes_input, status_output, collection_state, chatbot, chatbot]
    )

# Launch
print("🚀 Launching Personal Clinical Notes Q&A...")
demo.launch(share=True, debug=False)

  with gr.Blocks(title="Personal Clinical Notes Q&A", theme=gr.themes.Soft()) as demo:
  chatbot = gr.Chatbot(
  chatbot = gr.Chatbot(


🚀 Launching Personal Clinical Notes Q&A...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c2053d33dd97e55f66.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [7]:
"""
# Test if retrieval is working
def test_retrieval(collection_name):

    if not collection_name:
        print("❌ No collection created yet")
        return

    print("🔍 TESTING RETRIEVAL\n")
    print("="*70)

    # Get collection
    user_collection = client.get_collection(collection_name)

    print(f"Collection: {collection_name}")
    print(f"Total chunks: {user_collection.count()}\n")

    # Test query
    test_question = "What medications am I taking?"
    print(f"Test Question: '{test_question}'\n")

    # Retrieve
    query_embedding = embedding_model.encode(test_question)
    results = user_collection.query(
        query_embeddings=[query_embedding.tolist()],
        n_results=3
    )

    print(f"Results found: {len(results['documents'][0])}\n")

    # Show what was retrieved
    for i, (doc, meta, dist) in enumerate(zip(
        results['documents'][0],
        results['metadatas'][0],
        results['distances'][0]
    ), 1):
        similarity = 1 - dist
        print(f"--- CHUNK {i} (Similarity: {similarity:.3f}) ---")
        print(f"Metadata: {meta}")
        print(f"Text (first 300 chars):\n{doc[:300]}")
        print()

# Run this after processing notes
# Replace 'user_session_...' with your actual collection name from the status message
test_retrieval("user_session_20250119123456")  # ⚠️ UPDATE THIS with your collection name
"""

'\n# Test if retrieval is working\ndef test_retrieval(collection_name):\n\n    if not collection_name:\n        print("❌ No collection created yet")\n        return\n\n    print("🔍 TESTING RETRIEVAL\n")\n    print("="*70)\n\n    # Get collection\n    user_collection = client.get_collection(collection_name)\n\n    print(f"Collection: {collection_name}")\n    print(f"Total chunks: {user_collection.count()}\n")\n\n    # Test query\n    test_question = "What medications am I taking?"\n    print(f"Test Question: \'{test_question}\'\n")\n\n    # Retrieve\n    query_embedding = embedding_model.encode(test_question)\n    results = user_collection.query(\n        query_embeddings=[query_embedding.tolist()],\n        n_results=3\n    )\n\n    print(f"Results found: {len(results[\'documents\'][0])}\n")\n\n    # Show what was retrieved\n    for i, (doc, meta, dist) in enumerate(zip(\n        results[\'documents\'][0],\n        results[\'metadatas\'][0],\n        results[\'distances\'][0]\n    ), 1

In [8]:
"""
# If you want to quickly test without interactive mode
print("🧪 QUICK TEST MODE\n")

test_questions = [
    "What is my primary diagnosis?"
]

test_patient = "patient_001"

print(f"Testing with {test_patient}")
print("="*70)

for i, question in enumerate(test_questions, 1):
    print(f"\n{'='*70}")
    print(f"QUESTION {i}: {question}")
    print("="*70)

    result = rag_qa_pipeline(question, test_patient, top_k=3, verbose=False)

    print(f"\n🤖 ANSWER:")
    print(result['answer'])

    print(f"\n📊 Chunks used: {result['chunks_used']}")

    if result['chunks']:
        print(f"\n📄 Sources:")
        for chunk in result['chunks']:
            print(f"  • {chunk['visit_date']} - {chunk['section']}")

print("\n" + "="*70)
print("✅ Quick test complete!")
"""

'\n# If you want to quickly test without interactive mode\nprint("🧪 QUICK TEST MODE\n")\n\ntest_questions = [\n    "What is my primary diagnosis?"\n]\n\ntest_patient = "patient_001"\n\nprint(f"Testing with {test_patient}")\nprint("="*70)\n\nfor i, question in enumerate(test_questions, 1):\n    print(f"\n{\'=\'*70}")\n    print(f"QUESTION {i}: {question}")\n    print("="*70)\n\n    result = rag_qa_pipeline(question, test_patient, top_k=3, verbose=False)\n\n    print(f"\n🤖 ANSWER:")\n    print(result[\'answer\'])\n\n    print(f"\n📊 Chunks used: {result[\'chunks_used\']}")\n\n    if result[\'chunks\']:\n        print(f"\n📄 Sources:")\n        for chunk in result[\'chunks\']:\n            print(f"  • {chunk[\'visit_date\']} - {chunk[\'section\']}")\n\nprint("\n" + "="*70)\nprint("✅ Quick test complete!")\n'

In [9]:
"""
# View all saved conversations
print("📋 CONVERSATION LOGS\n")

log_files = [f for f in os.listdir(CONVERSATION_LOGS_DIR) if f.endswith('.json')]

if log_files:
    print(f"Found {len(log_files)} conversation log(s):\n")

    for log_file in sorted(log_files):
        log_path = f"{CONVERSATION_LOGS_DIR}/{log_file}"
        with open(log_path, 'r') as f:
            log = json.load(f)

        print(f"{'='*70}")
        print(f"Patient: {log['patient_id']}")
        print(f"Session: {log['session_start']} to {log['session_end']}")
        print(f"Questions asked: {log['total_questions']}")
        print(f"\nQuestions:")
        for i, conv in enumerate(log['conversation'], 1):
            print(f"  {i}. {conv['question']}")
        print()
else:
    print("No conversation logs found yet.")
    print("Run Cell 6 to start a Q&A session!")
"""

'\n# View all saved conversations\nprint("📋 CONVERSATION LOGS\n")\n\nlog_files = [f for f in os.listdir(CONVERSATION_LOGS_DIR) if f.endswith(\'.json\')]\n\nif log_files:\n    print(f"Found {len(log_files)} conversation log(s):\n")\n\n    for log_file in sorted(log_files):\n        log_path = f"{CONVERSATION_LOGS_DIR}/{log_file}"\n        with open(log_path, \'r\') as f:\n            log = json.load(f)\n\n        print(f"{\'=\'*70}")\n        print(f"Patient: {log[\'patient_id\']}")\n        print(f"Session: {log[\'session_start\']} to {log[\'session_end\']}")\n        print(f"Questions asked: {log[\'total_questions\']}")\n        print(f"\nQuestions:")\n        for i, conv in enumerate(log[\'conversation\'], 1):\n            print(f"  {i}. {conv[\'question\']}")\n        print()\nelse:\n    print("No conversation logs found yet.")\n    print("Run Cell 6 to start a Q&A session!")\n'

In [10]:
print("\n" + "="*70)
print("✅ NOTEBOOK 07 COMPLETE - DEMO READY!")
print("="*70)
print(f"""
🎉 Interactive Q&A System is Live!

📊 System Status:
  • Patients loaded: {len(patients)}
  • Vector store: {collection.count()} chunks
  • Models loaded: ✅ All ready

🚀 How to Use:
  1. Run Cell 6 to start interactive session
  2. Select a patient (e.g., 'patient_001')
  3. Ask questions in natural language
  4. Type 'quit' to exit and save conversation

💡 Try These Questions:
  • What medications am I taking?
  • What are my lab results?
  • What is my diagnosis?
  • What did the doctor recommend?
  • Are any of my values abnormal?

📂 Outputs:
  • Conversation logs: {CONVERSATION_LOGS_DIR}

⚡ Performance:
  • Answer time: ~30-60 seconds (CPU)
  • Quality: Good for demo (TinyLlama 1.1B)
  • Privacy: 100% local (no external APIs)
""")


✅ NOTEBOOK 07 COMPLETE - DEMO READY!

🎉 Interactive Q&A System is Live!

📊 System Status:
  • Patients loaded: 10
  • Vector store: 288 chunks
  • Models loaded: ✅ All ready

🚀 How to Use:
  1. Run Cell 6 to start interactive session
  2. Select a patient (e.g., 'patient_001')
  3. Ask questions in natural language
  4. Type 'quit' to exit and save conversation

💡 Try These Questions:
  • What medications am I taking?
  • What are my lab results?
  • What is my diagnosis?
  • What did the doctor recommend?
  • Are any of my values abnormal?

📂 Outputs:
  • Conversation logs: /content/drive/MyDrive/Colab_Notebooks/LLMs/clinical_notes_qa_project/07_inference_demo/outputs/conversation_logs

⚡ Performance:
  • Answer time: ~30-60 seconds (CPU)
  • Quality: Good for demo (TinyLlama 1.1B)
  • Privacy: 100% local (no external APIs)

