In [1]:
# Import necessary libraries
import os
import google.generativeai as genai
import pinecone
from pinecone import Pinecone, ServerlessSpec
import pandas as pd
import numpy as np
from typing import List, Dict, Any, Optional, Tuple, Union
import json
from dotenv import load_dotenv
import time
import re
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.schema import Document
from sentence_transformers import SentenceTransformer
from dataclasses import dataclass, field
from enum import Enum
import logging
from abc import ABC, abstractmethod
import asyncio
from collections import defaultdict
import math
from datasets import load_dataset
import random

# Load environment variables from a local .env file (when one is present) so
# that the os.getenv() lookups in the configuration cell can find the API keys.
load_dotenv()

print("Libraries imported successfully for Healthcare QA Bot with Self-Ask pattern!")

Libraries imported successfully for Healthcare QA Bot with Self-Ask pattern!


In [2]:
# Healthcare Bot Configuration
class HealthcareConfig:
    """Settings holder for the healthcare QA bot.

    Creating an instance reads ``GEMINI_API_KEY`` and ``PINECONE_API_KEY`` from
    the environment, prints a warning for each key that is missing, configures
    the ``genai`` client when a Gemini key is available, and prints a two-line
    summary.
    """

    def __init__(self):
        # Credentials come from the process environment.
        self.gemini_api_key = os.getenv('GEMINI_API_KEY')
        self.pinecone_api_key = os.getenv('PINECONE_API_KEY')

        # Every fixed setting lives in one table and is copied onto the instance.
        fixed_settings = {
            # Gemini models and generation parameters
            "embedding_model": "models/embedding-001",
            "chat_model": "gemini-1.5-flash",
            "max_tokens": 200,
            "temperature": 0.2,
            # Optional local embedding backend (sentence-transformers)
            "use_sentence_transformers": False,
            "sentence_transformer_model": "all-MiniLM-L6-v2",
            # Pinecone index
            "index_name": "healthcare-qa-bot",
            "metric": "cosine",
            # Chunking and retrieval
            "chunk_size": 800,
            "chunk_overlap": 150,
            "top_k_results": 7,
            # Self-Ask settings
            "max_iterations": 3,
            "confidence_threshold": 0.7,
        }
        for setting_name, setting_value in fixed_settings.items():
            setattr(self, setting_name, setting_value)

        # The vector size follows the embedding backend: 384 for the
        # sentence-transformers model, 768 for the Gemini embedding model.
        self.dimension = 384 if self.use_sentence_transformers else 768

        # Report missing credentials, then configure Gemini if possible.
        has_gemini_key = bool(self.gemini_api_key)
        if not has_gemini_key:
            print("⚠️  Warning: GEMINI_API_KEY not found")
        if not self.pinecone_api_key:
            print("⚠️  Warning: PINECONE_API_KEY not found")
        if has_gemini_key:
            genai.configure(api_key=self.gemini_api_key)

        print(f"🏥 Healthcare Bot Configuration loaded")
        print(f"📏 Vector dimension: {self.dimension}")

# Single shared configuration instance; the manager, processor and bot objects
# created in the cells below are all constructed from it.
config = HealthcareConfig()
print("✅ Healthcare configuration ready!")

🏥 Healthcare Bot Configuration loaded
📏 Vector dimension: 768
✅ Healthcare configuration ready!


In [3]:
# Healthcare Dataset Loader
class HealthcareDatasetLoader:
    """Assemble the raw documents that seed the healthcare knowledge base.

    Documents come from two places: doctor utterances of the Hugging Face
    ``medical_dialog`` dataset (best effort) and a small built-in set of
    general healthcare reference texts (always included).
    """

    def __init__(self):
        # Result of the most recent load_healthcare_datasets() call.
        self.datasets = []
        
    def load_healthcare_datasets(self, max_dialogs: int = 10000, trust_remote_code: bool = False):
        """Load healthcare datasets from Hugging Face plus the built-in knowledge base.

        Args:
            max_dialogs: Number of leading rows of the ``medical_dialog`` train
                split to request (used as ``train[:max_dialogs]``).
            trust_remote_code: Opt in to running the dataset repository's own
                loading script. SECURITY: this executes code downloaded from
                the Hub, so it is off by default; with it off, the
                ``medical_dialog`` load is refused and only the built-in
                knowledge base is returned.

        Returns:
            A list of dicts with ``content``, ``source`` and ``type`` keys. The
            same list is stored on ``self.datasets``. Each call rebuilds the
            list, so calling the method repeatedly never duplicates documents.
        """
        print("📊 Loading healthcare datasets from Hugging Face...")
        
        # Build into a fresh list so that a repeated call does not append a
        # second copy of every document to self.datasets.
        documents = []
        
        try:
            # Load medical Q&A dataset
            print("Loading medical Q&A dataset...")
            # The keyword is passed only on opt-in, so the default call is
            # identical to a plain load_dataset(name, split=...).
            load_kwargs = {"trust_remote_code": True} if trust_remote_code else {}
            medical_qa = load_dataset(
                "medical_dialog",
                split=f"train[:{max_dialogs}]",  # first `max_dialogs` rows for the demo
                **load_kwargs
            )
            
            # Process medical Q&A data: keep only what the doctor said
            for item in medical_qa:
                if 'utterances' in item:
                    for utterance in item['utterances']:
                        if utterance.get('speaker') == 'doctor':
                            content = f"Medical Q&A: {utterance.get('utterance', '')}"
                            documents.append({
                                "content": content,
                                "source": "medical_dialog_hf",
                                "type": "medical_qa"
                            })
            
        except Exception as e:
            # Best effort: the built-in knowledge base below is still returned.
            print(f"Note: Could not load medical_dialog dataset: {e}")
        
        # Add general healthcare knowledge base
        healthcare_knowledge = [
            {
                "content": """
                Common Symptoms and Conditions:
                
                Fever: Body temperature above 100.4°F (38°C). Common causes include infections, 
                inflammatory conditions, and certain medications. Seek medical attention if fever 
                persists over 3 days or exceeds 103°F.
                
                Headaches: Can be tension-type, migraine, or cluster headaches. Tension headaches 
                are most common. Seek immediate care for sudden severe headache, headache with 
                neck stiffness, or headache after head injury.
                
                Chest Pain: Can range from minor muscle strain to serious heart conditions. 
                Seek immediate medical attention for crushing chest pain, pain radiating to arm/jaw, 
                or chest pain with shortness of breath.
                """,
                "source": "general_healthcare_kb",
                "type": "symptoms"
            },
            {
                "content": """
                Preventive Healthcare Guidelines:
                
                Regular Check-ups: Annual physical exams for adults, more frequent for chronic conditions.
                Recommended screenings include blood pressure, cholesterol, diabetes, and cancer screenings.
                
                Vaccinations: Stay up-to-date with vaccines including flu, COVID-19, and others as 
                recommended by your healthcare provider.
                
                Healthy Lifestyle: Balanced diet, regular exercise (150 minutes moderate activity per week), 
                adequate sleep (7-9 hours), stress management, and avoiding tobacco and excessive alcohol.
                
                Mental Health: Regular mental health check-ins, stress management techniques, 
                and seeking help when needed.
                """,
                "source": "preventive_care_kb",
                "type": "prevention"
            },
            {
                "content": """
                Emergency Situations - When to Seek Immediate Care:
                
                Call 911 for: Chest pain, difficulty breathing, severe bleeding, loss of consciousness, 
                severe burns, suspected stroke (FAST: Face drooping, Arm weakness, Speech difficulty, Time).
                
                Go to ER for: High fever with severe symptoms, severe abdominal pain, severe headache 
                with vision changes, signs of severe allergic reaction.
                
                Urgent Care for: Minor cuts requiring stitches, sprains, minor burns, UTI symptoms, 
                minor infections.
                
                Primary Care for: Routine check-ups, medication management, chronic disease management, 
                preventive care.
                """,
                "source": "emergency_care_kb",
                "type": "emergency"
            },
            {
                "content": """
                Medication Safety and Management:
                
                Taking Medications: Follow prescribed dosages, take at recommended times, 
                complete full course of antibiotics, store properly.
                
                Drug Interactions: Inform healthcare providers of all medications including 
                over-the-counter drugs and supplements. Use one pharmacy when possible.
                
                Side Effects: Know common side effects, report unusual symptoms, never stop 
                medications suddenly without consulting healthcare provider.
                
                Generic vs Brand: Generic medications contain same active ingredients as brand names 
                and are equally effective but typically less expensive.
                """,
                "source": "medication_safety_kb",
                "type": "medications"
            },
            {
                "content": """
                Mental Health and Wellness:
                
                Signs of Depression: Persistent sadness, loss of interest, changes in appetite/sleep, 
                fatigue, difficulty concentrating, feelings of worthlessness.
                
                Anxiety Management: Deep breathing exercises, regular exercise, adequate sleep, 
                limiting caffeine, talking to trusted friends/family.
                
                Stress Reduction: Time management, relaxation techniques, hobbies, social support, 
                professional counseling when needed.
                
                When to Seek Help: Persistent symptoms affecting daily life, thoughts of self-harm, 
                substance abuse, relationship problems.
                """,
                "source": "mental_health_kb",
                "type": "mental_health"
            }
        ]
        
        # Add healthcare knowledge to datasets
        documents.extend(healthcare_knowledge)
        self.datasets = documents
        
        print(f"✅ Loaded {len(self.datasets)} healthcare documents")
        return self.datasets

# Initialize dataset loader
dataset_loader = HealthcareDatasetLoader()
# `healthcare_data` is the list of {"content", "source", "type"} dicts that the
# document-processing cell later splits into chunks.
healthcare_data = dataset_loader.load_healthcare_datasets()

📊 Loading healthcare datasets from Hugging Face...
Loading medical Q&A dataset...


README.md: 0.00B [00:00, ?B/s]

medical_dialog.py: 0.00B [00:00, ?B/s]

Note: Could not load medical_dialog dataset: The repository for medical_dialog contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/medical_dialog.
Please pass the argument `trust_remote_code=True` to allow custom code to be run.
✅ Loaded 5 healthcare documents


In [4]:
# Healthcare Pinecone Manager
class HealthcarePineconeManager:
    """Create (when missing) and connect to the Pinecone index for healthcare data.

    After a successful ``initialize_pinecone()`` call, ``self.pc`` holds the
    Pinecone client and ``self.index`` the connected index handle; both are
    ``None`` before that.
    """

    def __init__(self, config):
        self.config = config
        self.pc = None
        self.index = None

    def _wait_for_index_ready(self, timeout_seconds: float = 60.0, poll_seconds: float = 1.0) -> bool:
        """Poll the index description until it reports ready.

        Returns:
            True once ``status['ready']`` is truthy, False if that has not
            happened within ``timeout_seconds``.
        """
        deadline = time.monotonic() + timeout_seconds
        while True:
            if self.pc.describe_index(self.config.index_name).status['ready']:
                return True
            if time.monotonic() >= deadline:
                return False
            time.sleep(poll_seconds)
        
    def initialize_pinecone(self):
        """Initialize Pinecone for healthcare data.

        Creates the serverless index named by the config if it does not exist,
        waits for it to become ready, connects to it and prints its stats.

        Raises:
            Exception: any error from the Pinecone client is printed and then
                re-raised unchanged.
        """
        try:
            self.pc = Pinecone(api_key=self.config.pinecone_api_key)
            
            existing_indexes = [index.name for index in self.pc.list_indexes()]
            
            if self.config.index_name not in existing_indexes:
                print(f"🏥 Creating healthcare index: {self.config.index_name}")
                self.pc.create_index(
                    name=self.config.index_name,
                    dimension=self.config.dimension,
                    metric=self.config.metric,
                    spec=ServerlessSpec(
                        cloud="aws",
                        region="us-east-1"
                    )
                )
                # Index creation is asynchronous. Poll for readiness instead of
                # sleeping a fixed time, which can be too short or needlessly long.
                if not self._wait_for_index_ready():
                    # Keep going as before; a later call will surface the
                    # problem if the index really is unusable.
                    print(f"⚠️  Index {self.config.index_name} did not report ready in time; continuing")
            else:
                print(f"Healthcare index {self.config.index_name} already exists")
            
            self.index = self.pc.Index(self.config.index_name)
            print(f"✅ Connected to healthcare Pinecone index")
            
            stats = self.index.describe_index_stats()
            print(f"Index stats: {stats}")
            
        except Exception as e:
            print(f"❌ Error initializing Pinecone: {str(e)}")
            raise

# Initialize healthcare Pinecone manager
pinecone_manager = HealthcarePineconeManager(config)

# Only contact Pinecone when an API key is configured; otherwise the manager's
# `pc` and `index` attributes keep their initial value of None.
if config.pinecone_api_key:
    pinecone_manager.initialize_pinecone()
else:
    print("⚠️  Skipping Pinecone initialization - API key not found")

🏥 Creating healthcare index: healthcare-qa-bot
✅ Connected to healthcare Pinecone index
Index stats: {'dimension': 768,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}


In [5]:
# Healthcare Document Processor
class HealthcareDocumentProcessor:
    """Split raw healthcare entries into chunk-sized ``Document`` objects."""

    def __init__(self, config):
        self.config = config
        # Chunk size and overlap come from the config and are measured with len().
        # Separators are tried in the listed order: paragraph break, line
        # break, sentence end, space, then individual characters.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,
            length_function=len,
            separators=["\n\n", "\n", ". ", " ", ""]
        )
        
    def process_healthcare_data(self, healthcare_data: List[Dict]) -> List["Document"]:
        """Process healthcare data into chunks.

        Args:
            healthcare_data: Dicts with a ``content`` string and optional
                ``source`` / ``type`` keys (defaulting to ``"unknown"`` and
                ``"general"``).

        Returns:
            One ``Document`` per chunk, in input order. Each carries
            ``source``, ``type``, ``chunk_id`` (position within its entry) and
            ``total_chunks`` in its metadata. Entries whose content is missing,
            ``None``, not a string, or whitespace-only are skipped.
        """
        documents = []
        
        for item in healthcare_data:
            content = item.get('content')
            # A key that is present but holds None (or any non-string) would
            # otherwise crash on .strip(); such entries have nothing to index.
            if not isinstance(content, str) or not content.strip():
                continue
            
            source = item.get('source', 'unknown')
            doc_type = item.get('type', 'general')
            chunks = self.text_splitter.split_text(content)
            
            for i, chunk in enumerate(chunks):
                documents.append(Document(
                    page_content=chunk,
                    metadata={
                        "source": source,
                        "type": doc_type,
                        "chunk_id": i,
                        "total_chunks": len(chunks)
                    }
                ))
        
        return documents

# Healthcare Embedding Manager
class HealthcareEmbeddingManager:
    """Embed healthcare text and read/write the resulting vectors in Pinecone.

    The embedding backend is chosen by ``config.use_sentence_transformers``:
    a local ``SentenceTransformer`` model when true, otherwise
    ``GoogleGenerativeAIEmbeddings``.
    """

    def __init__(self, config, pinecone_manager):
        self.config = config
        self.pinecone_manager = pinecone_manager
        
        if config.use_sentence_transformers:
            self.embedding_model = SentenceTransformer(config.sentence_transformer_model)
        else:
            self.embedding_model = GoogleGenerativeAIEmbeddings(
                model=config.embedding_model,
                google_api_key=config.gemini_api_key
            )
    
    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for healthcare texts (document side)."""
        if self.config.use_sentence_transformers:
            return self.embedding_model.encode(texts).tolist()
        else:
            return self.embedding_model.embed_documents(texts)
    
    def generate_query_embedding(self, query: str) -> List[float]:
        """Generate the embedding for a single search query.

        The Google backend exposes a dedicated ``embed_query`` call for the
        query side of retrieval, so it is used here instead of the
        document-side ``embed_documents``. The sentence-transformers backend
        has a single ``encode`` call for both.
        """
        if self.config.use_sentence_transformers:
            return self.embedding_model.encode([query]).tolist()[0]
        return self.embedding_model.embed_query(query)
    
    def store_healthcare_documents(self, documents: List["Document"]):
        """Store healthcare documents in Pinecone.

        Each document becomes one vector with id ``healthcare_doc_<position>``
        and metadata holding its text, source and type. Vectors are upserted
        in batches of 100. An empty ``documents`` list makes no embedding or
        upsert request.
        """
        print("🏥 Storing healthcare documents...")
        
        texts = [doc.page_content for doc in documents]
        # Do not send an embedding request when there is nothing to embed.
        embeddings = self.generate_embeddings(texts) if texts else []
        
        vectors = []
        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            vector_id = f"healthcare_doc_{i}"
            metadata = {
                "text": doc.page_content,
                "source": doc.metadata.get("source", "unknown"),
                "type": doc.metadata.get("type", "general")
            }
            vectors.append((vector_id, embedding, metadata))
        
        # Store in batches
        batch_size = 100
        for i in range(0, len(vectors), batch_size):
            batch = vectors[i:i + batch_size]
            self.pinecone_manager.index.upsert(vectors=batch)
        
        print(f"✅ Stored {len(vectors)} healthcare document vectors")
    
    def search_healthcare_knowledge(self, query: str, top_k: Optional[int] = None) -> List[Dict]:
        """Search healthcare knowledge base.

        Args:
            query: Free-text question to look up.
            top_k: Number of matches to request; defaults to
                ``config.top_k_results`` when ``None``.

        Returns:
            One dict per match with ``text``, ``source``, ``type`` (from the
            stored metadata) and ``score`` keys, in the order Pinecone
            returned them.
        """
        if top_k is None:
            top_k = self.config.top_k_results
        
        query_embedding = self.generate_query_embedding(query)
        
        results = self.pinecone_manager.index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True
        )
        
        documents = []
        for match in results['matches']:
            documents.append({
                'text': match['metadata']['text'],
                'source': match['metadata']['source'],
                'type': match['metadata']['type'],
                'score': match['score']
            })
        
        return documents

# Initialize healthcare processors
# The processor only needs the chunking settings from the config; the embedding
# manager also receives the Pinecone manager, whose index it reads and writes.
doc_processor = HealthcareDocumentProcessor(config)
embedding_manager = HealthcareEmbeddingManager(config, pinecone_manager)

print("🏥 Healthcare document processing ready!")

🏥 Healthcare document processing ready!


In [6]:
# Process and Store Healthcare Data
# Storing needs a Pinecone key plus a way to embed text: either a Gemini key
# or the local sentence-transformers backend.
if not config.pinecone_api_key or not (config.gemini_api_key or config.use_sentence_transformers):
    print("⚠️  Skipping document storage - API keys not configured")
else:
    print("🏥 Processing healthcare documents...")

    # Split the raw knowledge-base entries into chunk-sized documents.
    healthcare_documents = doc_processor.process_healthcare_data(healthcare_data)
    print(f"📄 Created {len(healthcare_documents)} healthcare document chunks")

    # Upload only into an empty index; an index that already holds vectors is
    # left exactly as it is.
    stats = pinecone_manager.index.describe_index_stats()

    if stats['total_vector_count'] != 0:
        print(f"📊 Index already contains {stats['total_vector_count']} vectors")
    else:
        print("📊 Index is empty, storing healthcare documents...")
        embedding_manager.store_healthcare_documents(healthcare_documents)

    print("✅ Healthcare knowledge base ready!")

🏥 Processing healthcare documents...
📄 Created 9 healthcare document chunks
📊 Index is empty, storing healthcare documents...
🏥 Storing healthcare documents...
✅ Stored 9 healthcare document vectors
✅ Healthcare knowledge base ready!


In [None]:
class GreetingHandler:
    """Detect and answer small-talk (greetings, farewells, thanks) with Gemini.

    Only small-talk is treated as conversational. Messages classified as
    ``general_health_question`` are NOT conversational, so callers route them
    to the retrieval pipeline, where the answer is grounded in the knowledge
    base.
    """

    # Labels the classification prompt asks the model to choose from.
    KNOWN_INTENTS = ("greeting", "farewell", "thank_you", "general_health_question", "other")
    # Intents this handler answers itself.
    CONVERSATIONAL_INTENTS = frozenset({"greeting", "farewell", "thank_you"})

    def __init__(self, config=None):
        """Create the handler.

        Args:
            config: Object providing ``gemini_api_key``, ``chat_model``,
                ``temperature`` and ``max_tokens``. When omitted, the
                notebook-level ``config`` variable is used, which keeps
                ``GreetingHandler()`` working.
        """
        if config is None:
            config = globals()["config"]

        # (text, intent) of the most recent successful classification, so the
        # usual is_conversational() -> generate_greeting_response() sequence
        # costs one classification call instead of two.
        self._last_intent = None

        # Use Gemini LLM for intent detection and response
        self.llm = None
        if config.gemini_api_key:
            self.llm = ChatGoogleGenerativeAI(
                model=config.chat_model,
                google_api_key=config.gemini_api_key,
                temperature=config.temperature,
                max_tokens=config.max_tokens
            )

    @classmethod
    def _normalize_intent(cls, raw: str) -> str:
        """Map a raw model reply such as '"Greeting".' onto one of KNOWN_INTENTS."""
        cleaned = re.sub(r"[\s\-]+", "_", raw.strip().lower())
        cleaned = re.sub(r"[^a-z_]", "", cleaned).strip("_")
        if cleaned in cls.KNOWN_INTENTS:
            return cleaned
        # Tolerate replies that wrap the label in extra words.
        for label in cls.KNOWN_INTENTS:
            if label in cleaned:
                return label
        return "other"

    def detect_intent(self, text: str) -> str:
        """Use Gemini to classify the intent of the input text.

        Returns:
            One of ``KNOWN_INTENTS``, or ``"unknown"`` when no LLM is
            configured or the call fails.
        """
        if not self.llm:
            return "unknown"
        if self._last_intent is not None and self._last_intent[0] == text:
            return self._last_intent[1]
        prompt = f"""
        Classify the following user message into one of these categories: greeting, farewell, thank_you, general_health_question, or other.
        Message: "{text}"
        Respond with only the category name.
        """
        try:
            response = self.llm.invoke(prompt)
            intent = self._normalize_intent(response.content)
        except Exception as e:
            print(f"Intent detection failed: {e}")
            return "unknown"
        self._last_intent = (text, intent)
        return intent

    def generate_greeting_response(self, text: str) -> Optional[str]:
        """Generate an appropriate conversational response using Gemini.

        Returns:
            The reply text, or ``None`` when no LLM is configured, the call
            fails, or the model declines by answering "None" (with or without
            quotes or punctuation) or with an empty string.
        """
        if not self.llm:
            return None
        intent = self.detect_intent(text)
        prompt = f"""
        You are a helpful healthcare assistant. Respond to the following user message appropriately, considering its intent: {intent}.
        Message: "{text}"
        If it's a greeting, introduce yourself as a healthcare assistant.
        If it's a farewell, wish the user well and remind them to consult professionals for health concerns.
        If it's a thank you, acknowledge and remind about educational purpose.
        If it's a general health question about you, clarify you are an AI and offer help.
        Otherwise, return "None".
        """
        try:
            response = self.llm.invoke(prompt)
            reply = response.content.strip()
        except Exception as e:
            print(f"Greeting response failed: {e}")
            return None
        # The prompt writes None in quotes, so the model often echoes the quotes back.
        if reply.strip("\"'`. ").lower() in ("", "none"):
            return None
        return reply

    def is_conversational(self, text: str) -> bool:
        """Use Gemini to determine if the text is conversational (small-talk)."""
        return self.detect_intent(text) in self.CONVERSATIONAL_INTENTS

# Initialize greeting handler
# The handler creates its own Gemini chat client from the notebook-level
# `config`; without a Gemini key its `llm` attribute is None.
greeting_handler = GreetingHandler()
print("💬 Greeting handler (Gemini-powered) ready!")


💬 Greeting handler ready!


In [8]:
# Self-Ask with Search Implementation
class SelfAskSearchSystem:
    """Answer health questions with a decompose -> retrieve -> answer -> synthesize flow.

    A question is optionally split into sub-questions by the LLM. Each
    sub-question is answered from the passages returned by
    ``embedding_manager.search_healthcare_knowledge``. When there is more than
    one sub-answer, the LLM merges them into a single final answer.
    """

    def __init__(self, config, embedding_manager):
        self.config = config
        self.embedding_manager = embedding_manager
        
        if config.gemini_api_key:
            self.llm = ChatGoogleGenerativeAI(
                model=config.chat_model,
                google_api_key=config.gemini_api_key,
                temperature=config.temperature,
                max_tokens=config.max_tokens
            )
        else:
            self.llm = None

    @staticmethod
    def _response_text(response) -> str:
        """Return the text of an LLM reply (its ``content`` attribute, else ``str()``)."""
        return response.content if hasattr(response, 'content') else str(response)

    @staticmethod
    def _unique_in_order(items: List[str]) -> List[str]:
        """Drop duplicates while keeping first-seen order (stable, unlike ``set``)."""
        return list(dict.fromkeys(items))
    
    def decompose_question(self, question: str) -> List[str]:
        """Decompose complex question into sub-questions.

        Returns:
            A non-empty list of non-empty strings. Falls back to
            ``[question]`` when no LLM is configured, the call fails, the
            reply has no JSON array, or the array has no usable strings.
        """
        if not self.llm:
            return [question]
        
        # json.dumps keeps the in-prompt example valid JSON even when the
        # question itself contains quotes or backslashes.
        question_as_json_list = json.dumps([question], ensure_ascii=False)
        
        decomposition_prompt = f"""
        You are a healthcare assistant. Analyze this health question and break it down into simpler sub-questions if needed.
        
        Original question: {question}
        
        If this is a simple question, respond with just: {question_as_json_list}
        If this is complex, break it into 2-3 simpler sub-questions as a JSON list.
        
        Examples:
        - "What is diabetes?" → ["What is diabetes?"]
        - "What are the symptoms and treatment options for diabetes?" → ["What are the symptoms of diabetes?", "What are the treatment options for diabetes?"]
        
        Respond only with a JSON list of sub-questions:
        """
        
        try:
            response = self.llm.invoke(decomposition_prompt)
            response_text = self._response_text(response)
            
            # The model may wrap the array in prose or code fences, so take
            # the outermost [...] span.
            start = response_text.find('[')
            end = response_text.rfind(']')
            if start == -1 or end < start:
                return [question]
            parsed = json.loads(response_text[start:end + 1])
        except Exception as e:
            print(f"Question decomposition failed: {e}")
            return [question]
        
        # Accept only a list, and only its non-empty string items. An empty or
        # malformed decomposition must not reach the answering loop.
        if not isinstance(parsed, list):
            return [question]
        sub_questions = [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
        return sub_questions or [question]
    
    def search_and_answer(self, question: str) -> Dict[str, Any]:
        """Search for information and provide answer using Self-Ask pattern.

        Returns:
            Dict with ``question``, ``answer``, ``confidence`` and ``sources``.
            ``confidence`` is the mean similarity score of the top (up to
            three) matches clamped to [0, 1], or 0.0 when nothing was
            retrieved, no LLM is configured, or generation failed.
        """
        print(f"🔍 Self-Ask: {question}")
        
        # Search for relevant information
        search_results = self.embedding_manager.search_healthcare_knowledge(question)
        
        if not search_results:
            return {
                "question": question,
                "answer": "I don't have specific information about this topic in my healthcare knowledge base.",
                "confidence": 0.0,
                "sources": []
            }
        
        # Prepare context from the five best matches
        context_parts = []
        sources = []
        
        for result in search_results[:5]:
            context_parts.append(f"Source ({result['type']}): {result['text']}")
            sources.append(f"{result['source']} (relevance: {result['score']:.3f})")
        
        context = "\n\n".join(context_parts)
        
        # Generate answer using LLM
        if not self.llm:
            return {
                "question": question,
                "answer": "LLM not configured",
                "confidence": 0.0,
                "sources": sources
            }
        
        answer_prompt = f"""
        You are a knowledgeable healthcare assistant. Answer the following health question using the provided medical context.
        
        IMPORTANT GUIDELINES:
        - Provide accurate, helpful information based on the context
        - Always include a disclaimer that this is for educational purposes
        - Recommend consulting healthcare professionals for medical advice
        - If the context doesn't contain relevant information, say so
        - Be empathetic and professional
        
        Question: {question}
        
        Medical Context:
        {context}
        
        Please provide a clear, helpful answer:
        """
        
        try:
            response = self.llm.invoke(answer_prompt)
            answer = self._response_text(response)
        except Exception as e:
            return {
                "question": question,
                "answer": f"Error generating answer: {str(e)}",
                "confidence": 0.0,
                "sources": sources
            }
        
        # Confidence is the mean similarity of the best (up to three) matches.
        # NOTE(review): config.confidence_threshold is deliberately not used as
        # a cap here. min(score, threshold) would pin every strong match at
        # exactly the threshold value.
        top_scores = [result['score'] for result in search_results[:3]]
        avg_score = sum(top_scores) / len(top_scores)
        confidence = max(0.0, min(avg_score, 1.0))
        
        return {
            "question": question,
            "answer": answer,
            "confidence": confidence,
            "sources": sources
        }
    
    def self_ask_process(self, main_question: str) -> Dict[str, Any]:
        """Main Self-Ask process with iterative questioning.

        Returns:
            For a single sub-question: that sub-question's result dict as is.
            Otherwise a dict with ``question``, ``answer``, ``confidence``
            (mean of the sub-answer confidences) and de-duplicated
            ``sources``, plus ``sub_questions`` and ``sub_answers`` when the
            synthesis succeeded.
        """
        print(f"🤔 Starting Self-Ask process for: {main_question}")
        
        # Step 1: Decompose the question
        sub_questions = self.decompose_question(main_question)
        if not sub_questions:
            # Defensive: always answer at least the original question.
            sub_questions = [main_question]
        print(f"📝 Sub-questions: {sub_questions}")
        
        # Step 2: Answer each sub-question
        sub_answers = [self.search_and_answer(sub_q) for sub_q in sub_questions]
        
        # Step 3: Synthesize final answer
        if len(sub_answers) == 1:
            return sub_answers[0]
        
        unique_sources = self._unique_in_order(
            [source for result in sub_answers for source in result['sources']]
        )
        
        if not self.llm:
            return {
                "question": main_question,
                "answer": "LLM not configured for synthesis",
                "confidence": 0.0,
                "sources": unique_sources
            }
        
        # Combine answers for complex questions
        combined_context = "\n\n".join([
            f"Q: {result['question']}\nA: {result['answer']}"
            for result in sub_answers
        ])
        
        synthesis_prompt = f"""
            You are a healthcare assistant. I've broken down a complex health question into parts and got answers. 
            Please synthesize these into one comprehensive, coherent answer.
            
            Original question: {main_question}
            
            Sub-question answers:
            {combined_context}
            
            Please provide a comprehensive answer that addresses the original question:
            """
        
        try:
            response = self.llm.invoke(synthesis_prompt)
            final_answer = self._response_text(response)
        except Exception as e:
            return {
                "question": main_question,
                "answer": f"Error synthesizing answer: {str(e)}",
                "confidence": 0.0,
                "sources": unique_sources
            }
        
        avg_confidence = sum(result['confidence'] for result in sub_answers) / len(sub_answers)
        
        return {
            "question": main_question,
            "answer": final_answer,
            "confidence": avg_confidence,
            "sources": unique_sources,
            "sub_questions": sub_questions,
            "sub_answers": sub_answers
        }

# Initialize Self-Ask system
# Retrieval goes through `embedding_manager`; the system creates its own Gemini
# chat client from `config` (its `llm` attribute is None without a Gemini key).
self_ask_system = SelfAskSearchSystem(config, embedding_manager)
print("🧠 Self-Ask with Search system ready!")

🧠 Self-Ask with Search system ready!


In [9]:
# Main Healthcare QA Bot
class HealthcareQABot:
    """Front door of the bot: routes each message to small-talk or Self-Ask.

    Every result returned by ``ask`` is also appended to
    ``conversation_history``.
    """

    def __init__(self, config, embedding_manager, self_ask_system, greeting_handler):
        self.config = config
        self.embedding_manager = embedding_manager
        self.self_ask_system = self_ask_system
        self.greeting_handler = greeting_handler
        self.conversation_history = []
        
    def ask(self, question: str, verbose: bool = False) -> Dict[str, Any]:
        """Main method to ask healthcare questions.

        Args:
            question: The user's message; surrounding whitespace is stripped.
            verbose: When true, print the routing decision and the response.

        Returns:
            A dict with ``query``, ``response``, ``type`` and ``timestamp``.
            ``type`` is ``"conversational"`` for small-talk answered by the
            greeting handler and ``"medical"`` otherwise. Medical results also
            carry ``confidence`` and ``sources``, plus ``sub_questions`` and
            ``sub_answers`` when the question was decomposed.
        """
        question = question.strip()
        
        if verbose:
            print(f"🏥 Healthcare Bot received: {question}")
        
        # Check if it's a greeting or conversational
        greeting_response = None
        if self.greeting_handler.is_conversational(question):
            greeting_response = self.greeting_handler.generate_greeting_response(question)
        
        if greeting_response:
            result = {
                "query": question,
                "response": greeting_response,
                "type": "conversational",
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
            }
        else:
            # Use Self-Ask with Search for medical questions. This branch also
            # covers messages flagged as conversational for which the greeting
            # handler produced no reply, so the user never receives a None
            # response.
            if verbose:
                print("🔍 Processing as medical question with Self-Ask...")
            
            self_ask_result = self.self_ask_system.self_ask_process(question)
            
            # Add medical disclaimer
            medical_disclaimer = "\n\n⚠️ **Medical Disclaimer**: This information is for educational purposes only and should not replace professional medical advice. Please consult with a healthcare provider for medical concerns."
            
            result = {
                "query": question,
                "response": self_ask_result["answer"] + medical_disclaimer,
                "type": "medical",
                "confidence": self_ask_result["confidence"],
                "sources": self_ask_result["sources"],
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
            }
            
            if "sub_questions" in self_ask_result:
                result["sub_questions"] = self_ask_result["sub_questions"]
                result["sub_answers"] = self_ask_result["sub_answers"]
        
        # Add to conversation history
        self.conversation_history.append(result)
        
        if verbose:
            print(f"💬 Response: {result['response']}")
            if result.get('sources'):
                print(f"📚 Sources: {len(result['sources'])} references")
        
        return result
    
    def get_conversation_history(self) -> List[Dict]:
        """Get conversation history (the live list, oldest entry first)."""
        return self.conversation_history
    
    def clear_history(self):
        """Clear conversation history"""
        self.conversation_history = []
        print("🗑️ Conversation history cleared")

# Initialize Healthcare QA Bot
# Wires together the shared config, the retrieval layer, the Self-Ask system
# and the greeting handler created in the cells above.
healthcare_bot = HealthcareQABot(config, embedding_manager, self_ask_system, greeting_handler)
print("🏥 Healthcare QA Bot with Self-Ask pattern ready!")

🏥 Healthcare QA Bot with Self-Ask pattern ready!


In [10]:
# Test Healthcare QA Bot
def test_healthcare_bot():
    """Test the healthcare bot with various types of questions"""
    
    if not ((config.gemini_api_key or config.use_sentence_transformers) and config.pinecone_api_key):
        print("⚠️  Cannot test - API keys not configured")
        return
    
    print("🧪 Testing Healthcare QA Bot with Self-Ask pattern...\n")
    
    # Test questions covering different scenarios
    test_questions = [
        # Greetings
        "Hello!",
        "Hi there, how are you?",
        
        # Simple medical questions
        "What is diabetes?",
        "What are the symptoms of fever?",
        
        # Complex medical questions (will trigger Self-Ask decomposition)
        "What are the symptoms and treatment options for high blood pressure?",
        "How can I prevent heart disease and what are the warning signs?",
        
        # Emergency situations
        "When should I go to the emergency room?",
        
        # Medication questions
        "How should I store my medications?",
        
        # Mental health
        "What are signs of depression?",
        
        # Thank you
        "Thank you for your help!",
        "Goodbye"
    ]
    
    for i, question in enumerate(test_questions, 1):
        print(f"\n{'='*60}")
        print(f"Test {i}: {question}")
        print('='*60)
        
        try:
            result = healthcare_bot.ask(question, verbose=True)
            
            if result.get('sources'):
                print(f"\n📚 Sources used:")
                for source in result['sources'][:3]:  # Show top 3 sources
                    print(f"  - {source}")
            
            if result.get('sub_questions'):
                print(f"\n🤔 Sub-questions identified:")
                for sub_q in result['sub_questions']:
                    print(f"  - {sub_q}")
                    
        except Exception as e:
            print(f"❌ Error testing question: {str(e)}")
        
        time.sleep(1)  # Small delay between requests
    
    print(f"\n{'='*60}")
    print("🎉 Healthcare bot testing completed!")
    print(f"💬 Total conversations: {len(healthcare_bot.get_conversation_history())}")

# Run the test (uncomment to test)
# test_healthcare_bot()

In [11]:
# Interactive Healthcare Chat
def interactive_healthcare_chat():
    """Run a blocking question/answer loop against ``healthcare_bot``.

    The loop reads one line at a time with ``input()``:

    * ``quit`` / ``exit`` / ``bye`` / ``goodbye`` (case-insensitive) asks the
      bot for a farewell, prints it and ends the session.
    * A blank line prints a hint and re-prompts.
    * Anything else is passed to ``healthcare_bot.ask(question, verbose=False)``
      and the response is printed; for results whose ``type`` is ``'medical'``
      the confidence, sub-questions (when more than one) and source count are
      shown as well.

    The session also ends on Ctrl-C, on end-of-input, or when ``input()``
    itself fails (retrying a broken input stream would loop forever).  An
    error raised while answering a question is printed and the loop continues.

    Returns:
        None.  Returns immediately, after printing a hint, when the API-key
        check on ``config`` fails.
    """

    if not ((config.gemini_api_key or config.use_sentence_transformers) and config.pinecone_api_key):
        print("⚠️  Cannot start interactive mode - API keys not configured")
        print("💡 Please set GEMINI_API_KEY and PINECONE_API_KEY in your .env file")
        return

    print("🏥 Welcome to Healthcare Assistant! (Powered by Self-Ask with Search)")
    print("💬 Ask me about health topics, symptoms, treatments, or just say hello!")
    print("🔍 I use Self-Ask pattern to break down complex questions")
    print("⚠️  Remember: This is for educational purposes only. Consult healthcare professionals for medical advice.")
    print("Type 'quit' to exit.\n")

    while True:
        # Reading input is guarded separately from answering: if the input
        # stream is closed or unavailable, the next read fails the same way,
        # so the only sane reaction is to leave the loop.
        try:
            question = input("🤔 Your question: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\n👋 Stay healthy! Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {str(e)}")
            break

        try:
            if question.lower() in ['quit', 'exit', 'bye', 'goodbye']:
                # The user asked to leave: exit even if the farewell call fails
                try:
                    final_response = healthcare_bot.ask("goodbye")
                    print(f"\n🏥 Healthcare Bot: {final_response['response']}")
                except Exception as e:
                    print(f"❌ Error: {str(e)}")
                break

            if not question:
                print("Please ask a question or type 'quit' to exit.")
                continue

            print("\n" + "="*50)

            # Get answer from healthcare bot
            result = healthcare_bot.ask(question, verbose=False)

            print(f"🏥 Healthcare Bot: {result['response']}")

            # Show additional info for medical questions
            if result.get('type') == 'medical':
                # Compare against None so that a score of 0.0 is still shown
                confidence = result.get('confidence')
                if confidence is not None:
                    print(f"\n📊 Confidence: {confidence:.2f}")

                if result.get('sub_questions') and len(result['sub_questions']) > 1:
                    print("\n🤔 I broke your question into parts:")
                    for sub_q in result['sub_questions']:
                        print(f"  • {sub_q}")

                if result.get('sources'):
                    print(f"\n📚 Information sources: {len(result['sources'])} references")

            print("="*50 + "\n")

        except KeyboardInterrupt:
            print("\n👋 Stay healthy! Goodbye!")
            break
        except Exception as e:
            # A failed answer is reported; the user may try another question
            print(f"❌ Error: {str(e)}")

# Usage hints, followed by a snapshot of which settings are configured
usage_lines = [
    "💡 To test the healthcare bot:",
    "   test_healthcare_bot()      # For automated testing",
    "   interactive_healthcare_chat()  # For interactive mode",
]
print("\n".join(usage_lines))

print("\n🔧 Configuration status:")
for label, enabled in (
    ("Gemini API", config.gemini_api_key),
    ("Sentence Transformers", config.use_sentence_transformers),
    ("Pinecone", config.pinecone_api_key),
):
    # Any truthy setting counts as configured
    print(f"   - {label}: {'✅' if enabled else '❌'}")
print(f"   - Healthcare Data: ✅ {len(healthcare_data)} documents loaded")

💡 To test the healthcare bot:
   test_healthcare_bot()      # For automated testing
   interactive_healthcare_chat()  # For interactive mode

🔧 Configuration status:
   - Gemini API: ✅
   - Sentence Transformers: ❌
   - Pinecone: ✅
   - Healthcare Data: ✅ 5 documents loaded


In [None]:
# Start the interactive session; this cell blocks on input() until the user
# types a quit word or interrupts the kernel.
interactive_healthcare_chat()

🏥 Welcome to Healthcare Assistant! (Powered by Self-Ask with Search)
💬 Ask me about health topics, symptoms, treatments, or just say hello!
🔍 I use Self-Ask pattern to break down complex questions
⚠️  Remember: This is for educational purposes only. Consult healthcare professionals for medical advice.
Type 'quit' to exit.


🤔 Starting Self-Ask process for: what is fever
📝 Sub-questions: ['What is the definition of fever?', 'What causes a fever?', 'What are the symptoms of a fever?']
🔍 Self-Ask: What is the definition of fever?
🔍 Self-Ask: What causes a fever?
🔍 Self-Ask: What are the symptoms of a fever?
🏥 Healthcare Bot: A fever is defined as a body temperature above 100.4°F (38°C).  While an elevated temperature is the defining characteristic, it's crucial to understand that a fever is usually a *symptom* of an underlying condition, not a disease itself.  Common causes include infections (viral, bacterial, or fungal), inflammatory conditions, and certain medications.  Other symptoms 

In [12]:
# Quick Demo of Healthcare Bot
print("🏥 Healthcare QA Bot Demo with Self-Ask Pattern")
print("=" * 50)

# Demo conversations
demo_questions = [
    "Hello!",
    "What are the symptoms of diabetes?",
    "What are the symptoms and treatment options for high blood pressure?"
]

# Longest response prefix printed per answer; keeps the demo output skimmable
DEMO_PREVIEW_CHARS = 200

for question in demo_questions:
    print(f"\n👤 User: {question}")
    result = healthcare_bot.ask(question)

    # Append the ellipsis only when text was actually cut off, so a short
    # reply (such as a greeting) is not displayed as if it were truncated.
    preview = result['response']
    if len(preview) > DEMO_PREVIEW_CHARS:
        preview = preview[:DEMO_PREVIEW_CHARS] + "..."
    print(f"🏥 Bot: {preview}")

    if result.get('sub_questions') and len(result['sub_questions']) > 1:
        print(f"🤔 Self-Ask decomposed into: {result['sub_questions']}")

print("\n" + "=" * 50)
print("🎯 Healthcare Bot Features:")
print("✅ Self-Ask with Search pattern for complex questions")
print("✅ Healthcare dataset from Hugging Face + curated knowledge")
print("✅ Greeting and conversational handling")
print("✅ Medical disclaimers and safety recommendations")
print("✅ Question decomposition for complex queries")
print("✅ Confidence scoring and source attribution")

🏥 Healthcare QA Bot Demo with Self-Ask Pattern

👤 User: Hello!
🏥 Bot: Hello! I'm your healthcare assistant. I'm here to help answer your health-related questions and provide general medical information. How can I assist you today?...

👤 User: What are the symptoms of diabetes?
🤔 Starting Self-Ask process for: What are the symptoms of diabetes?
📝 Sub-questions: ['What are the common symptoms of type 1 diabetes?', 'What are the common symptoms of type 2 diabetes?', 'What are some less common or subtle symptoms of diabetes?']
🔍 Self-Ask: What are the common symptoms of type 1 diabetes?
🔍 Self-Ask: What are the common symptoms of type 2 diabetes?
🔍 Self-Ask: What are some less common or subtle symptoms of diabetes?
🏥 Bot: I apologize, but I cannot provide a comprehensive answer to your question about the symptoms of diabetes.  My knowledge base currently lacks the necessary information on the symptoms of both type 1 an...
🤔 Self-Ask decomposed into: ['What are the common symptoms of type 1

# Healthcare QA Bot - Self-Ask with Search Implementation

## Project Overview
This notebook implements a healthcare question-answering system using the **Self-Ask with Search** pattern, integrating Google Gemini API, Pinecone vector database, and Hugging Face healthcare datasets.

## Key Features
- **Self-Ask Pattern**: Decomposes complex questions into simpler sub-questions for better understanding
- **Healthcare Dataset Integration**: Uses Hugging Face medical datasets plus curated healthcare knowledge
- **Conversational Handling**: Manages greetings, farewells, and general conversation
- **Medical Safety**: Includes appropriate disclaimers and safety recommendations
- **Search-Based Retrieval**: Vector similarity search with confidence scoring

## Technical Architecture
1. **Question Analysis**: Determines if input is conversational or medical
2. **Question Decomposition**: Breaks complex queries into manageable sub-questions
3. **Search Process**: Retrieves relevant medical information for each sub-question
4. **Answer Synthesis**: Combines sub-answers into comprehensive response
5. **Safety Layer**: Adds medical disclaimers and professional consultation recommendations

## Self-Ask vs. ReAct Comparison
- **Self-Ask**: Focuses on question decomposition and iterative search
- **ReAct**: Emphasizes reasoning, acting, and observing with tool usage
- **Healthcare Context**: Self-Ask better suits medical Q&A where breaking down symptoms/treatments is crucial

## Main Components
- `HealthcareConfig`: System configuration for medical domain
- `HealthcareDatasetLoader`: Loads Hugging Face medical datasets
- `GreetingHandler`: Manages conversational interactions
- `SelfAskSearchSystem`: Core Self-Ask implementation with search
- `HealthcareQABot`: Main interface integrating all components

## Data Sources
- **Hugging Face**: Medical dialog datasets for real medical conversations
- **Curated Knowledge**: Symptoms, treatments, emergency care, mental health
- **Safety Guidelines**: Medication management, when to seek care

## Usage Example
```python
# Initialize and use the healthcare bot
healthcare_bot = HealthcareQABot(config, embedding_manager, self_ask_system, greeting_handler)

# Ask a complex question
result = healthcare_bot.ask("What are the symptoms and treatment options for diabetes?")

# The bot will:
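# 1. Decompose into sub-questions, e.g. ["What are symptoms of diabetes?", "What are treatment options for diabetes?"]
#    (the sub-questions are generated by the model, so the exact wording and count vary)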
# 2. Search medical knowledge for each sub-question
# 3. Synthesize comprehensive answer with medical disclaimer
```

## Performance Features
- **Question Decomposition**: Automatically breaks complex medical queries
- **Confidence Scoring**: Relevance-based confidence metrics
- **Source Attribution**: Tracks information sources for transparency
- **Conversation History**: Records every exchange so earlier questions and answers can be reviewed with `get_conversation_history()`

This implementation demonstrates advanced AI techniques specifically tailored for healthcare information systems with appropriate safety measures.