In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import umap
import plotly.express as px
import plotly.graph_objects as go
from wordcloud import WordCloud

# Fetch the NLTK 'punkt' and 'stopwords' resources.
# These calls are active and run every time the cell executes; comment them out
# once the data is installed locally.
nltk.download('punkt')
nltk.download('stopwords')

In [None]:
# Step 2: Load and examine the data
# Load the CSV file
# The path is relative, so it is resolved against the notebook's working directory.
file_path = './incorrectly_answered_questions_consolidated.csv'
# Later cells read the 'question' column and, when it exists, 'ground_truth_explanation'.
df = pd.read_csv(file_path)

In [None]:
# Step 4: Enhanced Domain Categorization
# Significantly expanded medical domains and keywords
# Maps each medical domain label to the lower-case keywords/phrases that signal it.
# A keyword may legitimately appear under several domains, but within one domain
# every keyword is listed exactly once so that per-domain hit counts are not inflated.
medical_domains = {
    'cardiology': [
        'heart', 'cardiac', 'myocardial', 'coronary', 'ecg', 'echocardiogram', 'echocardiography',
        'arrhythmia', 'hypertension', 'angina', 'palpitation', 'tachycardia', 'bradycardia',
        'aorta', 'atrial', 'ventricular', 'fibrillation', 'valve', 'mitral', 'aortic', 'cardiovascular',
        'infarct', 'infarction', 'chest pain', 'stent', 'angiogram', 'catheter', 'pacemaker', 'defibrillator',
        'lipid', 'cholesterol', 'thrombosis', 'embolism', 'murmur', 'cardiomyopathy', 'statin'
    ],
    'neurology': [
        'brain', 'neural', 'neuron', 'seizure', 'stroke', 'cns', 'spinal', 'meningitis',
        'headache', 'migraine', 'parkinson', 'alzheimer', 'dementia', 'neuropathy', 'multiple sclerosis',
        'epilepsy', 'mri', 'eeg', 'cerebellar', 'cerebellum', 'cerebral', 'cerebrospinal', 'csf',
        'encephalitis', 'encephalopathy', 'neuralgia', 'palsy', 'tremor', 'vertigo', 'syncope',
        'guillain-barre', 'myasthenia', 'gravis', 'cranial nerve', 'tia', 'hemorrhage', 'aneurysm'
    ],
    'orthopedics': [
        'bone', 'joint', 'fracture', 'spine', 'knee', 'shoulder', 'hip', 'arthritis', 'osteo',
        'orthopedic', 'tendon', 'ligament', 'cartilage', 'meniscus', 'rotator cuff', 'dislocation',
        'sprain', 'strain', 'gout', 'rheumatoid', 'bursitis', 'osteoporosis', 'scoliosis', 'kyphosis',
        'lordosis', 'disc', 'disk', 'vertebra', 'vertebral', 'femur', 'tibia', 'fibula', 'humerus',
        'radius', 'ulna', 'pelvis', 'cervical', 'lumbar', 'thoracic', 'sacral', 'acl', 'mcl', 'pcl',
        'glenoid', 'acromioclavicular', 'arthroplasty', 'osteomyelitis', 'prosthesis'
    ],
    'gastroenterology': [
        'stomach', 'intestine', 'liver', 'pancreas', 'colon', 'hepatic', 'ulcer', 'gallbladder',
        'esophagus', 'esophageal', 'reflux', 'gerd', 'abdomen', 'abdominal', 'bowel', 'ibs', 'crohn',
        'colitis', 'cirrhosis', 'hepatitis', 'jaundice', 'biliary', 'cholecystitis', 'pancreatitis',
        'dysphagia', 'gastritis', 'constipation', 'diarrhea', 'melena', 'gastric', 'duodenal', 'jejunum',
        'ileum', 'appendicitis', 'appendix', 'rectum', 'rectal', 'anal', 'anus', 'hemorrhoid',
        'diverticulitis', 'diverticulosis', 'endoscopy', 'colonoscopy', 'gastroscopy'
    ],
    'pulmonology': [
        'lung', 'respiratory', 'breathing', 'asthma', 'copd', 'pneumonia', 'pulmonary',
        'bronchitis', 'bronchial', 'pleura', 'pleural', 'chest', 'dyspnea', 'cough', 'tuberculosis',
        'emphysema', 'oxygen', 'hypoxia', 'ventilation', 'ventilator', 'intubation', 'tachypnea',
        'spirometry', 'sarcoidosis', 'hemoptysis', 'apnea', 'embolism', 'fibrosis', 'alveoli',
        'alveolar', 'bronchodilator', 'hyperventilation', 'pneumothorax', 'thoracentesis'
    ],
    'dermatology': [
        'skin', 'rash', 'dermatitis', 'eczema', 'psoriasis', 'melanoma', 'acne', 'pruritus',
        'urticaria', 'hives', 'lesion', 'blister', 'nodule', 'dermatologic', 'cutaneous', 'epidermis',
        'dermis', 'cellulitis', 'folliculitis', 'wart', 'herpes', 'papule', 'pustule', 'vitiligo',
        'alopecia', 'keratosis', 'carcinoma', 'basal cell', 'squamous cell', 'melanocytic', 'nevus',
        'seborrheic', 'impetigo', 'pemphigus', 'lupus'
    ],
    'endocrinology': [
        'hormone', 'thyroid', 'diabetes', 'insulin', 'adrenal', 'pituitary', 'glucose', 'endocrine',
        'hyperglycemia', 'hypoglycemia', 'diabetic', 'hba1c', 'hemoglobin a1c', 'parathyroid',
        'testosterone', 'estrogen', 'progesterone', 'cortisol', 'cushing', 'addison', 'hyperthyroidism',
        'hypothyroidism', 'thyroiditis', 'goiter', 'graves', 'hashimoto', 'acromegaly', 'gigantism',
        'dwarfism', 'hypopituitarism', 'hyperpituitarism', 'tsh', 't3', 't4', 'amenorrhea', 'hirsutism',
        'polycystic ovary', 'pcos', 'metabolic syndrome', 'aldosterone', 'hyperaldosteronism',
        'hypoaldosteronism', 'ketoacidosis', 'glycemic', 'prolactin', 'prolactinoma', 'carbohydrate'
    ],
    'nephrology': [
        'kidney', 'renal', 'dialysis', 'glomerular', 'nephritis', 'nephrotic', 'proteinuria',
        'hematuria', 'creatinine', 'bun', 'urea', 'ckd', 'chronic kidney disease', 'acute kidney injury',
        'aki', 'glomerulonephritis', 'pyelonephritis', 'hydronephrosis', 'urinary', 'urine', 'bladder',
        'cystitis', 'ureter', 'urethra', 'electrolyte', 'potassium', 'sodium', 'hyponatremia',
        'hypernatremia', 'hypokalemia', 'hyperkalemia', 'acidosis', 'alkalosis', 'diuretic',
        'nephrolithiasis', 'urolithiasis', 'stone', 'polycystic', 'uremia', 'azotemia', 'oliguria',
        'anuria', 'polyuria', 'dysuria', 'nocturia', 'cystoscopy', 'pyelogram'
    ],
    'obstetrics': [
        'pregnancy', 'birth', 'cesarean', 'uterus', 'fetus', 'postpartum', 'gravid', 'gestation',
        'prenatal', 'antenatal', 'perinatal', 'trimester', 'preeclampsia', 'eclampsia', 'labor',
        'delivery', 'placenta', 'placental', 'amniotic', 'amniocentesis', 'cervix', 'cervical',
        'dilate', 'dilation', 'contraction', 'gestational', 'obstetric', 'ectopic', 'miscarriage',
        'abortion', 'stillbirth', 'umbilical', 'maternal', 'para', 'gravida', 'rupture of membranes',
        'ultrasound', 'sonogram', 'pelvic', 'episiotomy', 'oxytocin', 'pitocin', 'epidural'
    ],
    'pediatrics': [
        'child', 'infant', 'newborn', 'pediatric', 'congenital', 'baby', 'neonatal', 'neonate',
        'toddler', 'adolescent', 'developmental', 'milestones', 'vaccination', 'immunization',
        'failure to thrive', 'growth', 'jaundice', 'colic', 'teething', 'adhd', 'autism', 'asperger',
        'respiratory syncytial virus', 'rsv', 'rotavirus', 'croup', 'otitis media', 'ear infection',
        'meningococcal', 'pertussis', 'whooping cough', 'sids', 'diaper', 'breast feeding', 'formula',
        'mumps', 'measles', 'rubella', 'varicella', 'chickenpox', 'kawasaki'
    ],
    'infectious_disease': [
        'infection', 'bacteria', 'virus', 'antibiotic', 'sepsis', 'hiv', 'pneumonia', 'fungal',
        'parasite', 'parasitic', 'microbial', 'antimicrobial', 'antiviral', 'antifungal',
        'antiparasitic', 'susceptibility', 'resistance', 'mrsa', 'vre', 'esbl', 'cre', 'tuberculosis',
        'malaria', 'dengue', 'ebola', 'zika', 'lyme', 'meningitis', 'encephalitis', 'cellulitis',
        'abscess', 'osteomyelitis', 'endocarditis', 'septic', 'hepatitis', 'aids', 'influenza',
        'flu', 'immunodeficiency', 'vaccination', 'immunization', 'std', 'sti', 'syphilis', 'gonorrhea',
        'chlamydia', 'herpes', 'hpv', 'fever', 'neutropenia', 'leukopenia'
    ],
    'hematology': [
        'blood', 'anemia', 'leukemia', 'lymphoma', 'platelet', 'coagulation', 'thrombosis', 'clot',
        'bleeding', 'hemoglobin', 'hematocrit', 'transfusion', 'hemophilia', 'sickle cell', 'thalassemia',
        'wbc', 'rbc', 'neutrophil', 'lymphocyte', 'monocyte', 'eosinophil', 'basophil', 'von willebrand',
        'hemolytic', 'purpura', 'thrombocytopenia', 'polycythemia', 'myeloma', 'hodgkin', 'non-hodgkin',
        'spleen', 'splenic', 'pernicious', 'macrocytic', 'microcytic', 'normocytic', 'iron deficiency',
        'ferritin', 'b12', 'folate', 'disseminated intravascular', 'dic', 'anticoagulant', 'heparin', 'warfarin'
    ],
    'oncology': [
        'cancer', 'tumor', 'carcinoma', 'sarcoma', 'lymphoma', 'leukemia', 'metastasis', 'metastatic',
        'neoplasm', 'neoplastic', 'malignant', 'benign', 'biopsy', 'chemotherapy', 'radiation therapy',
        'radiotherapy', 'oncology', 'oncologic', 'mastectomy', 'lumpectomy', 'prostatectomy', 'orchiectomy',
        'hysterectomy', 'oophorectomy', 'mammogram', 'colonoscopy', 'pap smear', 'psa', 'staging',
        'grade', 'differentiation', 'remission', 'palliative', 'hospice', 'myeloma', 'glioma', 'glioblastoma',
        'melanoma', 'carcinoembryonic', 'cea', 'brca', 'lynch syndrome', 'adenoma', 'adenocarcinoma'
    ],
    'ophthalmology': [
        'eye', 'vision', 'ocular', 'ophthalmology', 'retina', 'retinal', 'cornea', 'corneal', 'cataract',
        'glaucoma', 'myopia', 'hyperopia', 'astigmatism', 'presbyopia', 'strabismus', 'amblyopia',
        'macular degeneration', 'diabetic retinopathy', 'conjunctivitis', 'keratitis', 'uveitis',
        'iritis', 'pupil', 'iris', 'sclera', 'lens', 'vitreous', 'optic nerve', 'blindness', 'visual',
        'intraocular', 'tonometry', 'ophthalmoscopy', 'fundoscopy', 'dry eye', 'blepharitis',
        'ptosis', 'floaters', 'photophobia', 'diplopia', 'scotoma', 'visual field'
    ],
    'otolaryngology': [
        'ear', 'nose', 'throat', 'ent', 'otolaryngology', 'hearing', 'deaf', 'tinnitus', 'vertigo',
        'otitis', 'rhinitis', 'sinusitis', 'pharyngitis', 'laryngitis', 'tonsillitis', 'adenoid',
        'tympanic', 'eardrum', 'mastoid', 'mastoiditis', 'vestibular', 'meniere', 'acoustic neuroma',
        'epistaxis', 'nasal', 'sinus', 'deviated septum', 'polyp', 'rhinorrhea', 'dysphonia',
        'hoarseness', 'laryngeal', 'pharyngeal', 'dysphagia', 'tracheostomy', 'otorrhea',
        'otalgia', 'audiometry', 'cochlear', 'tympanometry'
    ],
    'psychiatry': [
        'mental', 'psychiatric', 'psychology', 'depression', 'anxiety', 'bipolar', 'schizophrenia',
        'psychosis', 'psychotic', 'delirium', 'dementia', 'alzheimer', 'adhd', 'add', 'autism',
        'asperger', 'ocd', 'obsessive-compulsive', 'ptsd', 'post-traumatic', 'panic', 'phobia',
        'anorexia', 'bulimia', 'eating disorder', 'insomnia', 'suicide', 'suicidal', 'homicidal',
        'personality disorder', 'borderline', 'narcissistic', 'antisocial', 'histrionic', 'mood',
        'serotonin', 'dopamine', 'antidepressant', 'antipsychotic', 'benzodiazepine', 'lithium',
        'electroconvulsive', 'psychotherapy', 'cognitive behavioral', 'cbt'
    ],
    'urology': [
        'urinary', 'bladder', 'prostate', 'ureter', 'urethra', 'penis', 'testicle', 'testis',
        'scrotum', 'erectile', 'ejaculation', 'urological', 'urologic', 'cystitis', 'pyelonephritis',
        'prostatitis', 'benign prostatic hyperplasia', 'bph', 'hematuria', 'proteinuria', 'incontinence',
        'urinary retention', 'enuresis', 'nocturia', 'polyuria', 'oliguria', 'hydrocele', 'varicocele',
        'epididymitis', 'orchitis', 'vasectomy', 'circumcision', 'phimosis', 'paraphimosis',
        'hypospadias', 'epispadias', 'pyuria', 'dysuria', 'cystoscopy', 'urodynamic'
    ],
    'rheumatology': [
        'rheumatoid', 'arthritis', 'lupus', 'gout', 'fibromyalgia', 'autoimmune', 'rheumatic',
        'rheumatology', 'ankylosing spondylitis', 'sjogren', 'scleroderma', 'dermatomyositis',
        'polymyositis', 'vasculitis', 'polyarteritis', 'polymyalgia', 'temporal arteritis',
        'giant cell arteritis', 'wegener', 'granulomatosis', 'polyarteritis nodosa', 'reiter',
        'psoriatic arthritis', 'enteropathic arthritis', 'reactive arthritis', 'diffuse',
        'connective tissue', 'antinuclear', 'ana', 'rheumatoid factor', 'rf', 'anticitrullinated',
        'anti-ccp', 'erythrocyte sedimentation rate', 'esr', 'c-reactive protein', 'crp'
    ],
    'emergency_medicine': [
        'emergency', 'trauma', 'accident', 'injury', 'fracture', 'hemorrhage', 'bleeding',
        'shock', 'resuscitation', 'cpr', 'cardiac arrest', 'respiratory arrest', 'airway',
        'intubation', 'defibrillation', 'poison', 'toxicity', 'overdose', 'burn', 'wound',
        'laceration', 'abrasion', 'contusion', 'avulsion', 'amputation', 'dislocation',
        'triage', 'ambulance', 'paramedic', 'ems', 'als', 'bls', 'acls', 'pals', 'crash',
        'unconscious', 'unresponsive', 'glasgow', 'abcde', 'primary survey', 'secondary survey'
    ],
    'pharmacology': [
        'drug', 'medication', 'pharmacology', 'pharmacological', 'pharmacokinetic', 'pharmacodynamic',
        'dose', 'dosage', 'therapeutic', 'toxicity', 'overdose', 'adverse effect', 'side effect',
        'contraindication', 'interaction', 'bioavailability', 'half-life', 'clearance', 'absorption',
        'distribution', 'metabolism', 'excretion', 'induction', 'inhibition', 'receptor', 'agonist',
        'antagonist', 'partial agonist', 'inverse agonist', 'oral', 'intravenous', 'intramuscular',
        'subcutaneous', 'topical', 'buccal', 'sublingual', 'rectal', 'inhalation'
    ],
    'anesthesiology': [
        'anesthesia', 'anesthetic', 'anesthesiology', 'sedation', 'general anesthesia', 'local anesthesia',
        'regional anesthesia', 'spinal anesthesia', 'epidural', 'nerve block', 'conscious sedation',
        'propofol', 'ketamine', 'etomidate', 'benzodiazepine', 'barbiturate', 'opioid', 'fentanyl',
        'remifentanil', 'morphine', 'neuromuscular blocker', 'succinylcholine', 'rocuronium',
        'vecuronium', 'intubation', 'laryngoscopy', 'mask ventilation', 'airway', 'capnography',
        'pulse oximetry', 'bispectral index', 'bis', 'mac', 'minimum alveolar concentration'
    ],
    'surgery': [
        'surgery', 'surgical', 'operation', 'operative', 'surgeon', 'incision', 'excision', 'resection',
        'anastomosis', 'ligation', 'suture', 'staple', 'laparoscopy', 'laparoscopic', 'laparotomy',
        'thoracotomy', 'thoracoscopy', 'craniectomy', 'craniotomy', 'laminectomy', 'discectomy',
        'hysterectomy', 'oophorectomy', 'salpingectomy', 'cholecystectomy', 'appendectomy',
        'colectomy', 'gastrectomy', 'splenectomy', 'nephrectomy', 'prostatectomy', 'mastectomy',
        'thyroidectomy', 'herniorrhaphy', 'hemorrhoidectomy', 'transplant', 'graft'
    ],
    'radiology': [
        'radiology', 'imaging', 'x-ray', 'radiograph', 'ultrasound', 'sonography', 'computed tomography',
        'ct scan', 'magnetic resonance', 'mri', 'angiography', 'angiogram', 'fluoroscopy', 'contrast',
        'radioopaque', 'radiolucent', 'radiodense', 'hyperechoic', 'hypoechoic', 'isoechoic',
        'hyperintense', 'hypointense', 'isointense', 'density', 'artifact', 'resolution',
        'radiopaque', 'positron emission', 'pet', 'spect', 'nuclear medicine', 'mammography',
        'interventional', 'barium', 'gadolinium', 'iodinated'
    ]
}

# Function to categorize a question using multiple approaches

# Condition/procedure terms that map directly to a domain. They are checked after
# the main keyword scan and only add a domain that is not already present.
_CONDITION_DOMAINS = [
    ('diabetes', 'endocrinology'),
    ('hypertension', 'cardiology'),
    ('asthma', 'pulmonology'),
    ('fracture', 'orthopedics'),
    ('pregnancy', 'obstetrics'),
    ('newborn', 'pediatrics'),
    ('infection', 'infectious_disease'),
    ('thyroid', 'endocrinology'),
    ('arthritis', 'rheumatology'),
    ('seizure', 'neurology'),
    ('cancer', 'oncology'),
    ('leukemia', 'hematology'),
    ('glaucoma', 'ophthalmology'),
    ('kidney', 'nephrology'),
    ('liver', 'gastroenterology'),
    ('gallbladder', 'gastroenterology'),
    ('surgery', 'surgery'),
    ('anesthesia', 'anesthesiology'),
    ('cardiac', 'cardiology'),
    ('drug', 'pharmacology'),
    ('emergency', 'emergency_medicine'),
    ('trauma', 'emergency_medicine'),
    ('x-ray', 'radiology'),
    ('ct', 'radiology'),
    ('mri', 'radiology')
]

# Compiled keyword regexes keyed by keyword, so each pattern is built only once
# no matter how many rows are categorized.
_KEYWORD_PATTERNS = {}


def _normalize_text(text):
    """Lower-case ``text`` and collapse every whitespace run into one space."""
    return re.sub(r'\s+', ' ', str(text).lower()).strip()


def _keyword_in_text(keyword, text):
    """Return True if ``keyword`` occurs in ``text`` as a whole word or phrase.

    The match must not be embedded in a longer word, so 'ent' does not fire on
    'patient' and 'ct' does not fire on 'infection'. A trailing 's' or 'es' is
    tolerated so that simple plurals ('fractures') still match. ``text`` is
    expected to be lower-case with single spaces (see ``_normalize_text``).
    """
    pattern = _KEYWORD_PATTERNS.get(keyword)
    if pattern is None:
        # Lookarounds instead of \b so keywords that start or end with a
        # non-word character would still be bounded correctly.
        pattern = re.compile(r'(?<!\w)' + re.escape(keyword.lower()) + r'(?:s|es)?(?!\w)')
        _KEYWORD_PATTERNS[keyword] = pattern
    return pattern.search(text) is not None


def enhanced_categorize_question(row, domains=None):
    """Assign one or more medical domains to a question row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'question'``. ``'ground_truth_explanation'`` and
        ``'clean_question'`` are used when present and not NaN.
    domains : dict[str, list[str]], optional
        Domain label -> keyword list. Defaults to the notebook-level
        ``medical_domains`` table.

    Returns
    -------
    list[str]
        Matching domain labels in table order (followed by any domain added by
        the condition list), or ``['unclassified']`` when nothing matches.

    Notes
    -----
    Keywords are matched as whole words/phrases (optionally pluralised), not as
    raw substrings. Other inflected forms (e.g. 'infectious' for 'infection')
    only match if they are listed as keywords themselves.
    """
    if domains is None:
        domains = medical_domains

    # Build the searchable text: question plus explanation (if any), normalised
    # so multi-word keywords still match across line breaks or repeated spaces.
    question = _normalize_text(row['question'])
    explanation = ""
    if 'ground_truth_explanation' in row and not pd.isna(row['ground_truth_explanation']):
        explanation = _normalize_text(row['ground_truth_explanation'])
    combined_text = (question + " " + explanation).strip()

    # First approach: a domain matches as soon as any of its keywords is found.
    categories = [
        domain
        for domain, keywords in domains.items()
        if any(_keyword_in_text(keyword, combined_text) for keyword in keywords)
    ]

    # Answer options such as "(A) ..." are part of the question text, so the
    # scan above already covers them; no separate pass over the options is needed.

    # Second approach: direct condition/procedure -> domain mappings.
    for condition, domain in _CONDITION_DOMAINS:
        if domain not in categories and _keyword_in_text(condition, combined_text):
            categories.append(domain)

    # Fallback: if still unclassified, score domains by the number of distinct
    # keywords found in the pre-cleaned question and keep the top scorer(s).
    if not categories and 'clean_question' in row and not pd.isna(row['clean_question']):
        clean_text = _normalize_text(row['clean_question'])
        domain_scores = {}
        for domain, keywords in domains.items():
            # set() so a keyword listed twice cannot inflate the score
            score = sum(1 for keyword in set(keywords) if _keyword_in_text(keyword, clean_text))
            if score > 0:
                domain_scores[domain] = score

        if domain_scores:
            max_score = max(domain_scores.values())
            categories = [domain for domain, score in domain_scores.items() if score == max_score]

    # Returns categories or 'unclassified'
    return categories if categories else ['unclassified']

# Process text for better matching
def preprocess_text(text):
    """Return ``text`` lower-cased, stripped, with whitespace runs collapsed.

    Missing values (``None``, NaN, ``pd.NA``) yield an empty string instead of
    the literal words 'none' / 'nan', which would otherwise end up in the
    cleaned column as if they were real content. Any other non-string value is
    converted with ``str()`` first.
    """
    # is_scalar guards pd.isna against list/array inputs, where it would return an array
    if pd.api.types.is_scalar(text) and pd.isna(text):
        return ''
    return re.sub(r'\s+', ' ', str(text).lower().strip())

# Build the cleaned text columns used for matching
df['clean_question'] = df['question'].apply(preprocess_text)
if 'ground_truth_explanation' in df.columns:
    df['clean_explanation'] = df['ground_truth_explanation'].apply(preprocess_text)

# Tag every row with its list of domains
df['domains'] = df.apply(enhanced_categorize_question, axis=1)

# Flatten the per-question lists and tally how often each domain occurs
all_domains = [label for label_list in df['domains'] for label in label_list]
domain_counts = Counter(all_domains)

# One row per domain, most frequent first
domain_df = pd.DataFrame.from_dict(domain_counts, orient='index').reset_index()
domain_df.columns = ['Domain', 'Count']
domain_df = domain_df.sort_values('Count', ascending=False)

# Horizontal bar chart of the domain distribution
plt.figure(figsize=(15, 10))
ax = sns.barplot(x='Count', y='Domain', data=domain_df)
plt.title('Distribution of Medical Domains in Questions')
plt.tight_layout()

# Write each bar's count just past its right-hand end
for bar_position, bar_count in enumerate(domain_df['Count']):
    ax.text(bar_count + 0.1, bar_position, str(bar_count), va='center')

plt.show()

# Summary statistics ('unclassified' only ever appears on its own, so its
# count equals the number of questions with no domain)
print(f"Total questions: {len(df)}")
print(f"Questions with at least one domain: {len(df) - domain_counts.get('unclassified', 0)}")
print(f"Unclassified questions: {domain_counts.get('unclassified', 0)}")
print(f"Percentage classified: {((len(df) - domain_counts.get('unclassified', 0)) / len(df)) * 100:.2f}%")

# Frequency of each distinct (order-independent) set of domains
domain_combinations = Counter(tuple(sorted(label_list)) for label_list in df['domains'])
print("\nMost common domain combinations:")
for combo, count in domain_combinations.most_common(10):
    print(f"{', '.join(combo)}: {count}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from collections import Counter
import ipywidgets as widgets
from IPython.display import display, clear_output
# Get the top domains
num_domains = 10
examples_per_domain = 10  # upper bound on questions kept per domain
sample_seed = 42          # fixed seed so Restart & Run All shows the same examples
top_domains = domain_df.head(num_domains)['Domain'].tolist()
print(f"Top {num_domains} domains: {', '.join(top_domains)}")

# Pre-fetch questions for each domain
domain_examples = {}
for domain in top_domains:
    # Filter for questions that have this domain as one of their domains
    domain_questions = df[df['domains'].apply(lambda x: domain in x)]

    # Get sample questions (or fewer if there aren't enough examples);
    # random_state makes the draw reproducible across runs
    sample_size = min(examples_per_domain, len(domain_questions))
    domain_examples[domain] = domain_questions.sample(sample_size, random_state=sample_seed)

def clear_screen():
    """Clear previously printed output.

    Inside a Jupyter kernel the cell output is cleared with
    ``IPython.display.clear_output`` because a shell ``clear``/``cls`` command
    has no effect on notebook output. Outside a kernel (plain terminal) the
    OS-specific clear command is used.
    """
    try:
        from IPython import get_ipython
        from IPython.display import clear_output
    except ImportError:
        get_ipython = None

    shell = get_ipython() if get_ipython is not None else None
    # Kernel-backed shells expose a `kernel` attribute; terminal IPython does not.
    if shell is not None and getattr(shell, 'kernel', None) is not None:
        # wait=True delays the clear until new output arrives, avoiding flicker
        clear_output(wait=True)
        return

    import os
    os.system('cls' if os.name == 'nt' else 'clear')

def _read_choice(prompt):
    """Read one line of user input; return None if input is closed or interrupted."""
    try:
        return input(prompt)
    except (EOFError, KeyboardInterrupt):
        return None


def display_domain_explorer():
    """Main interactive function to explore domains and questions.

    Loops over the notebook-level ``top_domains`` / ``domain_examples`` and
    redraws one question per iteration until the user quits with 'q'.
    A closed input stream or a kernel interrupt at the prompt also quits
    cleanly instead of raising. Returns None.
    """
    import random

    # Nothing to browse: avoid indexing an empty list / modulo by zero below
    if not top_domains:
        print("No domains available to explore")
        return

    current_domain_idx = 0
    current_question_idx = 0

    while True:
        clear_screen()

        # Get current domain and its questions
        domain = top_domains[current_domain_idx]
        examples = domain_examples[domain]
        num_examples = len(examples)

        # Display header information
        print(f"Domain {current_domain_idx + 1}/{len(top_domains)}: {domain.upper()}")
        if num_examples:
            print(f"Question {current_question_idx + 1}/{num_examples}")
            print("-" * 80)

            # Display current question
            row = examples.iloc[current_question_idx]
            print(f"\nQuestion ID: {row.name}")
            print(row['question'])
            print(f"\nCategorized domains: {', '.join(row['domains'])}")

            # Show explanation if available
            if 'ground_truth_explanation' in row and not pd.isna(row['ground_truth_explanation']):
                print("\nExplanation:")
                print(row['ground_truth_explanation'])
        else:
            print("-" * 80)
            print("\nNo example questions available for this domain")

        # Show navigation menu
        print("\n" + "-" * 80)
        print("Navigation options:")
        print("p: Previous domain | n: Next domain")
        print("u: Previous question | d: Next question")
        print("r: Random question | q: Quit")

        # Get user choice; surrounding whitespace is ignored, closed input means quit
        raw_choice = _read_choice("\nEnter choice: ")
        choice = 'q' if raw_choice is None else raw_choice.strip().lower()

        # Handle user choice
        if choice == 'p':  # Previous domain
            current_domain_idx = (current_domain_idx - 1) % len(top_domains)
            current_question_idx = 0
        elif choice == 'n':  # Next domain
            current_domain_idx = (current_domain_idx + 1) % len(top_domains)
            current_question_idx = 0
        elif choice == 'u':  # Previous question
            if num_examples:
                current_question_idx = (current_question_idx - 1) % num_examples
        elif choice == 'd':  # Next question
            if num_examples:
                current_question_idx = (current_question_idx + 1) % num_examples
        elif choice == 'r':  # Random question
            if num_examples:
                current_question_idx = random.randint(0, num_examples - 1)
        elif choice == 'q':  # Quit
            print("Exiting domain explorer")
            break
        else:
            if _read_choice("Invalid choice. Press Enter to continue...") is None:
                print("Exiting domain explorer")
                break

# Run the interactive explorer.
# This call blocks on keyboard input and only returns once 'q' is entered.
display_domain_explorer()