In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simple test script for GeminiNativeClient
"""

import os
import json
import re
from llm_clients import GeminiNativeClient

def extract_thinking_from_content(content: str) -> tuple:
    """Separate the model's reasoning from its visible answer.

    Args:
        content: Raw response text, possibly containing one or more
            ``<think>...</think>`` blocks.

    Returns:
        A ``(thinking, cleaned)`` pair of strings:

        * falsy ``content`` yields ``("", "")``;
        * when no complete ``<think>`` block is present, ``thinking`` is ``""``
          and ``cleaned`` is ``content`` exactly as received (not stripped);
        * otherwise ``thinking`` is the stripped text of the *first* block and
          ``cleaned`` is ``content`` with *every* block removed, then stripped.
    """
    if not content:
        return "", ""

    # DOTALL lets a single block span several lines.
    think_block = re.compile(r'<think>(.*?)</think>', re.DOTALL)

    first_block = think_block.search(content)
    if first_block is None:
        # Nothing to extract: hand the text back untouched.
        return "", content

    reasoning = first_block.group(1).strip()
    answer = think_block.sub('', content).strip()
    return reasoning, answer

def test_gemini_client():
    """Smoke-test GeminiNativeClient with thinking mode enabled.

    Sends one arithmetic prompt through ``client.query`` and prints the raw
    response, followed by the reasoning/answer split produced by
    ``extract_thinking_from_content``.

    The API key is read from the ``GEMINI_API_KEY`` environment variable
    (falling back to ``GOOGLE_API_KEY``) so that no credential is stored in
    the notebook. If neither variable is set, a failure message is printed
    and the function returns without building a client.

    Returns:
        None. Results and failures are reported on stdout; exceptions raised
        by the query or by the extraction step are caught and printed.
    """
    # Never hardcode the key: notebook source and outputs are shared and
    # committed as-is, which would leak the credential.
    api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        print("❌ Test failed: set the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable")
        return

    # Test with thinking enabled
    client = GeminiNativeClient(
        api_key=api_key,
        model_name="gemini-2.5-flash-lite",
        temperature=0.2,
        think=True,  # Enable thinking mode
        max_queries_per_minute=10
    )

    print("Testing GeminiNativeClient...")
    print("=" * 40)

    # Test query
    system_msg = "You are a helpful assistant that shows reasoning."
    user_input = "Calculate 15% of 280 and explain your steps."

    try:
        response = client.query(user_input, system_msg)
        print(f"Raw Response:\n{response}\n")

        # Test thinking extraction (like FastAPI does)
        thinking, content = extract_thinking_from_content(response)

        print(f"Has Thinking: {bool(thinking)}")
        if thinking:
            print(f"Thinking Process:\n{thinking}\n")
        print(f"Final Content:\n{content}")

        print("\n✅ Test completed successfully!")

    except Exception as e:
        # Broad catch is deliberate: this is a manual smoke test that reports
        # any failure on stdout instead of aborting the notebook run.
        print(f"❌ Test failed: {e}")

# Run the smoke test when this code is executed directly. In a notebook kernel
# __name__ is also "__main__", so simply running the cell triggers the test.
if __name__ == "__main__":
    test_gemini_client()

Testing GeminiNativeClient...
Raw Response:
<think>
**Understanding and Calculating a Percentage**

Alright, let's break this down. My goal is to determine 15% of 280. As an expert, I know that percentage inherently means "out of one hundred." So, 15% is simply the same as 15/100.

To make the calculation easier, I'll convert that percentage into its decimal equivalent. We can do this by dividing the percentage by 100: 15% becomes 15/100, which equals 0.15.

Now, the "of" in "15% of 280" translates to multiplication in this mathematical context.  Therefore, I need to calculate 0.15 multiplied by 280. I'll set it up this way: 0.15 * 280.

Let's do the math. When I multiply 280 by 0.15, I get 42.00, which simplifies to 42.

And there you have it! 15% of 280 equals 42.

As an additional point for deeper understanding, consider this: I can also think of 15% as 10% plus 5%. Knowing this, I could calculate 10% of 280 (which is 28) and then 5% of 280 (which is half of 28, or 14). Adding those

In [1]:
# """
# Corrected E2E test for the actual HPO extraction pipeline
# Tests the modified extract_hpo_terms() function directly
# """
# import json
# import importlib.util
# import time

# # Test clinical text
# CLINICAL_TEXT = """
# Patient is a 8-year-old girl with intellectual disability and seizures. 
# She has microcephaly, hypotonia, and delayed speech development. 
# Physical exam shows strabismus and hearing impairment. 
# Lab results indicate elevated liver enzymes and metabolic acidosis.
# """

# def load_and_setup_module():
#     """Load and set up the HPO module"""
#     print("Loading and setting up HPO module...")
    
#     # Load the module
#     spec = importlib.util.spec_from_file_location("hpo_module", "deeprare-rag-hpo.py")
#     hpo_module = importlib.util.module_from_spec(spec)
#     spec.loader.exec_module(hpo_module)
    
#     # Load system prompts from file
#     try:
#         with open("deeprare_system_prompts.json", "r", encoding='utf-8') as f:
#             prompts = json.load(f)
        
#         hpo_module.system_message_extract = prompts.get("system_message_extract", "")
#         hpo_module.system_message_normalize = prompts.get("system_message_normalize", "")
        
#         print("✅ Loaded system prompts from file")
#     except FileNotFoundError:
#         print("❌ deeprare_system_prompts.json not found; cannot continue without system prompts")
#         return None
    
#     # Initialize components
#     try:
#         print("Initializing LLM client...")
#         hpo_module.llm_client = hpo_module.check_and_initialize_llm()
        
#         print("Loading embeddings model...")
#         hpo_module.embeddings_model = hpo_module.SentenceTransformer('pritamdeka/SapBERT-mnli-snli-scinli-scitail-mednli-stsb')
        
#         print("Loading vector database...")
#         docs, emb_matrix = hpo_module.load_vector_db()
#         hpo_module.embedded_documents = docs
#         hpo_module.faiss_index = hpo_module.create_faiss_index(emb_matrix)
        
#         print("Initializing cache...")
#         hpo_module.cache = hpo_module.SimpleCache()
        
#         print("✅ All components initialized")
#         return hpo_module
        
#     except Exception as e:
#         print(f"❌ Component initialization failed: {e}")
#         return None

# def test_individual_steps(hpo_module):
#     """Test each step individually for debugging"""
#     print("\n" + "=" * 60)
#     print("TESTING INDIVIDUAL STEPS")
#     print("=" * 60)
    
#     # Test Step 1: Extraction
#     print("📝 Step 1: Raw Extraction Test")
#     try:
#         response1 = hpo_module.llm_client.query(CLINICAL_TEXT, hpo_module.system_message_extract)
#         print(f"Raw response length: {len(response1)} chars")
#         print(f"Raw response preview: {response1[:200]}...")
        
#         # Test JSON parsing
#         parsed1 = hpo_module._safe_json_loads(response1)
#         if parsed1:
#             print(f"✅ JSON parsing successful")
#             print(f"Type: {type(parsed1)}")
#             if isinstance(parsed1, list):
#                 print(f"Found {len(parsed1)} items")
#                 for i, item in enumerate(parsed1[:3]):  # Show first 3
#                     if isinstance(item, dict):
#                         hpo_id = item.get('HPO') or item.get('hpo_id', 'N/A')
#                         phenotype = item.get('Phenotype') or item.get('phenotype', 'N/A')
#                         print(f"  {i+1}. {hpo_id}: {phenotype}")
#         else:
#             print("❌ JSON parsing failed")
#             return False
            
#     except Exception as e:
#         print(f"❌ Step 1 failed: {e}")
#         return False
    
#     # Test Step 2: Normalization
#     print(f"\n📝 Step 2: Raw Normalization Test")
#     test_phenotype = "seizures"
#     try:
#         response2 = hpo_module.llm_client.query(test_phenotype, hpo_module.system_message_normalize)
#         print(f"Input: {test_phenotype}")
#         print(f"Raw response: {response2}")
        
#         # Test JSON parsing
#         parsed2 = hpo_module._safe_json_loads(response2)
#         if parsed2 and isinstance(parsed2, dict):
#             original = parsed2.get('original_term', 'N/A')
#             normalized = parsed2.get('hpo_term', 'N/A')
#             print(f"✅ Normalization successful")
#             print(f"Original: {original}")
#             print(f"Normalized: {normalized}")
#         else:
#             print("❌ Normalization parsing failed")
#             return False
            
#     except Exception as e:
#         print(f"❌ Step 2 failed: {e}")
#         return False
    
#     return True

# def test_full_pipeline(hpo_module):
#     """Test the complete pipeline"""
#     print("\n" + "=" * 60)
#     print("TESTING COMPLETE PIPELINE")
#     print("=" * 60)
    
#     print(f"Input clinical text:")
#     print(f"'{CLINICAL_TEXT.strip()}'")
#     print(f"Length: {len(CLINICAL_TEXT)} characters")
    
#     try:
#         print("\nRunning extract_hpo_terms()...")
#         start_time = time.time()
        
#         result = hpo_module.extract_hpo_terms(CLINICAL_TEXT)
        
#         end_time = time.time()
#         processing_time = end_time - start_time
        
#         print(f"Processing completed in {processing_time:.3f} seconds")
        
#         return result, processing_time
        
#     except Exception as e:
#         print(f"❌ Pipeline failed: {e}")
#         import traceback
#         traceback.print_exc()
#         return None, 0

# def analyze_results(result, processing_time):
#     """Analyze the pipeline results"""
#     if not result:
#         print("❌ No results to analyze")
#         return False
    
#     print("\n" + "=" * 60)
#     print("RESULTS ANALYSIS")
#     print("=" * 60)
    
#     # Basic structure
#     print(f"Result type: {type(result)}")
#     print(f"Keys: {list(result.keys()) if isinstance(result, dict) else 'N/A'}")
    
#     # HPO terms analysis
#     if 'hpo_terms' in result:
#         hpo_terms = result['hpo_terms']
#         print(f"\n📊 Found {len(hpo_terms)} HPO terms:")
        
#         if hpo_terms:
#             for i, term in enumerate(hpo_terms, 1):
#                 phrase = term.get('phrase', 'N/A')
#                 hpo_id = term.get('hpo_id', 'N/A')
#                 normalized = term.get('normalized_term', 'N/A')
#                 score = term.get('similarity_score', 0)
                
#                 print(f"  {i}. {hpo_id} - {phrase}")
#                 print(f"     Normalized: {normalized}")
#                 print(f"     Similarity: {score:.3f}")
#         else:
#             print("  No HPO terms extracted")
    
#     # Thinking process
#     if 'thinking_process' in result:
#         thinking = result['thinking_process']
#         print(f"\n🧠 Thinking process length: {len(thinking)} chars")
#         if thinking:
#             lines = thinking.split('\n')[:10]  # First 10 lines
#             print("First few lines:")
#             for line in lines:
#                 if line.strip():
#                     print(f"  {line[:80]}{'...' if len(line) > 80 else ''}")
    
#     # Performance
#     print(f"\n⚡ Performance:")
#     print(f"  Processing time: {processing_time:.3f}s")
#     print(f"  HPO terms found: {len(result.get('hpo_terms', []))}")
    
#     # Format validation
#     print(f"\n✅ Format validation:")
#     valid = True
    
#     if not isinstance(result, dict):
#         print("  ❌ Result should be dict")
#         valid = False
    
#     required_keys = ['hpo_terms', 'thinking_process']
#     for key in required_keys:
#         if key in result:
#             print(f"  ✅ Has {key}")
#         else:
#             print(f"  ❌ Missing {key}")
#             valid = False
    
#     if 'hpo_terms' in result and isinstance(result['hpo_terms'], list):
#         print(f"  ✅ hpo_terms is list")
#         for i, term in enumerate(result['hpo_terms']):
#             if isinstance(term, dict) and 'hpo_id' in term and 'phrase' in term:
#                 print(f"  ✅ Term {i+1} has required fields")
#             else:
#                 print(f"  ❌ Term {i+1} missing required fields")
#                 valid = False
    
#     return valid and len(result.get('hpo_terms', [])) > 0

# def main():
#     """Main test function"""
#     print("🧪 HPO EXTRACTION E2E TEST")
#     print("=" * 60)
    
#     # Setup
#     hpo_module = load_and_setup_module()
#     if not hpo_module:
#         print("❌ Setup failed")
#         return
    
#     # Test individual steps first
#     if not test_individual_steps(hpo_module):
#         print("❌ Individual steps test failed")
#         return
    
#     # Test full pipeline
#     result, processing_time = test_full_pipeline(hpo_module)
#     if not result:
#         print("❌ Full pipeline test failed")
#         return
    
#     # Analyze results
#     success = analyze_results(result, processing_time)
    
#     # Show full JSON
#     print(f"\n📄 FULL JSON RESULT:")
#     print("=" * 30)
#     try:
#         print(json.dumps(result, indent=2, ensure_ascii=False))
#     except Exception as e:
#         print(f"❌ JSON serialization failed: {e}")
    
#     # Final verdict
#     print("\n" + "=" * 60)
#     print("FINAL VERDICT")
#     print("=" * 60)
    
#     if success:
#         print("🎉 TEST PASSED!")
#         hpo_count = len(result.get('hpo_terms', []))
#         print(f"✅ Successfully extracted {hpo_count} HPO terms")
#         print(f"✅ Processing time: {processing_time:.3f}s")
#         print("✅ All format validations passed")
        
#         if hpo_count > 0:
#             print("\nExtracted HPO terms:")
#             for i, term in enumerate(result['hpo_terms'], 1):
#                 hpo_id = term.get('hpo_id', 'N/A')
#                 phrase = term.get('phrase', 'N/A')
#                 score = term.get('similarity_score', 0)
#                 print(f"  {i}. {hpo_id}: {phrase} (sim: {score:.3f})")
#     else:
#         print("❌ TEST FAILED!")
#         print("Check the error messages above for details")
    
#     print("=" * 60)

# if __name__ == "__main__":
#     main()

🧪 HPO EXTRACTION E2E TEST
Loading and setting up HPO module...
✅ Loaded system prompts from file
Initializing LLM client...


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda:0
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: pritamdeka/SapBERT-mnli-snli-scinli-scitail-mednli-stsb


Loading embeddings model...
Loading vector database...
Initializing cache...
✅ All components initialized

TESTING INDIVIDUAL STEPS
📝 Step 1: Raw Extraction Test


INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.


Raw response length: 642 chars
Raw response preview: Here are the clinical phenotypes extracted from the patient information:

1.  **HPO ID:** HP:0001249
    **Phenotype:** Intellectual disability
2.  **HPO ID:** HP:0001250
    **Phenotype:** Seizures
3...
❌ JSON parsing failed
❌ Individual steps test failed
