## Final Serve. 
#### - Serve after S1 + S2 + Assembling them both.

In [1]:
# Notebook is in: ModelPipeline\finrag_ml_tg1\rag_modules_src\01_Isolation_Test_NBS\07_ITest_FinalServe.ipynb
# supply_lines is in: ModelPipeline\finrag_ml_tg1\rag_modules_src\synthesis_pipeline\supply_lines.py



✓ Model root: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline

[DEBUG] ✓ AWS credentials loaded from aws_credentials.env
✓ FilterExtractor initialized with 21 companies
  Using: finrag_dim_companies_21.parquet
✓ FilterExtractor initialized with 21 companies
  Using: finrag_dim_companies_21.parquet
✓ KPI-JSON: Loaded 527 metric records
✓ KPI-JSON: Unique tickers: 2
✓ KPI-JSON: Year range: 2010-2025
✓ FINAL COMBINED CONTEXT (KPI + RAG) saved to:
  d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline\finrag_ml_tg1\rag_modules_src\test_outputs\combined_context_20251118_084005.txt
  Size: 48,599 bytes


In [1]:
"""
Integration test for PromptLoader with full RAG pipeline.
Tests prompt loading and formatting with production-scale query.

Run from ModelPipeline root:
    cd ModelPipeline
    python -m finrag_ml_tg1.rag_modules_src.prompts.test_prompt_integration
"""

import sys
from pathlib import Path

# Find ModelPipeline root
current = Path.cwd()
model_root = None
for parent in [current] + list(current.parents):
    if parent.name == "ModelPipeline":
        model_root = parent
        break

if model_root is None:
    raise RuntimeError("Cannot find 'ModelPipeline' root in path tree")

if str(model_root) not in sys.path:
    sys.path.insert(0, str(model_root))

# Now import
from finrag_ml_tg1.loaders.ml_config_loader import MLConfig
from finrag_ml_tg1.rag_modules_src.prompts.prompt_loader import PromptLoader
from finrag_ml_tg1.rag_modules_src.synthesis_pipeline.supply_lines import (
    RAGComponents,
    init_rag_components,
    build_combined_context,
)


def test_prompt_integration():
    """
    Full integration test:
    1. Initialize RAG components
    2. Build combined context with production query
    3. Load prompts
    4. Format final prompt
    5. Validate structure
    """
    
    print("=" * 80)
    print("PROMPT INTEGRATION TEST - PRODUCTION QUERY")
    print("=" * 80)
    
    # ════════════════════════════════════════════════════════════════════════
    # Step 1: Initialize Components
    # ════════════════════════════════════════════════════════════════════════
    print("\n[Step 1] Initializing RAG components...")
    
    try:
        config = MLConfig()
        print("  ✓ MLConfig loaded")
        
        rag = init_rag_components(model_root)
        print("  ✓ RAG components initialized")
        print(f"    - Entity Adapter: {type(rag.adapter).__name__}")
        print(f"    - Embedder: {type(rag.embedder).__name__}")
        print(f"    - Retriever: {type(rag.retriever).__name__}")
        print(f"    - Expander: {type(rag.expander).__name__}")
        print(f"    - Assembler: {type(rag.assembler).__name__}")
        print(f"    - Metric Pipeline: {type(rag.metric_pipeline).__name__}")
        
    except Exception as e:
        print(f"  ✗ Component initialization failed: {e}")
        import traceback
        traceback.print_exc()
        return False
    
    # ════════════════════════════════════════════════════════════════════════
    # Step 2: Build Combined Context (Production Query)
    # ════════════════════════════════════════════════════════════════════════
    print("\n[Step 2] Building combined context with production query...")
    
    query = (
        "For NVIDIA and Microsoft, what were revenue, operating income, and total assets "
        "in each year from 2016 to 2020, and how did management in the MD&A and "
        "Risk Factors sections explain these trends in terms of their AI strategy, "
        "competitive positioning, and supply chain risks?"
    )
    
    print(f"  Query: {query[:100]}...")
    
    try:
        combined_context, meta = build_combined_context(
            query=query,
            rag=rag,
            include_kpi=True,
            include_rag=True
        )
        
        print(f"  ✓ Combined context built")
        print(f"    - Total length: {len(combined_context):,} characters")
        print(f"    - Estimated tokens: ~{len(combined_context) // 4:,}")
        print(f"    - Has KPI block: {bool(meta['kpi_block'])}")
        print(f"    - Has RAG block: {bool(meta['rag_block'])}")
        
        # Validate structure
        has_kpi_header = "KPI SNAPSHOT" in combined_context
        has_narrative_header = "NARRATIVE CONTEXT" in combined_context
        has_query_footer = "USER QUESTION" in combined_context
        
        print(f"\n  [Structure Validation]")
        print(f"    - KPI header present: {has_kpi_header}")
        print(f"    - Narrative header present: {has_narrative_header}")
        print(f"    - Query footer present: {has_query_footer}")
        
        if not all([has_kpi_header, has_narrative_header, has_query_footer]):
            print(f"  ✗ Structure validation failed!")
            return False
        
        # Check query position (should be near end)
        query_position = combined_context.find("USER QUESTION")
        context_length = len(combined_context)
        query_at_end_pct = (query_position / context_length) * 100
        
        print(f"    - Query position: {query_at_end_pct:.1f}% through context")
        
        if query_at_end_pct < 80:
            print(f"    ⚠ Warning: Query should be near end (>80%), found at {query_at_end_pct:.1f}%")
        
    except Exception as e:
        print(f"  ✗ Context building failed: {e}")
        import traceback
        traceback.print_exc()
        return False
    
    # ════════════════════════════════════════════════════════════════════════
    # Step 3: Load Prompts
    # ════════════════════════════════════════════════════════════════════════
    print("\n[Step 3] Loading prompt templates...")
    
    try:
        loader = PromptLoader(
            system_prompt_version="v1",
            query_template_version="v1"
        )
        print("  ✓ PromptLoader initialized")
        
        system_prompt = loader.load_system_prompt()
        print(f"  ✓ System prompt loaded")
        print(f"    - Length: {len(system_prompt):,} characters")
        print(f"    - Estimated tokens: ~{len(system_prompt) // 4:,}")
        
        # Get recommended LLM params
        llm_params = loader.get_recommended_llm_params()
        print(f"\n  [Recommended LLM Parameters]")
        print(f"    - Temperature: {llm_params['temperature']}")
        print(f"    - Max tokens: {llm_params['max_tokens']:,}")
        print(f"    - Target models: {', '.join(llm_params['target_models'])}")
        
    except Exception as e:
        print(f"  ✗ Prompt loading failed: {e}")
        import traceback
        traceback.print_exc()
        return False
    
    # ════════════════════════════════════════════════════════════════════════
    # Step 4: Format Final User Prompt
    # ════════════════════════════════════════════════════════════════════════
    print("\n[Step 4] Formatting final user prompt...")
    
    try:
        user_prompt = loader.format_query_template(
            combined_context=combined_context
        )
        
        print(f"  ✓ User prompt formatted")
        print(f"    - Length: {len(user_prompt):,} characters")
        print(f"    - Estimated tokens: ~{len(user_prompt) // 4:,}")
        
        # Validate it's the same as combined_context (simple pass-through template)
        if user_prompt.strip() != combined_context.strip():
            print(f"  ⚠ Warning: Template modified context (expected pass-through)")
        
    except Exception as e:
        print(f"  ✗ Prompt formatting failed: {e}")
        import traceback
        traceback.print_exc()
        return False
    
    # ════════════════════════════════════════════════════════════════════════
    # Step 5: Token Budget Analysis
    # ════════════════════════════════════════════════════════════════════════
    print("\n[Step 5] Token budget analysis...")
    
    system_tokens = len(system_prompt) // 4
    user_tokens = len(user_prompt) // 4
    total_input_tokens = system_tokens + user_tokens
    response_tokens = llm_params['max_tokens']
    total_tokens = total_input_tokens + response_tokens
    
    print(f"  [Token Budget]")
    print(f"    - System prompt: ~{system_tokens:,} tokens")
    print(f"    - User prompt (context): ~{user_tokens:,} tokens")
    print(f"    - Total input: ~{total_input_tokens:,} tokens")
    print(f"    - Response budget: {response_tokens:,} tokens")
    print(f"    - Grand total: ~{total_tokens:,} tokens")
    
    # Context window check
    context_limits = {
        'claude-sonnet-3.5': 200_000,
        'claude-haiku-3.5': 200_000,
        'gpt-4o': 128_000,
    }
    
    print(f"\n  [Context Window Fits]")
    for model, limit in context_limits.items():
        fits = total_tokens < limit
        utilization = (total_tokens / limit) * 100
        status = "✓" if fits else "✗"
        print(f"    {status} {model}: {utilization:.1f}% utilized ({total_tokens:,}/{limit:,})")
    
    # ════════════════════════════════════════════════════════════════════════
    # Step 6: Cost Estimation
    # ════════════════════════════════════════════════════════════════════════
    print("\n[Step 6] Cost estimation...")
    
    # Get serving model config
    serving_config = config.get_default_serving_model()
    model_name = serving_config['display_name']
    cost_per_1k_input = serving_config['cost_per_1k_input']
    cost_per_1k_output = serving_config['cost_per_1k_output']
    
    input_cost = (total_input_tokens / 1000) * cost_per_1k_input
    output_cost = (response_tokens / 1000) * cost_per_1k_output
    total_cost = input_cost + output_cost
    
    print(f"  [Cost Analysis - {model_name}]")
    print(f"    - Input cost: ${input_cost:.4f}")
    print(f"    - Output cost: ${output_cost:.4f}")
    print(f"    - Total per query: ${total_cost:.4f}")
    print(f"    - Cost for 100 queries: ${total_cost * 100:.2f}")
    
    # ════════════════════════════════════════════════════════════════════════
    # Step 7: Preview Output
    # ════════════════════════════════════════════════════════════════════════
    print("\n[Step 7] Context preview...")
    
    # Show first 500 chars
    print(f"\n  [Context Start - First 500 chars]")
    print("  " + "-" * 76)
    preview_start = combined_context[:500].replace("\n", "\n  ")
    print(f"  {preview_start}")
    print("  " + "-" * 76)
    
    # Show query footer
    if "USER QUESTION" in combined_context:
        footer_start = combined_context.find("USER QUESTION")
        footer = combined_context[footer_start:][:300]
        print(f"\n  [Query Footer - Last section]")
        print("  " + "-" * 76)
        footer_preview = footer.replace("\n", "\n  ")
        print(f"  {footer_preview}")
        print("  " + "-" * 76)
    
    # ════════════════════════════════════════════════════════════════════════
    # Final Summary
    # ════════════════════════════════════════════════════════════════════════
    print("\n" + "=" * 80)
    print("✓ ALL TESTS PASSED")
    print("=" * 80)
    
    print(f"\n[Summary]")
    print(f"  ✓ RAG components initialized")
    print(f"  ✓ Combined context built ({len(combined_context):,} chars)")
    print(f"  ✓ System prompt loaded ({len(system_prompt):,} chars)")
    print(f"  ✓ User prompt formatted ({len(user_prompt):,} chars)")
    print(f"  ✓ Structure validated (KPI + Narrative + Query footer)")
    print(f"  ✓ Token budget analyzed (~{total_tokens:,} tokens)")
    print(f"  ✓ Cost estimated (${total_cost:.4f} per query)")
    
    print(f"\n[Ready for LLM]")
    print(f"  Model: {model_name}")
    print(f"  Temperature: {llm_params['temperature']}")
    print(f"  Max tokens: {llm_params['max_tokens']:,}")
    print(f"  Estimated cost: ${total_cost:.4f}")
    
    print("\n" + "=" * 80)
    
    return True

if __name__ == "__main__":
    try:
        success = test_prompt_integration()
        sys.exit(0 if success else 1)
    except Exception as e:
        print(f"\n✗ FATAL ERROR: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

PROMPT INTEGRATION TEST - PRODUCTION QUERY

[Step 1] Initializing RAG components...
[DEBUG] ✓ AWS credentials loaded from aws_credentials.env
  ✓ MLConfig loaded
[DEBUG] ✓ AWS credentials loaded from aws_credentials.env
✓ FilterExtractor initialized with 21 companies
  Using: finrag_dim_companies_21.parquet
✓ FilterExtractor initialized with 21 companies
  Using: finrag_dim_companies_21.parquet
✓ KPI-JSON: Loaded 527 metric records
✓ KPI-JSON: Unique tickers: 2
✓ KPI-JSON: Year range: 2010-2025
  ✓ RAG components initialized
    - Entity Adapter: EntityAdapter
    - Embedder: QueryEmbedderV2
    - Retriever: S3VectorsRetriever
    - Expander: SentenceExpander
    - Assembler: ContextAssembler
    - Metric Pipeline: MetricPipeline

[Step 2] Building combined context with production query...
  Query: For NVIDIA and Microsoft, what were revenue, operating income, and total assets in each year from 20...
  ✓ Combined context built
    - Total length: 43,483 characters
    - Estimated token

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
## not an error. It's a successful exit.