# Infrastructure Test Notebook

This notebook verifies that all components of the Spend Categorization application are properly configured and working.

## What This Notebook Tests

1. **Configuration Loading** - Verify `config.yaml` loads correctly
2. **Backend Initialization** - Test MockBackend (test mode) and optionally LakebaseBackend (prod mode)
3. **Query Functions** - Validate search, flagged invoices, and category retrieval
4. **Correction Functions** - Test the Type 2 SCD (slowly changing dimension) write operations
5. **Connection Verification** - For prod mode, verify Lakebase connectivity

## Running This Notebook

- **Test Mode**: Run all cells - uses in-memory mock data, no external dependencies
- **Prod Mode**: Set `app.mode: prod` in `config.yaml` first, then run all cells to verify the Lakebase connection


## 1. Setup and Imports


In [None]:
# Setup cell: make the project's `src` directory importable, then import every
# module that the later cells exercise. An ImportError here means the package
# layout or the working directory is wrong.
import sys
from pathlib import Path

# Add src to path for imports.
# NOTE(review): `src` is resolved relative to the current working directory,
# so this presumably expects the notebook to be started from the project
# root - confirm.
sys.path.insert(0, str(Path.cwd() / "src"))

# Import all required modules
from invoice_app.config import load_config, AppConfig, LakebaseConfig
from invoice_app.database import (
    init_backend,
    get_backend,
    reset_backend,
    MockBackend,
    LakebaseBackend,
)
from invoice_app.queries import (
    search_invoices,
    get_flagged_invoices,
    get_invoices_by_ids,
    get_available_categories,
)
from invoice_app.corrections import (
    write_correction,
    write_corrections_batch,
    initialize_corrections_table,
    initialize_invoices_table,
)

# Only reached when every import above succeeded.
print("✅ All imports successful")


## 2. Configuration Loading

Load and validate the configuration from `config.yaml`.


In [None]:
# Configuration cell: load the raw config, build the typed AppConfig from it,
# and print the settings the remaining cells depend on. In prod mode the
# Lakebase settings are built and printed as well; otherwise `lakebase_config`
# is set to None so later cells can pass it along unconditionally.
config = load_config()
app_config = AppConfig.from_dict(config)

print("📋 Configuration Summary")
print("=" * 50)
print(f"Mode:                    {app_config.mode}")
print(f"Is Test Mode:            {app_config.is_test_mode}")
print(f"Is Prod Mode:            {app_config.is_prod_mode}")
print(f"Invoices Table:          {app_config.invoices_table}")
print(f"Corrections Table:       {app_config.corrections_table}")
print(f"Page Size:               {app_config.page_size}")
print(f"Low Confidence Threshold: {app_config.low_confidence_threshold}")
print("=" * 50)

if app_config.is_prod_mode:
    # Built from the same raw config dict as AppConfig.
    lakebase_config = LakebaseConfig.from_dict(config)
    print("\n🗄️ Lakebase Configuration")
    print("=" * 50)
    print(f"Instance Name:           {lakebase_config.instance_name}")
    print(f"Database:                {lakebase_config.database}")
    print(f"User:                    {lakebase_config.user}")
    print(f"Schema:                  {lakebase_config.schema}")
    print("=" * 50)
else:
    lakebase_config = None
    print("\n🧪 Running in TEST mode - no Lakebase configuration needed")

print("\n✅ Configuration loaded successfully")


## 3. Backend Initialization

Initialize the appropriate database backend based on the configured mode.


In [None]:
# Backend cell: discard any backend left over from a previous run, create a
# fresh one for the configured mode, and report what was created.
reset_backend()

# `lakebase_config` is None in test mode (set by the configuration cell).
backend = init_backend(app_config, lakebase_config)

print("🔧 Backend Initialization")
print("=" * 50)
print(f"Backend Type:            {type(backend).__name__}")
print(f"Is Connected:            {backend.is_connected()}")

if isinstance(backend, MockBackend):
    # Reads MockBackend's private in-memory stores directly; acceptable in a
    # diagnostic notebook, but it couples this cell to MockBackend internals.
    print(f"Mock Invoices Count:     {len(backend._invoices)}")
    print(f"Mock Categories Count:   {len(backend._categories)}")
    print(f"Mock Corrections Count:  {len(backend._corrections)}")

print("=" * 50)
print("\n✅ Backend initialized successfully")


## 4. Query Functions Test

Test all query functions to ensure they work correctly with the backend.


In [None]:
# Query cell: call each read-side query function against the active backend.
# `categories`, `search_results` and `flagged` are kept as notebook globals
# because the summary cell reports their sizes.
print("🔍 Testing Query Functions")
print("=" * 50)

# Test 1: Get available categories
categories = get_available_categories(app_config, backend)
print("\n1. get_available_categories()")
print(f"   Categories found: {len(categories)}")
print(f"   Sample: {categories[:5]}")
assert len(categories) > 0, "Should have at least one category"
print("   ✅ PASSED")

# Test 2: Search invoices
# No assertion here: PASSED only means the call returned without raising
# (an empty result is accepted).
search_results = search_invoices(app_config, "Dell", limit=10, backend=backend)
print("\n2. search_invoices('Dell')")
print(f"   Results found: {len(search_results)}")
if not search_results.empty:
    print(f"   Columns: {list(search_results.columns)}")
print("   ✅ PASSED")

# Test 3: Get flagged invoices
# As above, PASSED means the call completed; the result may be empty.
flagged = get_flagged_invoices(app_config, limit=10, backend=backend)
print("\n3. get_flagged_invoices()")
print(f"   Flagged invoices: {len(flagged)}")
if not flagged.empty:
    avg_confidence = flagged["confidence_score"].mean()
    print(f"   Avg confidence score: {avg_confidence:.2f}")
print("   ✅ PASSED")

# Test 4: Get invoices by IDs
if isinstance(backend, MockBackend):
    # Take known-good IDs straight from the mock's in-memory invoice data.
    test_ids = backend._invoices["invoice_id"].head(3).tolist()
else:
    # For prod mode, get some IDs from search results
    test_ids = search_results["invoice_id"].head(3).tolist() if not search_results.empty else []

if test_ids:
    invoices_by_id = get_invoices_by_ids(app_config, test_ids, backend=backend)
    print(f"\n4. get_invoices_by_ids({test_ids[:2]}...)")
    print(f"   Invoices retrieved: {len(invoices_by_id)}")
    # NOTE(review): this expects exactly one returned row per requested ID,
    # i.e. presumably unique invoice_id values in the sampled rows - confirm.
    assert len(invoices_by_id) == len(test_ids), "Should retrieve all requested invoices"
    print("   ✅ PASSED")
else:
    print("\n4. get_invoices_by_ids() - SKIPPED (no test IDs available)")

print(f"\n{'=' * 50}")
print("✅ All query functions working correctly")


## 5. Correction Functions Test

Test the Type 2 SCD correction functions.


In [None]:
# Correction cell: exercise the single and batch correction writers.
# NOTE(review): the writes below go to whichever backend is active, so in
# prod mode the TEST-* corrections are sent to the configured backend and this
# cell performs no cleanup - confirm that is acceptable.
print("✏️ Testing Correction Functions")
print("=" * 50)

# Get initial correction count (for MockBackend)
if isinstance(backend, MockBackend):
    initial_count = len(backend._corrections)
    print(f"\nInitial corrections count: {initial_count}")

# Test 1: Write single correction
print("\n1. write_correction()")
write_correction(
    app_config,
    invoice_id="TEST-INV-001",
    transaction_ids=["TEST-TXN-001"],
    corrected_category="Hardware",
    comment="Infrastructure test correction",
    corrected_by="test_notebook",
    backend=backend,
)
print("   Single correction written")
print("   ✅ PASSED")

# Test 2: Write batch corrections
print("\n2. write_corrections_batch()")
batch_corrections = [
    {
        "invoice_id": "TEST-INV-002",
        "transaction_ids": ["TEST-TXN-002"],
        "corrected_category": "Software",
        "comment": "Batch test 1",
        "corrected_by": "test_notebook",
    },
    {
        "invoice_id": "TEST-INV-003",
        "transaction_ids": ["TEST-TXN-003"],
        "corrected_category": "Cloud Services",
        "comment": "Batch test 2",
        "corrected_by": "test_notebook",
    },
]
write_corrections_batch(app_config, batch_corrections, backend=backend)
print(f"   Batch of {len(batch_corrections)} corrections written")
print("   ✅ PASSED")

# Verify corrections were written (for MockBackend)
# Only the mock exposes its corrections store, so the count check is limited
# to test mode: one single write plus a batch of two should add three entries.
if isinstance(backend, MockBackend):
    final_count = len(backend._corrections)
    new_corrections = final_count - initial_count
    print("\n📊 Verification:")
    print(f"   New corrections added: {new_corrections}")
    assert new_corrections == 3, "Should have added 3 corrections"

print(f"\n{'=' * 50}")
print("✅ All correction functions working correctly")


## 6. Lakebase Connection Test (Prod Mode Only)

This section tests the Lakebase PostgreSQL connection. Only relevant when running in prod mode.


In [None]:
# Lakebase cell (prod mode only): check connectivity and table initialization.
# Both checks always run so a single execution reports every problem, but any
# failure is collected and raised at the end of the cell. Without that, a
# failure would only be printed and the summary cell would still announce
# that all tests passed.
if app_config.is_prod_mode and isinstance(backend, LakebaseBackend):
    print("🗄️ Testing Lakebase Connection")
    print("=" * 50)

    # Human-readable descriptions of every check that failed in this cell.
    lakebase_failures = []

    # Test connection
    print("\n1. Connection Test")
    print(f"   Instance: {lakebase_config.instance_name}")
    print(f"   Database: {lakebase_config.database}")

    try:
        is_connected = backend.is_connected()
    except Exception as e:
        print(f"   ❌ FAILED - {e}")
        lakebase_failures.append(f"connection test raised: {e}")
    else:
        print(f"   Connected: {is_connected}")
        if is_connected:
            print("   ✅ PASSED")
        else:
            print("   ❌ FAILED - Cannot connect to Lakebase")
            lakebase_failures.append("connection test: cannot connect to Lakebase")

    # Test table initialization (if tables don't exist)
    print("\n2. Table Initialization Test")
    try:
        initialize_invoices_table(app_config, backend)
        print("   Invoices table: Ready")
        initialize_corrections_table(app_config, backend)
        print("   Corrections table: Ready")
        print("   ✅ PASSED")
    except Exception as e:
        print(f"   ❌ FAILED - {e}")
        lakebase_failures.append(f"table initialization raised: {e}")

    print(f"\n{'=' * 50}")

    # Fail the cell so "Run all" stops before the summary reports success.
    if lakebase_failures:
        raise RuntimeError(
            "Lakebase infrastructure checks failed: " + "; ".join(lakebase_failures)
        )
else:
    print("⏭️ Skipping Lakebase tests (running in test mode)")
    print("   To test Lakebase connection, set 'app.mode: prod' in config.yaml")


## 7. Summary

Final summary of all infrastructure tests.


In [None]:
# Summary cell: recap the mode, backend and result sizes gathered by the
# earlier cells, then print mode-specific next steps. It relies on
# `categories`, `search_results` and `flagged` from the query cell, so it only
# runs after that cell has completed.
print("=" * 60)
print("🎉 INFRASTRUCTURE TEST SUMMARY")
print("=" * 60)
print()
print(f"  Mode:              {app_config.mode.upper()}")
print(f"  Backend:           {type(backend).__name__}")
print(f"  Connected:         {backend.is_connected()}")
print(f"  Categories:        {len(categories)} available")
print(f"  Search Results:    {len(search_results)} found for 'Dell'")
print(f"  Flagged Invoices:  {len(flagged)} found")
print()
print("=" * 60)

if app_config.is_test_mode:
    print("""
✅ All tests passed in TEST mode!

Next steps to deploy to production:
1. Create a Lakebase instance in Databricks
2. Update config.yaml:
   - Set app.mode to 'prod'
   - Set lakebase.instance_name to your instance
3. Run: uv run python src/setup_database.py --init-tables
4. Re-run this notebook to verify prod connectivity
""")
else:
    print("""
✅ All tests passed in PROD mode!

Your Lakebase infrastructure is ready. You can now:
1. Load sample data: uv run python src/setup_database.py --load-sample-data
2. Run the app: uv run streamlit run app.py
""")
