# Test Unified ETL Pipeline - ConnectWise PSA
This notebook tests the unified ETL framework against all ConnectWise entities.

## 1. Install the Unified ETL Packages

In [None]:
# Make the lakehouse dist/ folder visible to Python's import system.
# NOTE(review): a directory on sys.path only exposes modules/packages that sit
# directly inside it. The commented-out commands below suggest dist/ holds .whl
# archives, which presumably are NOT importable this way — confirm, or run the
# %pip install lines instead.
import sys
sys.path.append('/lakehouse/default/Files/dist/')

# Or install with pip
# %pip install /lakehouse/default/Files/dist/unified-etl-core-1.0.0-py3-none-any.whl
# %pip install /lakehouse/default/Files/dist/unified-etl-connectwise-1.0.0-py3-none-any.whl

## 2. Test Model Generation for All Entities

In [None]:
# Import all generated models
from unified_etl_connectwise.models import (
    Agreement,
    TimeEntry,
    ExpenseEntry,
    ProductItem,
    PostedInvoice,
    Invoice as UnpostedInvoice,  # UnpostedInvoice uses Invoice model
)
from unified_etl_connectwise.utils.api_utils import get_fields_for_api_call

# Entity name -> generated model class; later cells look models up by these names.
model_mapping = dict(
    Agreement=Agreement,
    TimeEntry=TimeEntry,
    ExpenseEntry=ExpenseEntry,
    ProductItem=ProductItem,
    PostedInvoice=PostedInvoice,
    UnpostedInvoice=UnpostedInvoice,
)

print("Testing all ConnectWise models:")
print("=" * 50)

for entity_name, model_class in model_mapping.items():
    # Comma-separated field list built from the model (nesting limited to depth 2)
    fields = get_fields_for_api_call(model_class, max_depth=2)
    field_names = fields.split(',')
    field_count = len(field_names)

    # Spark schema produced by the same model class
    spark_schema = model_class.spark_schema()

    # Assemble the per-entity report, then emit it in one go
    report_lines = [
        f"\n{entity_name}:",
        f"  - API fields: {field_count}",
        f"  - Spark schema fields: {len(spark_schema.fields)}",
        f"  - Sample fields: {', '.join(field_names[:5])}...",
    ]
    print("\n".join(report_lines))

## 3. Configure ConnectWise Connection

In [None]:
import os
from notebookutils import mssparkutils

# Resolve ConnectWise credentials: Key Vault first, environment variables as a
# fallback for local testing. Secret values themselves are never printed.
kv_name = "your-keyvault-name"  # Replace with your Key Vault name

try:
    # Try to get from Key Vault
    cw_company = mssparkutils.credentials.getSecret(kv_name, "CW-COMPANY")
    cw_public_key = mssparkutils.credentials.getSecret(kv_name, "CW-PUBLIC-KEY")
    cw_private_key = mssparkutils.credentials.getSecret(kv_name, "CW-PRIVATE-KEY")
    cw_base_url = mssparkutils.credentials.getSecret(kv_name, "CW-BASE-URL")
except Exception as kv_error:
    # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still
    # stop the cell, and say why the fallback was taken instead of hiding it.
    print(
        f"Key Vault lookup failed ({type(kv_error).__name__}); "
        "falling back to environment variables"
    )
    cw_company = os.environ.get("CW_COMPANY", "")
    cw_public_key = os.environ.get("CW_PUBLIC_KEY", "")
    cw_private_key = os.environ.get("CW_PRIVATE_KEY", "")
    cw_base_url = os.environ.get("CW_BASE_URL", "https://api-na.myconnectwise.net/v4_6_release/apis/3.0")

# Surface empty settings here rather than as an opaque failure during extraction.
# Only the setting names are reported, never their values.
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("company", cw_company),
        ("public_key", cw_public_key),
        ("private_key", cw_private_key),
        ("base_url", cw_base_url),
    )
    if not setting_value
]
if missing_settings:
    print(f"⚠️  Missing ConnectWise settings: {', '.join(missing_settings)}")

# Configure extractor
config = {
    "base_url": cw_base_url,
    "auth": {
        "type": "api_key",
        "credentials": {
            "company": cw_company,
            "public_key": cw_public_key,
            "private_key": cw_private_key,
        }
    }
}

print(f"Configured for company: {cw_company}")
print(f"Base URL: {cw_base_url}")

## 4. Test Bronze Layer Extraction for All Entities

In [None]:
from unified_etl_connectwise.extract import ConnectWiseExtractor
from datetime import datetime

# Build the extractor from the connection settings assembled earlier
extractor = ConnectWiseExtractor(config)

# API endpoint to pull for each entity
entity_endpoints = dict(
    Agreement="/finance/agreements",
    TimeEntry="/time/entries",
    ExpenseEntry="/expense/entries",
    ProductItem="/procurement/products",
    PostedInvoice="/finance/invoices/posted",
    UnpostedInvoice="/finance/invoices",
)

# Every entity is written to its own Delta location under this folder;
# outcomes are recorded per entity for the later cells.
bronze_base_path = "/lakehouse/default/Tables/bronze"
extraction_results = {}

for entity_name, endpoint in entity_endpoints.items():
    print(f"\nExtracting {entity_name} from {endpoint}...")

    try:
        # Small page size keeps this test run light
        df = extractor.extract(endpoint=endpoint, page_size=10)

        record_count = df.count()
        extraction_results[entity_name] = dict(success=True, count=record_count, df=df)

        # Persist to bronze, replacing whatever an earlier run left behind
        bronze_path = f"{bronze_base_path}/bronze_cw_{entity_name.lower()}"
        delta_writer = df.write.mode("overwrite").format("delta")
        delta_writer.save(bronze_path)

        print(f"✅ Extracted {record_count} records")
        print(f"   Saved to: {bronze_path}")

    except Exception as e:
        # A failure anywhere above (extract, count or write) marks the entity as failed
        failure_reason = str(e)
        extraction_results[entity_name] = dict(success=False, error=failure_reason)
        print(f"❌ Failed: {failure_reason}")

## 5. Display Sample Data

In [None]:
# Preview rows and schema for each entity whose extraction returned data
for entity_name, result in extraction_results.items():
    # Failed or empty extractions have nothing to show
    if not (result["success"] and result["count"] > 0):
        continue

    sample_df = result["df"]
    print(f"\n{entity_name} Sample (first 3 records):")
    print("=" * 80)
    sample_df.show(3, truncate=False)
    print("\nSchema:")
    sample_df.printSchema()

## 6. Test Silver Layer Validation

In [None]:
from unified_etl_core.extract.base import validate_batch

# Validate a handful of extracted rows per entity against its generated model.
# Only entities whose extraction succeeded with at least one record are checked.
validation_results = {}

for entity_name, result in extraction_results.items():
    if not (result["success"] and result["count"] > 0):
        continue

    print(f"\nValidating {entity_name}...")

    # Collect up to 5 rows as plain dicts. Row.asDict(recursive=True) keeps nulls
    # as None and converts nested structs to dicts; the previous pandas round-trip
    # (toPandas().to_dict('records')) can turn nulls into NaN and leave nested
    # structs as Row objects, which then fail validation for the wrong reason.
    sample_data = [
        row.asDict(recursive=True)
        for row in result["df"].limit(5).collect()
    ]

    # Get model class
    model_class = model_mapping[entity_name]

    # Validate
    valid_models, errors = validate_batch(sample_data, model_class)

    validation_results[entity_name] = {
        "total": len(sample_data),
        "valid": len(valid_models),
        "errors": len(errors)
    }

    print(f"✅ Valid: {len(valid_models)}/{len(sample_data)}")
    if errors:
        print(f"⚠️  Errors: {len(errors)}")
        # errors is non-empty here, so the first entry can be read directly
        print(f"   First error: {errors[0]['errors'][0]}")

## 7. Summary Report

In [None]:
# Final roll-up of the stages exercised by the cells above
print("🎉 Unified ETL Pipeline Test Summary")
print("=" * 50)

# One line per registered model
print("\nModel Generation:")
for entity in model_mapping:
    print(f"  - {entity}: ✅")

# Record count on success, captured error text on failure
print("\nBronze Layer Extraction:")
for entity, result in extraction_results.items():
    if not result["success"]:
        print(f"  - {entity}: ❌ ({result['error']})")
        continue
    print(f"  - {entity}: ✅ ({result['count']} records)")

# Only entities that went through validation appear here
print("\nSilver Layer Validation:")
for entity, result in validation_results.items():
    print(f"  - {entity}: {result['valid']}/{result['total']} valid")

next_steps = (
    "  1. Implement full Silver transformations (flattening, standardization)",
    "  2. Add Gold layer with business logic",
    "  3. Configure incremental processing",
    "  4. Add Business Central entities",
)
print("\nNext Steps:")
for step in next_steps:
    print(step)