# Test Unified ETL Pipeline - ConnectWise PSA
This notebook tests the unified ETL framework with all ConnectWise entities

## 1. Install the Unified ETL Packages

In [None]:
# Or install with pip
%pip install /lakehouse/default/Files/unified_etl_core-1.0.0-py3-none-any.whl
%pip install /lakehouse/default/Files/unified_etl_connectwise-1.0.0-py3-none-any.whl

## 2. Test Model Generation for All Entities

In [None]:
# Import all generated models
from unified_etl_connectwise.models import (
    Agreement,
    TimeEntry,
    ExpenseEntry,
    ProductItem,
    PostedInvoice,
    Invoice as UnpostedInvoice,  # UnpostedInvoice uses Invoice model
)
from unified_etl_connectwise.utils.api_utils import get_fields_for_api_call

# Test that all models work
model_mapping = {
    "Agreement": Agreement,
    "TimeEntry": TimeEntry,
    "ExpenseEntry": ExpenseEntry,
    "ProductItem": ProductItem,
    "PostedInvoice": PostedInvoice,
    "UnpostedInvoice": UnpostedInvoice,
}

print("Testing all ConnectWise models:")
print("=" * 50)

for entity_name, model_class in model_mapping.items():
    # Test field generation
    fields = get_fields_for_api_call(model_class, max_depth=2)
    field_count = len(fields.split(","))

    # Test Spark schema generation
    spark_schema = model_class.model_spark_schema()

    print(f"\n{entity_name}:")
    print(f"  - API fields: {field_count}")
    print(f"  - Spark schema fields: {len(spark_schema.fields)}")
    print(f"  - Sample fields: {', '.join(fields.split(',')[:5])}...")

## 3. Configure ConnectWise Connection

In [3]:
# Set credentials directly
import os

os.environ["CW_AUTH_USERNAME"] = "thekking+yemGyHDPdJ1hpuqx"
os.environ["CW_AUTH_PASSWORD"] = "yMqpe26Jcu55FbQk"
os.environ["CW_CLIENTID"] = "c7ea92d2-eaf5-4bfb-a09c-58d7f9dd7b81"
os.environ["CW_BASE_URL"] = "https://verk.thekking.is/v4_6_release/apis/3.0"

# Configure extractor (even though it doesn't use these values, it expects a config)
config = {
    "base_url": os.environ["CW_BASE_URL"],
    "auth": {
        "type": "api_key",
        "credentials": {
            "company": "thekking",
            "public_key": "yemGyHDPdJ1hpuqx",
            "private_key": "yMqpe26Jcu55FbQk",
            "client_id": "c7ea92d2-eaf5-4bfb-a09c-58d7f9dd7b81",
        },
    },
}

print(f"Configured for company: thekking")
print(f"Base URL: {os.environ['CW_BASE_URL']}")

StatementMeta(, 12fbf8cc-bd3d-4552-b22c-8efc534d556f, 12, Finished, Available, Finished)

Configured for company: thekking
Base URL: https://verk.thekking.is/v4_6_release/apis/3.0


## 4. Test Bronze Layer Extraction for All Entities

In [None]:
from unified_etl_connectwise.extract import ConnectWiseExtractor
from unified_etl_core.storage.fabric_delta import write_to_delta
from datetime import datetime

# Create extractor - it will get spark from the global context
extractor = ConnectWiseExtractor(config)

# Define endpoints for each entity
entity_endpoints = {
    "Agreement": "/finance/agreements",
    "TimeEntry": "/time/entries",
    "ExpenseEntry": "/expense/entries",
    "ProductItem": "/procurement/products",
    "PostedInvoice": "/finance/invoices/posted",
    "UnpostedInvoice": "/finance/invoices",
}

# Extract a small sample from each entity
bronze_base_path = "Tables/bronze"
extraction_results = {}

for entity_name, endpoint in entity_endpoints.items():
    print(f"\nExtracting {entity_name} from {endpoint}...")

    try:
        # Extract with small page size for testing
        df = extractor.extract(
            endpoint=endpoint,
            page_size=1000,  # Small sample
        )

        record_count = df.count()
        extraction_results[entity_name] = {"success": True, "count": record_count, "df": df}

        # Use the framework's write_to_delta function which handles table creation
        path, written_count = write_to_delta(
            df=df,
            entity_name=f"bronze_cw_{entity_name.lower()}",
            base_path=bronze_base_path,
            mode="overwrite",
            add_timestamp=True,
        )

        print(f"✅ Extracted {record_count} records")
        print(f"   Saved to: {path}")

    except Exception as e:
        extraction_results[entity_name] = {"success": False, "error": str(e)}
        print(f"❌ Failed: {str(e)}")

## 5. Display Sample Data

In [5]:
# Show sample data from successful extractions
for entity_name, result in extraction_results.items():
    if result["success"] and result["count"] > 0:
        print(f"\n{entity_name} Sample (first 3 records):")
        print("=" * 80)
        result["df"].show(3, truncate=False)
        print("\nSchema:")
        result["df"].printSchema()

StatementMeta(, 12fbf8cc-bd3d-4552-b22c-8efc534d556f, 14, Finished, Available, Finished)


Agreement Sample (first 3 records):
+---+-----------------+------------------------------+-----------------------------------------------+---------------------------------+----------------------+------------------+------------------+---------------+----------+------------------------------+---------------------------+--------------------+----------------------+-------------------+-------------------+----------------+-----------+-------------+-------------+---------------+----+---------+-------------+----------------+----------------+----------------+------------------------+-----------+------------------+---------------------+---------------------+-------------+---------------+-------------+-----------+-----+--------------+------------+----------------+---------------------+--------------+---------------+------------------+---------------+------------+--------------+-----------------------------------------------+---------------------------------+----------------------+----------+----

## 6. Test Silver Layer Validation

In [6]:
from unified_etl_core.extract.base import validate_batch

# Test validation for each entity type
validation_results = {}

for entity_name, result in extraction_results.items():
    if result["success"] and result["count"] > 0:
        print(f"\nValidating {entity_name}...")

        # Get sample data as list of dicts
        sample_data = result["df"].limit(5).toPandas().to_dict("records")

        # Get model class
        model_class = model_mapping[entity_name]

        # Validate
        valid_models, errors = validate_batch(sample_data, model_class)

        validation_results[entity_name] = {
            "total": len(sample_data),
            "valid": len(valid_models),
            "errors": len(errors),
        }

        print(f"✅ Valid: {len(valid_models)}/{len(sample_data)}")
        if errors:
            print(f"⚠️  Errors: {len(errors)}")
            print(f"   First error: {errors[0]['errors'][0] if errors else 'None'}")

StatementMeta(, 12fbf8cc-bd3d-4552-b22c-8efc534d556f, 15, Finished, Available, Finished)


Validating Agreement...
✅ Valid: 0/5
⚠️  Errors: 5
   First error: {'type': 'finite_number', 'loc': ('expiredDays',), 'msg': 'Input should be a finite number', 'input': nan, 'url': 'https://errors.pydantic.dev/2.11/v/finite_number'}

Validating TimeEntry...
✅ Valid: 0/5
⚠️  Errors: 5
   First error: {'type': 'uuid_parsing', 'loc': ('mobileGuid',), 'msg': 'Input should be a valid UUID, invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-fA-F-], found `_` at 2', 'input': '{__class__=uuid.UUID, is_safe=0, int=162551351200316662981905439210932864332}', 'ctx': {'error': 'invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-fA-F-], found `_` at 2'}, 'url': 'https://errors.pydantic.dev/2.11/v/uuid_parsing'}

Validating ProductItem...
✅ Valid: 2/5
⚠️  Errors: 3
   First error: {'type': 'finite_number', 'loc': ('applyToId',), 'msg': 'Input should be a finite number', 'input': nan, 'url': 'https://errors.pydantic.dev/2.11/v/finite_number'}


Validation failed for TimeEntry ID 1
Validation failed for TimeEntry ID 2
Validation failed for TimeEntry ID 4
Validation failed for TimeEntry ID 5
Validation failed for TimeEntry ID 6
Found 5 validation errors in TimeEntry data


## 7. Summary Report

In [7]:
print("🎉 Unified ETL Pipeline Test Summary")
print("=" * 50)
print("\nModel Generation:")
for entity in model_mapping.keys():
    print(f"  - {entity}: ✅")

print("\nBronze Layer Extraction:")
for entity, result in extraction_results.items():
    if result["success"]:
        print(f"  - {entity}: ✅ ({result['count']} records)")
    else:
        print(f"  - {entity}: ❌ ({result['error']})")

print("\nSilver Layer Validation:")
for entity, result in validation_results.items():
    print(f"  - {entity}: {result['valid']}/{result['total']} valid")

print("\nNext Steps:")
print("  1. Implement full Silver transformations (flattening, standardization)")
print("  2. Add Gold layer with business logic")
print("  3. Configure incremental processing")
print("  4. Add Business Central entities")

StatementMeta(, 12fbf8cc-bd3d-4552-b22c-8efc534d556f, 16, Finished, Available, Finished)

🎉 Unified ETL Pipeline Test Summary

Model Generation:
  - Agreement: ✅
  - TimeEntry: ✅
  - ExpenseEntry: ✅
  - ProductItem: ✅
  - PostedInvoice: ✅
  - UnpostedInvoice: ✅

Bronze Layer Extraction:
  - Agreement: ✅ (1933 records)
  - TimeEntry: ✅ (535963 records)
  - ExpenseEntry: ✅ (14188 records)
  - ProductItem: ✅ (379748 records)
  - PostedInvoice: ✅ (0 records)
  - UnpostedInvoice: ✅ (37294 records)

Silver Layer Validation:
  - Agreement: 0/5 valid
  - TimeEntry: 0/5 valid
  - ExpenseEntry: 0/5 valid
  - ProductItem: 0/5 valid
  - UnpostedInvoice: 2/5 valid

Next Steps:
  1. Implement full Silver transformations (flattening, standardization)
  2. Add Gold layer with business logic
  3. Configure incremental processing
  4. Add Business Central entities
