# Capitaux 03: Full Pipeline Testing

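**Purpose**: Test the complete Capitaux pipeline (AZ + AZEC → Silver) by running each processor's `read()` and `transform()` steps; writing to Silver is an optional manual step at the end.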

**Tests**:
1. Run AZCapitauxProcessor
2. Run AZECCapitauxProcessor  
3. Verify output datasets

---

In [None]:
import sys
from pathlib import Path

# Treat the grandparent of the current working directory as the project root
# (assumes the notebook is launched two levels below it — confirm for your layout).
project_root = Path.cwd().parent.parent

# Prepend the root to the import path so project packages take priority on import.
project_root_entry = str(project_root)
sys.path.insert(0, project_root_entry)

print(f"Project root: {project_root}")

In [None]:
from pyspark.sql import SparkSession
from azfr_fsspec_utils import fspath
import azfr_fsspec_abfs

# Activate the azfr ABFS fsspec hook before any Spark work
# (presumably registers the ABFS filesystem — confirm against the package docs).
azfr_fsspec_abfs.use()

# Reuse the active Spark session if there is one, otherwise create it.
spark = (
    SparkSession.builder
    .appName("Capitaux_Pipeline_Testing")
    .getOrCreate()
)

print(f"✓ Spark {spark.version}")

## 1. Initialize Processors

In [None]:
from utils.loaders.config_loader import ConfigLoader
from utils.logger import PipelineLogger
from src.processors.capitaux_processors.az_capitaux_processor import AZCapitauxProcessor
from src.processors.capitaux_processors.azec_capitaux_processor import AZECCapitauxProcessor

# Pipeline configuration lives at <project_root>/config/config.yml.
config = ConfigLoader(str(project_root.joinpath("config", "config.yml")))

# Logger instance shared by the processors created in the following cells.
logger = PipelineLogger("capitaux_test")

# Vision under test; passed to every read()/transform() call below
# (looks like a YYYYMM period — confirm against the pipeline docs).
VISION = "202509"
print(f"Testing pipeline for vision: {VISION}")

## 2. Run AZ Capitaux Processor

In [None]:
# AZ leg of the pipeline: read bronze data, transform it, and preview the result.
# On any failure the error and traceback are printed and df_az_transformed is
# set to None so the later cells can detect that this leg did not complete.
try:
    az_processor = AZCapitauxProcessor(spark, config, logger)

    # read() and transform() are called separately here instead of run().
    print("Step 1: Reading AZ bronze data...")
    df_az = az_processor.read(VISION)
    az_input_rows = df_az.count()
    print(f"✓ Read: {az_input_rows:,} rows")

    print("\nStep 2: Transforming AZ data...")
    df_az_transformed = az_processor.transform(df_az, VISION)
    az_output_rows = df_az_transformed.count()
    print(f"✓ AZ Capitaux: {az_output_rows:,} rows")

    # Preview the policy number and the two indexed capital columns.
    az_preview = df_az_transformed.select('nopol', 'smp_100_ind', 'lci_100_ind')
    az_preview.show(5)

except Exception as exc:
    print(f"✗ AZ Processor error: {exc}")
    import traceback
    traceback.print_exc()
    # Mark the AZ leg as failed for the schema check and summary cells.
    df_az_transformed = None

## 3. Run AZEC Capitaux Processor

In [None]:
# AZEC leg of the pipeline: read bronze data, transform it, and preview the result.
# This leg is treated as optional: on any failure a warning is printed and
# df_azec_transformed is set to None so the later cells skip it.
try:
    azec_processor = AZECCapitauxProcessor(spark, config, logger)

    # read() and transform() are called separately here instead of run().
    print("Step 1: Reading AZEC bronze data...")
    df_azec = azec_processor.read(VISION)
    print(f"✓ Read CAPITXCU: {df_azec.count():,} rows")

    print("\nStep 2: Transforming AZEC data...")
    df_azec_transformed = azec_processor.transform(df_azec, VISION)
    print(f"✓ AZEC Capitaux: {df_azec_transformed.count():,} rows")

    # Preview the policy number, product code and the two indexed capital columns.
    df_azec_transformed.select('nopol', 'cdprod', 'smp_100_ind', 'lci_100_ind').show(5)

except Exception as e:
    print(f"⚠ AZEC Processor (expected if CAPITXCU missing): {e}")
    # Print the traceback as the AZ cell does: this handler catches every
    # exception, so without it a genuine read/transform bug would be
    # indistinguishable from the "CAPITXCU missing" case the message describes.
    import traceback
    traceback.print_exc()
    # Mark the AZEC leg as not available for the schema check and summary cells.
    df_azec_transformed = None

## 4. Verify Output Schemas

In [None]:
# Report the column count and up to five columns whose name contains "100"
# for each leg that produced a DataFrame; a leg set to None is skipped.
if df_az_transformed is not None:
    az_columns = df_az_transformed.columns
    az_capital_columns = [name for name in az_columns if '100' in name]
    print("AZ Schema:")
    print(f"  Columns: {len(az_columns)}")
    print(f"  Capital columns: {az_capital_columns[:5]}")

if df_azec_transformed is not None:
    azec_columns = df_azec_transformed.columns
    azec_capital_columns = [name for name in azec_columns if '100' in name]
    print("\nAZEC Schema:")
    print(f"  Columns: {len(azec_columns)}")
    print(f"  Capital columns: {azec_capital_columns[:5]}")

## 5. Optional: Write to Silver (Manual)

In [None]:
# Uncomment to write outputs manually
# if df_az_transformed is not None:
#     az_processor.write(df_az_transformed, VISION)
#     print("✓ AZ data written to silver")
# 
# if df_azec_transformed is not None:
#     azec_processor.write(df_azec_transformed, VISION)
#     print("✓ AZEC data written to silver")

## Summary

In [None]:
# Final recap: per-leg status, the notes gathered while testing, and the
# command for running the production pipeline.
print("=" * 60)
print("CAPITAUX PIPELINE TESTING COMPLETE")
print("=" * 60)
print(f"\nVision: {VISION}")
# A leg counts as successful when its transformed DataFrame is not None;
# a missing AZEC result is reported as a warning rather than a failure.
print(f"AZ Capitaux:   {'✓' if df_az_transformed is not None else '✗'}")
print(f"AZEC Capitaux: {'✓' if df_azec_transformed is not None else '⚠ (optional)'}")

print("\nKey learnings:")
print("  1. Use read() + transform() for testing (run() writes directly)")
print("  2. AZ: ipf_az file_group (combines IPFE16 + IPFE36)")
print("  3. AZEC: capitxcu_azec + incendcu_azec")
print("  4. Both create indexed (_ind) and non-indexed capitals")
# Interpolate VISION so the suggested command always matches the vision that
# was actually tested instead of a hard-coded period.
print(f"\n→ Run production: python main.py --vision {VISION} --component capitaux")