# Fabric Fast-Track Data Quality Validation

This notebook implements Great Expectations data quality checks for the Fast-Track accelerator.

In [None]:
# Import required libraries
import great_expectations as gx
import pandas as pd
import os
from datetime import datetime

In [None]:
# Great Expectations data context shared by the validation cells below.
context = gx.get_context()

# Fabric Lakehouse settings: the workspace name can be overridden through the
# WORKSPACE_NAME environment variable; the lakehouse name is fixed.
WORKSPACE_NAME = os.environ.get('WORKSPACE_NAME', 'FastTrack-Test-Workspace')
LAKEHOUSE_NAME = 'FastTrack_Lakehouse'

In [None]:
def validate_bronze_layer(*, min_row_count: int = 1000):
    """Run Great Expectations checks against the bronze customers table.

    Reads ``{LAKEHOUSE_NAME}.bronze_customers`` through the notebook's
    ``spark`` session, (re)creates the ``bronze_layer_suite`` expectation
    suite, and validates that the table holds at least ``min_row_count`` rows
    and has a ``customer_id`` column with no null values.

    Args:
        min_row_count: Minimum number of rows the table must contain.
            Defaults to 1000, the threshold previously hard-coded here.

    Returns:
        Whatever ``validator.validate()`` returns; the run cell in this
        notebook reads its ``success`` attribute.
    """
    # Single source of truth for the suite name used by both calls below.
    suite_name = 'bronze_layer_suite'

    customers_df = spark.read.table(f'{LAKEHOUSE_NAME}.bronze_customers')

    # Called for its side effect only (the suite must exist before a validator
    # can be bound to it by name); the returned suite object is not needed.
    context.create_expectation_suite(suite_name, overwrite_existing=True)

    # NOTE(review): the batch request is passed as a plain dict wrapping the
    # DataFrame. Confirm the installed Great Expectations version accepts this
    # form rather than requiring a BatchRequest/RuntimeBatchRequest object.
    validator = context.get_validator(
        batch_request={'batch_data': customers_df},
        expectation_suite_name=suite_name,
    )

    # Basic expectations: enough rows, and a fully populated customer key.
    validator.expect_table_row_count_to_be_between(min_value=min_row_count)
    validator.expect_column_to_exist('customer_id')
    validator.expect_column_values_to_not_be_null('customer_id')

    return validator.validate()

In [None]:
# Execute the bronze-layer validation and report the outcome on stdout.
print(f'Starting data quality validation at {datetime.now()}')

try:
    result = validate_bronze_layer()
    # Report pass/fail based on the overall success flag of the run.
    print(
        '✅ All data quality checks passed'
        if result.success
        else '❌ Data quality validation failed'
    )
except Exception as exc:
    # Any failure while reading, validating, or reporting is printed rather
    # than propagated, so the cell itself never raises.
    print(f'Error during validation: {exc!s}')