In [None]:
# Import required libraries
from datetime import datetime
import time

# Parameters
# sync_scope accepts: FULL, STUDENTS, TEACHING, ASSESSMENT, ATTAINMENT,
# ATTENDANCE, TIMETABLE, BEHAVIOUR, USERS
sync_scope = "FULL"
skip_raw = False   # True skips Step 1 (raw layer ingestion)
skip_base = False  # True skips Step 2 (base layer transformation)

# Start-of-run banner echoing the effective parameters.
_rule = "=" * 80
_started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(
    "\n".join(
        [
            _rule,
            "G4S API Sync Orchestration",
            f"Started: {_started_at}",
            f"Sync Scope: {sync_scope}",
            f"Skip Raw Layer: {skip_raw}",
            f"Skip Base Layer: {skip_base}",
            _rule,
        ]
    )
)

## Step 1: Raw Layer Ingestion

In [None]:
# Step 1: run the raw layer ingestion notebook unless skip_raw is set.
# A failure is reported and then re-raised, so this cell fails with it.
if skip_raw:
    print("\nSkipping raw layer ingestion (skip_raw=True)")
else:
    print("\n" + "=" * 80)
    print("STEP 1: Raw Layer Ingestion")
    print("=" * 80)

    raw_start = time.time()

    # Run raw layer ingestion notebook
    # In Fabric, use mssparkutils.notebook.run()
    try:
        # Arguments are passed positionally in the documented order
        # (path, timeout in seconds, parameter map) so the call does not
        # depend on the keyword names of the underlying Python wrapper.
        result = mssparkutils.notebook.run(
            "01_RawLayer_Ingestion",
            3600,  # 1 hour timeout
            {"sync_scope": sync_scope},
        )
    except Exception as e:
        print(f"\n✗ Raw layer ingestion failed: {e}")
        raise
    else:
        raw_duration = time.time() - raw_start
        print(f"\n✓ Raw layer ingestion completed in {raw_duration:.2f} seconds")
        print(f"Result: {result}")

## Step 2: Base Layer Transformation

In [None]:
# Step 2: run the base layer transformation notebooks selected by sync_scope,
# unless skip_base is set. Notebooks run one after another; a failing notebook
# is recorded and the remaining ones still run.
if not skip_base:
    print("\n" + "=" * 80)
    print("STEP 2: Base Layer Transformation")
    print("=" * 80)

    base_start = time.time()

    # Transformation notebooks to run for each supported sync scope.
    notebooks_by_scope = {
        "FULL": [
            "02_BaseLayer_Students",
            "03_BaseLayer_Teaching_Assessment",
            # Add more transformation notebooks as needed
        ],
        "STUDENTS": ["02_BaseLayer_Students"],
        "TEACHING": ["03_BaseLayer_Teaching_Assessment"],
        "ASSESSMENT": ["03_BaseLayer_Teaching_Assessment"],
        # Add more entries for other domains
    }
    transformation_notebooks = notebooks_by_scope.get(sync_scope, [])

    if not transformation_notebooks:
        # Make the no-op visible instead of silently reporting success.
        print(
            f"\n⚠ No base layer notebooks are configured for sync scope "
            f"'{sync_scope}'; nothing to run."
        )

    # Run each transformation notebook
    failed_notebooks = []
    for notebook in transformation_notebooks:
        print(f"\nRunning {notebook}...")
        try:
            # Positional arguments: path, timeout in seconds, parameter map.
            mssparkutils.notebook.run(
                notebook,
                1800,  # 30 minutes timeout
                {},
            )
        except Exception as e:
            # Continue with other notebooks even if one fails, but remember
            # the failure so the closing summary is accurate.
            print(f"✗ {notebook} failed: {e}")
            failed_notebooks.append(notebook)
        else:
            print(f"✓ {notebook} completed successfully")

    base_duration = time.time() - base_start
    if failed_notebooks:
        print(
            f"\n⚠ Base layer transformation finished in {base_duration:.2f} seconds "
            f"with {len(failed_notebooks)} failed notebook(s): "
            f"{', '.join(failed_notebooks)}"
        )
    else:
        print(f"\n✓ Base layer transformation completed in {base_duration:.2f} seconds")

else:
    print("\nSkipping base layer transformation (skip_base=True)")

## Step 3: Data Quality and Summary Report

In [None]:
print("\n" + "=" * 80)
print("STEP 3: Data Quality and Summary Report")
print("=" * 80)

# Query sync results logged during the last hour from the tracking table.
# The cutoff uses Spark SQL's current_timestamp() with interval arithmetic;
# GETUTCDATE() is T-SQL and is not a Spark SQL built-in function.
# NOTE(review): assumes LoggedAt is a timestamp column comparable with
# current_timestamp() - confirm against the writer of sec.SyncResults.
# NOTE(review): the look-back is a fixed one hour, while Step 1 alone is
# allowed to run for up to an hour; rows from a long run may fall outside it.
recent_sync_results = spark.sql("""
    SELECT 
        AcademyCode,
        EndPoint,
        DataSet,
        Result,
        RecordCount,
        LoggedAt,
        Exception
    FROM sec.SyncResults
    WHERE LoggedAt >= current_timestamp() - INTERVAL 1 HOUR
    ORDER BY LoggedAt DESC
""")

# Show results
display(recent_sync_results)

# Count successes and failures.
# The column is taken from the DataFrame itself, so no pyspark.sql.functions
# import (col) is needed. The explicit comparisons build Spark column
# expressions; they are not Python truthiness tests.
result_flag = recent_sync_results["Result"]
successful_syncs = recent_sync_results.filter(result_flag == True)  # noqa: E712
failed_syncs = recent_sync_results.filter(result_flag == False)  # noqa: E712
success_count = successful_syncs.count()
failure_count = failed_syncs.count()

print("\nSync Results Summary:")
print(f"  Successful syncs: {success_count}")
print(f"  Failed syncs: {failure_count}")

if failure_count > 0:
    print("\n⚠ Some syncs failed. Review the details above.")
    display(failed_syncs)

In [None]:
# Section heading for the per-table row counts
section_rule = "=" * 80
print(f"\n{section_rule}")
print("Table Record Counts")
print(section_rule)

def get_table_count(table_name: str) -> "int | str":
    """Return the row count of *table_name*, or a placeholder if unavailable.

    Args:
        table_name: Name passed to ``spark.catalog.tableExists`` and
            ``spark.table``.

    Returns:
        The row count when the table exists, ``"N/A"`` when the catalog
        reports that it does not exist, or ``"Error"`` when the existence
        check or the count raises.
    """
    try:
        if not spark.catalog.tableExists(table_name):
            return "N/A"
        return spark.table(table_name).count()
    except Exception:
        # Best effort for a summary report: one unreadable table must not
        # abort the listing. Catching Exception rather than using a bare
        # except lets KeyboardInterrupt and SystemExit propagate.
        return "Error"

# Base layer tables reported below (the Step 4 optimize loop reuses this list)
base_tables = [
    "base_students",
    "base_education_details",
    "base_student_attributes",
    "base_departments",
    "base_subjects",
    "base_groups",
    "base_group_students",
    "base_teachers",
    "base_markbooks",
    "base_marksheet_grades",
    "base_markslot_marks",
]

# One line per table: name padded to 40 columns, count right-aligned in 15.
print("\nBase Layer Tables:")
for table_name in base_tables:
    row_count = get_table_count(table_name)
    print(f"  {table_name:40} {row_count:>15}")

## Step 4: Optimize Delta Tables

In [None]:
step_rule = "=" * 80
print(f"\n{step_rule}")
print("STEP 4: Optimize Delta Tables")
print(step_rule)

# Optimize base layer tables. Tables the catalog does not report are skipped
# without a message; a failure on one table is printed and the loop moves on.
# NOTE(review): the ZORDER columns (Academy, DataSet) are applied to every
# table - confirm each base table actually has both columns.
for table_name in base_tables:
    try:
        if not spark.catalog.tableExists(table_name):
            continue
        print(f"Optimizing {table_name}...")
        spark.sql(f"OPTIMIZE {table_name} ZORDER BY (Academy, DataSet)")
        print(f"✓ Optimized {table_name}")
    except Exception as exc:
        print(f"✗ Failed to optimize {table_name}: {exc}")

## Completion Summary

In [None]:
# Closing banner: completion time, the scope that was run, and follow-up steps.
end_time = datetime.now()
closing_rule = "=" * 80
summary_lines = (
    f"\n{closing_rule}",
    "G4S API SYNC COMPLETE",
    closing_rule,
    f"Completed: {end_time.strftime('%Y-%m-%d %H:%M:%S')}",
    f"Scope: {sync_scope}",
    "\nNext Steps:",
    "  1. Review sync results in sec.SyncResults table",
    "  2. Validate data quality in base layer tables",
    "  3. Create/refresh any downstream semantic models or reports",
    closing_rule,
)
for summary_line in summary_lines:
    print(summary_line)