In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType
from datetime import datetime
from pyspark.sql import Row

# Explicit schema for the pipeline log rows.
# Each (column name, Spark type) pair below becomes one nullable field, in this order.
# NOTE(review): the two record-count columns use IntegerType (32-bit in Spark);
# confirm that table row counts can never exceed that range.
log_schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("pipeline_layer", StringType()),
        ("records_processed", IntegerType()),
        ("records_rejected", IntegerType()),
        ("status", StringType()),
        ("start_ts", TimestampType()),
        ("end_ts", TimestampType()),
        ("remarks", StringType()),
    )
])

def log_pipeline(layer, processed, rejected, status, remarks="", *, start_ts=None, end_ts=None):
    """Append one row to the ``workspace.default.pipeline_logs`` Delta table.

    The row is built against ``log_schema`` and written in append mode.

    Args:
        layer: Value for the ``pipeline_layer`` column (e.g. "Bronze Layer").
        processed: Value for the ``records_processed`` column.
        rejected: Value for the ``records_rejected`` column.
        status: Value for the ``status`` column; callers in this file pass
            "STARTED", "COMPLETED" or "FAILED".
        remarks: Free-text note for the ``remarks`` column; empty by default.
        start_ts: Optional ``datetime`` for the ``start_ts`` column. When
            omitted it takes the same value as ``end_ts``.
        end_ts: Optional ``datetime`` for the ``end_ts`` column. When omitted
            it is the current time (``datetime.now()``).

    When neither timestamp is supplied, both columns receive the same
    current timestamp. Passing ``start_ts`` lets a caller record how long a
    step actually took instead of a zero-length interval.
    """
    # NOTE(review): `spark` is not defined in this file; presumably it is the
    # SparkSession injected by the notebook runtime -- confirm.
    if end_ts is None:
        end_ts = datetime.now()
    if start_ts is None:
        start_ts = end_ts

    row = (layer, processed, rejected, status, start_ts, end_ts, remarks)
    log_df = spark.createDataFrame([row], schema=log_schema)

    log_df.write.format("delta").mode("append").saveAsTable("workspace.default.pipeline_logs")

# Bronze Layer Logging (optional: the Bronze Delta table is counted only if it exists)
try:
    log_pipeline("Bronze Layer", 0, 0, "STARTED")

    bronze_exists = spark.catalog.tableExists("workspace.default.bronze_sales_transactions")
    if bronze_exists:
        bronze_sales_df = spark.read.table("workspace.default.bronze_sales_transactions")
        bronze_count = bronze_sales_df.count()
        bronze_remarks = ""
    else:
        # A missing table is tolerated, but record that fact so a COMPLETED row
        # with 0 records is not mistaken for an existing-but-empty table.
        bronze_count = 0
        bronze_remarks = "Bronze table not found; no records counted"

    log_pipeline("Bronze Layer", bronze_count, 0, "COMPLETED", bronze_remarks)
except Exception as e:
    # Record the failure, then re-raise the active exception unchanged so the
    # cell still fails with the original error and traceback.
    log_pipeline("Bronze Layer", 0, 0, "FAILED", str(e))
    raise

# Silver Layer Logging: row counts of the Silver table (processed) and the
# quarantine table (rejected). Unlike Bronze, both tables are read without an
# existence check, so a missing table is logged as FAILED and re-raised.
try:
    log_pipeline("Silver Layer", 0, 0, "STARTED")

    silver_sales_df = spark.read.table("workspace.default.silver_sales_transactions")
    quarantine_df = spark.read.table("workspace.default.silver_sales_quarantine")

    valid_count = silver_sales_df.count()
    rejected_count = quarantine_df.count()

    log_pipeline("Silver Layer", valid_count, rejected_count, "COMPLETED", "Invalid records moved to quarantine")
except Exception as e:
    # Record the failure, then re-raise the active exception unchanged so the
    # cell still fails with the original error and traceback.
    log_pipeline("Silver Layer", 0, 0, "FAILED", str(e))
    raise

# Gold Layer Logging: row count of the Gold daily sales table. The table is
# read without an existence check, so a missing table is logged as FAILED
# and re-raised.
try:
    log_pipeline("Gold Layer", 0, 0, "STARTED")

    gold_daily_df = spark.read.table("workspace.default.gold_daily_sales")
    gold_count = gold_daily_df.count()

    log_pipeline("Gold Layer", gold_count, 0, "COMPLETED")
except Exception as e:
    # Record the failure, then re-raise the active exception unchanged so the
    # cell still fails with the original error and traceback.
    log_pipeline("Gold Layer", 0, 0, "FAILED", str(e))
    raise
