In [None]:
import logging
from pyspark.sql import functions as F
from pyspark.sql.types import StringType, DecimalType

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ETL: load sales and line-total data, reshape the sales columns, join the two
# on distribution channel, and write the filtered result to a Delta table.
# `spark` is expected to be the session provided by the notebook runtime.
try:
    # Load data from Unity Catalog tables
    sales_df = spark.table("catalog.source_db.sales_data")
    line_totals_df = spark.table("catalog.source_db.line_totals")

    # Transformation: DateTime Conversion
    sales_df = sales_df.withColumn("SO_AUDAT", F.to_date(F.col("SO_AUDAT"), "yyyy-MM-dd"))
    line_totals_df = line_totals_df.withColumn("FKDAT", F.to_date(F.col("FKDAT"), "yyyy-MM-dd"))

    # Transformation: Select and Rename Fields
    # Rush_Order_Fee is derived inside this projection: the select drops
    # ADDTN_TRANS_FEE_OVRRIDE_ZSRO, so referencing it afterwards would fail
    # with an unresolved-column error.
    sales_df = sales_df.select(
        F.col("SO_AUDAT").alias("SO_Date"),
        F.col("WERKS").alias("Whs"),
        F.col("VTWEG").alias("DIST_CHNL_ID"),
        F.col("ZZFINCLASS").alias("FNC_ID"),
        F.col("SOLDTO_KUNNR").alias("SOLDTO"),
        F.col("SHIPTO_KUNNR").alias("SHIPTO"),
        F.col("VGBEL").alias("RFRNC_DOC_NUM"),
        F.col("ADDTN_TRANS_FEE_OVRRIDE_ZSRO").cast(DecimalType(20, 2)).alias("Rush_Order_Fee"),
    )

    # Transformation: Multi-Field Formula
    # A numeric fill value only replaces nulls in numeric columns; string and
    # date columns are left unchanged.
    sales_df = sales_df.fillna(0)

    # Transformation: Join
    # Joining on the column name (rather than an equality expression) keeps a
    # single DIST_CHNL_ID column in the result; two same-named columns would
    # be ambiguous downstream and cannot be written to a table.
    joined_df = sales_df.join(line_totals_df, on="DIST_CHNL_ID", how="inner")

    # Transformation: Union
    # joined_df is wider than sales_df, so a positional union() would fail on
    # the column-count mismatch. Align by name and null-fill the columns that
    # sales_df does not have.
    # NOTE(review): this appends every sales row to the joined rows — confirm
    # that is the intended result rather than a left join.
    union_df = joined_df.unionByName(sales_df, allowMissingColumns=True)

    # Transformation: Filter
    # NOTE(review): null_yn is not produced anywhere in this script;
    # presumably it comes from line_totals — confirm. If so, the rows appended
    # from sales_df carry null here and are removed by this filter.
    filtered_df = union_df.filter(F.col("null_yn") == "Y")

    # Transformation: Dynamic Rename
    renamed_df = filtered_df.withColumnRenamed("Sum_Invoice_Lines", "Invoice_Lines")

    # Output: Write to Unity Catalog table
    # Overwrite in place instead of DROP TABLE + recreate: a failed write then
    # leaves the previous table intact and the Delta history is preserved.
    # overwriteSchema lets the overwrite replace the table schema as well.
    (
        renamed_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog.target_db.final_output")
    )

    logger.info("ETL process completed successfully.")

except Exception as e:
    # Log with traceback, then re-raise so the notebook/job run is marked as
    # failed instead of silently finishing after an error.
    logger.exception("Error during ETL process: %s", e)
    raise
