In [None]:
import logging
from datetime import datetime
import pyspark.sql.functions as F
from pyspark.sql import DataFrame

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Function to handle errors
def handle_error(e: Exception, context: str):
    """Log a failed pipeline stage and re-raise the original exception.

    Args:
        e: The exception caught by the calling stage.
        context: Short name of the stage that failed; it is included in the
            log message.

    Raises:
        Exception: Always re-raises the very object passed in as ``e``, so
            this function never returns normally.
    """
    # exc_info=e attaches the exception's traceback to the log record, so the
    # failing line is visible in the log and not just the message text.
    # %-style arguments let the logging module do the string formatting.
    logger.error("Error occurred in %s: %s", context, e, exc_info=e)
    raise e

# Load data from Unity Catalog tables
try:
    # One spark.table() lookup per source table, evaluated in the same order
    # as the names they are unpacked into.
    hana_prod_df, text_input_channel_df, text_input_manual_date_df = [
        spark.table(table_name)
        for table_name in (
            "catalog.source_db.hana_prod_table",
            "catalog.source_db.text_input_channel",
            "catalog.source_db.text_input_manual_date",
        )
    ]
except Exception as e:
    handle_error(e, "Loading Unity Catalog tables")

# Transformation: DateTime Conversion
try:
    # Render both date columns as compact "yyyyMMdd" text (e.g. 20240131)
    # and attach them as new columns next to the originals.
    start_txt = F.date_format(F.col("Start Date"), "yyyyMMdd")
    end_txt = F.date_format(F.col("End Date"), "yyyyMMdd")
    text_input_manual_date_df = text_input_manual_date_df.withColumn(
        "StartTXT", start_txt
    )
    text_input_manual_date_df = text_input_manual_date_df.withColumn(
        "EndTXT", end_txt
    )
except Exception as e:
    handle_error(e, "DateTime Conversion")

# Transformation: Field Selection and Renaming
# Charge columns that are carried through unchanged and then summed into the
# two shipping-and-handling totals. Listed once so the select and the formulas
# below cannot drift apart.
ship_handling_columns = [
    "ZTV2",
    "ZSS2_M2",
    "ZSSH",
    "ZSM2",
    "ZTR2",
    "ZTR1",
    "ZSRF",
    "ZSRM",
    "ZH01",
    "ZTHM",
    "ZSMO",
    "ZSSM_F_ZTHM",
    "ZMT1",
    "ZVC12M1M3NM",
    "ZMGO",
    "ZMGB",
    "ZMGL",
]

try:
    hana_prod_df = hana_prod_df.select(
        F.col("SO_AUDAT").alias("SO_Date"),
        F.col("FKDAT").alias("BILL_DATE"),
        F.col("WERKS").alias("Whs"),
        F.col("VTWEG").alias("DIST_CHNL_ID"),
        F.col("ZZFINCLASS").alias("FNC_ID"),
        F.col("BEZEK").alias("FNC_DESC"),
        F.col("SOLDTO_KUNNR").alias("SOLDTO"),
        F.col("SHIPTO_KUNNR").alias("SHIPTO"),
        F.col("VGBEL").alias("RFRNC_DOC_NUM"),
        F.col("BILL_ITM_COUNT"),
        F.col("UNIT_LAND_COST"),
        F.col("SO_NETWR"),
        F.col("SO_NETPR"),
        F.col("FKLMG"),
        F.col("FKIMG"),
        *[F.col(name) for name in ship_handling_columns],
        # select() keeps only the columns listed here, so the rush-order fee
        # has to be picked up in this step (already under its output name);
        # it is not available to any later step otherwise.
        F.col("ADDTN_TRANS_FEE_OVRRIDE_ZSRO").alias("Rush_Order_Fee"),
    )
except Exception as e:
    handle_error(e, "Field Selection and Renaming")

# Transformation: Multi-Field Formula
try:
    # Replace NULLs with 0 *before* the totals are computed. In Spark SQL,
    # `a + b` is NULL as soon as one operand is NULL, so a single missing
    # charge would otherwise null the whole total (and a later fill would
    # turn that into a misleading 0). fillna(0) only touches numeric columns.
    hana_prod_df = hana_prod_df.fillna(0)
except Exception as e:
    handle_error(e, "Multi-Field Formula")

# Transformation: Custom Calculations
try:
    # "ZTV2 + ZSS2_M2 + ... + ZMGL": the sum of every handling charge.
    bia_ship_hndl_expr = " + ".join(ship_handling_columns)
    hana_prod_df = hana_prod_df.withColumn(
        "BIA_SHIP_HNDL_AMT", F.expr(bia_ship_hndl_expr)
    ).withColumn(
        # Same total as BIA_SHIP_HNDL_AMT plus the rush-order fee.
        "COE_SHIP_HNDL_AMT", F.expr(f"{bia_ship_hndl_expr} + Rush_Order_Fee")
    )
except Exception as e:
    handle_error(e, "Custom Calculations")

# Transformation: Join
try:
    # Join on the column *name* rather than on an equality expression.
    # An expression join keeps the key column from both sides, leaving two
    # columns called DIST_CHNL_ID in the result; a name-based join keeps a
    # single key column. Rows of hana_prod_df without a matching channel are
    # retained (left join).
    joined_df = hana_prod_df.join(
        text_input_channel_df,
        on="DIST_CHNL_ID",
        how="left",
    )
except Exception as e:
    handle_error(e, "Join")

# Transformation: Union
try:
    # DataFrame.union() pairs columns by position, not by name.
    # NOTE(review): the two inputs are built from different tables; confirm
    # that their column counts and order really line up.
    manual_date_rows = text_input_manual_date_df
    union_df = joined_df.union(manual_date_rows)
except Exception as e:
    handle_error(e, "Union")

# Transformation: Filter
try:
    # Keep only the rows flagged "Y" in null_yn.
    # NOTE(review): null_yn is not created by any step visible here;
    # presumably it comes from one of the joined inputs - verify.
    is_flagged = F.col("null_yn") == "Y"
    filtered_df = union_df.where(is_flagged)
except Exception as e:
    handle_error(e, "Filter")

# Transformation: Dynamic Rename
try:
    # withColumnRenamed() leaves the frame unchanged when the old name is
    # absent. NOTE(review): no visible step produces Sum_Invoice_Lines.
    old_name, new_name = "Sum_Invoice_Lines", "Invoice_Lines"
    renamed_df = filtered_df.withColumnRenamed(old_name, new_name)
except Exception as e:
    handle_error(e, "Dynamic Rename")

# Output: Write to Unity Catalog target table
try:
    # The table is deliberately NOT dropped first: mode("overwrite") already
    # replaces the existing contents, whereas a separate DROP TABLE would
    # remove the target before the new data is known to have been written,
    # leaving no table at all if the write fails. overwriteSchema lets the
    # overwrite also replace the table schema when the output columns change.
    (
        renamed_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog.target_db.target_table")
    )
except Exception as e:
    handle_error(e, "Writing to Unity Catalog target table")
else:
    # Only reached when the write did not raise.
    logger.info("ETL process completed successfully.")
