# In [None]:
import logging
from datetime import datetime, timedelta
from pyspark.sql import functions as F
from pyspark.sql.types import StringType, DateType, DecimalType, DoubleType, IntegerType

# Configure logging: request INFO level via basicConfig and create the
# logger, named after this module, that the rest of the script logs through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Helper function to calculate date ranges
def calculate_date_ranges(today=None):
    """Return the reporting date windows relative to ``today``.

    Args:
        today: Reference ``datetime`` (or ``date``). Defaults to
            ``datetime.today()`` when omitted, which keeps existing
            zero-argument callers working while allowing a fixed
            reference date to be supplied.

    Returns:
        A dict of ``'YYYY-MM-DD'`` strings with the keys:

        * ``prior_week_start`` / ``prior_week_end`` - Monday and Sunday of
          the week before the one containing ``today``.
        * ``yesterday`` / ``today``.
        * ``p1m_*``, ``p2m_*``, ``p3m_*`` - first and last day of the
          calendar month one, two and three months before ``today``'s month.
    """
    if today is None:
        today = datetime.today()

    # Monday of the current week; the prior week is the 7 days before it.
    current_week_start = today - timedelta(days=today.weekday())

    ranges = {
        "prior_week_start": current_week_start - timedelta(days=7),
        "prior_week_end": current_week_start - timedelta(days=1),
        "yesterday": today - timedelta(days=1),
        "today": today,
    }

    # Walk back one calendar month at a time: the day before the first of a
    # month is the last day of the previous month, and replace(day=1) on that
    # gives the previous month's first day. This copes with varying month
    # lengths and year boundaries without any month arithmetic.
    month_start = today.replace(day=1)
    for months_back in (1, 2, 3):
        month_end = month_start - timedelta(days=1)
        month_start = month_end.replace(day=1)
        ranges[f"p{months_back}m_start"] = month_start
        ranges[f"p{months_back}m_end"] = month_end

    return {key: value.strftime('%Y-%m-%d') for key, value in ranges.items()}

# Read the two source tables from Unity Catalog. Any failure is logged and
# re-raised so the script stops instead of continuing without its inputs.
try:
    hana_billing_table = "genai_demo.altreyx_demo.tableupdated_new_1000"
    tdmedpod_table = "genai_demo.altreyx_demo.tdmedpod_new_1"

    hana_billing_df = spark.table(hana_billing_table)
    tdmedpod_df = spark.table(tdmedpod_table)
except Exception as e:
    logger.error(f"Error loading data from Unity Catalog tables: {e}")
    raise
else:
    logger.info("Data loaded successfully from Unity Catalog tables.")

# Apply transformations
# Filters both source frames to one date window, projects/renames their
# columns, derives three amount columns, joins the frames, filters the result
# and produces `renamed_df`. Any exception is logged and re-raised.
try:
    # Dynamic Input: Filter data for specific date ranges
    # Only the p1m_start / p1m_end keys are used here, so both frames are
    # restricted (inclusive on both ends) to that single window. The bounds
    # are 'YYYY-MM-DD' strings as returned by calculate_date_ranges().
    # NOTE(review): FKDAT and BILL_DTE are compared before any to_date()
    # conversion - presumably they are dates or ISO-formatted strings; confirm.
    date_ranges = calculate_date_ranges()
    hana_billing_df = hana_billing_df.filter(
        (F.col("FKDAT") >= date_ranges["p1m_start"]) & (F.col("FKDAT") <= date_ranges["p1m_end"])
    )
    tdmedpod_df = tdmedpod_df.filter(
        (F.col("BILL_DTE") >= date_ranges["p1m_start"]) & (F.col("BILL_DTE") <= date_ranges["p1m_end"])
    )

    # DateTime Conversion
    # SO_AUDAT and BILL_DTE are parsed as yyyy-MM-dd dates; FKDAT is left as-is.
    hana_billing_df = hana_billing_df.withColumn("SO_AUDAT", F.to_date("SO_AUDAT", "yyyy-MM-dd"))
    tdmedpod_df = tdmedpod_df.withColumn("BILL_DTE", F.to_date("BILL_DTE", "yyyy-MM-dd"))

    # Field Selection and Renaming
    # The first nine billing columns are aliased to the same names that the
    # tdmedpod select below exposes; the remaining columns keep their names.
    hana_billing_df = hana_billing_df.select(
        F.col("SO_AUDAT").alias("SO_Date"),
        F.col("FKDAT").alias("BILL_DATE"),
        F.col("WERKS").alias("Whs"),
        F.col("VTWEG").alias("DIST_CHNL_ID"),
        F.col("ZZFINCLASS").alias("FNC_ID"),
        F.col("BEZEK").alias("FNC_DESC"),
        F.col("SOLDTO_KUNNR").alias("SOLDTO"),
        F.col("SHIPTO_KUNNR").alias("SHIPTO"),
        F.col("VGBEL").alias("RFRNC_DOC_NUM"),
        F.col("BILL_ITM_COUNT"),
        F.col("UNIT_LAND_COST"),
        F.col("EXTND_LAND_CST"),
        F.col("XTND_INVOICE_PRICE_PER_ITM"),
        F.col("EXTND_FNL_PRICE"),
        F.col("SO_NETWR"),
        F.col("SO_NETPR"),
        F.col("EXTND_SERVC_FEE"),
        F.col("EXTND_SHPNG_HNDLNG"),
        F.col("XTND_STATE_TX"),
        F.col("EXTND_LCL_TX"),
        F.col("FKLMG"),
        F.col("FKIMG"),
        F.col("RF_TRNSCT_ABSRB_CHRGE_AMT_ZTV2"),
        F.col("VNDR_DRP_SHP_ABSRB_AMT_ZSS2_M2"),
        F.col("EX_HDNL_DRP_SHP_VAL_ZSSH"),
        F.col("RF_VENDOR_MOC_ABSRB_AMT_ZSM2"),
        F.col("RF_TRNSCT_ABSORB_CHARGE_AMT_ZTR2"),
        F.col("TRANS_CHRGS_FRT_ZTR1"),
        F.col("RESTOOCKING_FEE_ZSRF"),
        F.col("RESTOCK_FEE_MANUAL_ZSRM"),
        F.col("SPCL_HNDLNG_CHRG_FX_ZH01"),
        F.col("VNDR_HNDLNG_AMT_ZTHM"),
        F.col("MIN_ORDER_CHARGE_USD_ZSMO"),
        F.col("MOC_DROP_SHP_VAL_ZSSM"),
        F.col("FUEL_SURCHARGE_ZSDF"),
        F.col("FUEL_SURCHARGE_OVERIDE_ZSDO"),
        F.col("VENDR_TRANS_CHRG_FRT_ZTV1"),
        F.col("RF_VNDR_DRP_SHP_FEE_AMT_ZSSM_F_ZTHM"),
        F.col("MARKUP_VENDOR_TRANS_FEE_AMT_ZMT1"),
        F.col("MARK_UP_DND_ZMVT"),
        F.col("ADDTN_TRANS_FEE_ZSRH"),
        F.col("ADDTN_TRANS_FEE_OVRRIDE_ZSRO"),
        F.col("DROPSHIP_FEE_VALUE_ZSSF"),
        F.col("XTND_N_VLNK_SVC_FEE_ZVC12M1M3NM"),
        F.col("ONSITE_REP_FEE_DLR_ZMGO"),
        F.col("BULK_DIST_FEE_DLR_ZMGB"),
        F.col("HLDY_DLVRY_FEE_DLR_ZMGD"),
        F.col("LOW_UOM_DIST_FEE_DLR_ZMGL"),
        F.col("GOV_DIST_FEE_DLR_ZMGN"),
        F.col("BCF_RF_EXTND_VLINK_SVC_FEE"),
    )

    tdmedpod_df = tdmedpod_df.select(
        F.col("RunDte").alias("Run Date"),
        F.col("SO_Date"),
        F.col("BILL_DTE").alias("BILL_DATE"),
        F.col("Whs"),
        F.col("DIST_CHNL_ID"),
        F.col("FNC_ID"),
        F.col("FNC_DESC"),
        F.col("SOLDTO"),
        F.col("SHIPTO"),
        F.col("RFRNC_DOC_NUM"),
        F.col("Rush_Order_Fee"),
        F.col("COE_SHIP_HNDL_AMT"),
    )

    # Custom Calculations
    # Adds BIA_SHIP_HNDL_AMT, COE_SHIP_HNDL_AMT and Invoice_Sales to the
    # billing frame from SQL expression strings.
    # NOTE(review): the expressions use square-bracket field references
    # (this looks like Alteryx formula syntax; Spark SQL quotes identifiers
    # with backticks) - confirm these strings parse in Spark at all.
    # NOTE(review): none of the Sum_* names appear in the billing select
    # above, and Rush_Order_Fee is only selected on tdmedpod_df, not on
    # hana_billing_df. Presumably an aggregation step that produced the Sum_*
    # columns is missing - TODO confirm the intended source columns.
    hana_billing_df = hana_billing_df.withColumn(
        "BIA_SHIP_HNDL_AMT",
        F.expr(
            "[Sum_Trans_Charge_Amt]+[Sum_RESTOCK_Fee]+[Sum_Special_Hndl_Amt]+[Sum_Vendor_Hndl_Amt]+[Sum_MOC_Amt]+[Sum_Fuel_Surcharge]"
        ),
    ).withColumn(
        "COE_SHIP_HNDL_AMT",
        F.expr(
            "[Sum_Trans_Charge_Amt]+[Sum_RESTOCK_Fee]+[Sum_Special_Hndl_Amt]+[Sum_Vendor_Hndl_Amt]+[Sum_MOC_Amt]+[Sum_Fuel_Surcharge]+[Rush_Order_Fee]+[VENDR_TRANS_CHRG_FRT_ZTV1]+[MARKUP_VENDOR_TRANS_FEE_AMT_ZMT1]"
        ),
    ).withColumn(
        "Invoice_Sales",
        F.expr("[Sum_EXT_FINAL_PRICE]"),
    )

    # Data Integration: Join (no union is performed in this block)
    # Inner join on DIST_CHNL_ID equality only.
    # NOTE(review): both sides also carry SO_Date, BILL_DATE, Whs, FNC_ID,
    # FNC_DESC, SOLDTO, SHIPTO, RFRNC_DOC_NUM and COE_SHIP_HNDL_AMT (plus
    # DIST_CHNL_ID itself), so the joined frame has duplicate column names.
    # The F.col(name) lookups below and the table write may treat these as
    # ambiguous/duplicate - confirm the intended join keys and which side's
    # columns should be kept.
    joined_df = hana_billing_df.join(
        tdmedpod_df,
        hana_billing_df["DIST_CHNL_ID"] == tdmedpod_df["DIST_CHNL_ID"],
        "inner",
    )

    # Data Filtering
    # Keeps rows where null_yn equals "Y".
    # NOTE(review): null_yn is not produced by any select/withColumn in this
    # block - presumably it was expected from an upstream step; verify.
    filtered_df = joined_df.filter(F.col("null_yn") == "Y")

    # Dynamic Renaming
    # Strips the substring "Sum_" from every column name. None of the column
    # names selected or added above contain "Sum_", so with the columns
    # visible here this leaves every name unchanged.
    renamed_df = filtered_df.select(
        [F.col(col).alias(col.replace("Sum_", "")) for col in filtered_df.columns]
    )

    logger.info("Transformations applied successfully.")
except Exception as e:
    # Log the failure for the run log, then re-raise so the script stops.
    logger.error(f"Error during transformations: {e}")
    raise

# Output Handling
# Replace the Unity Catalog target table with the transformed result. Any
# failure is logged and re-raised.
try:
    # Remove a previous copy of the target table, if one exists
    spark.sql("DROP TABLE IF EXISTS genai_demo.altreyx_demo.CHresult")

    # Configure a Delta writer in overwrite mode, then save as the target table
    delta_writer = renamed_df.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable("genai_demo.altreyx_demo.CHresult")
except Exception as e:
    logger.error(f"Error writing data to Unity Catalog target table: {e}")
    raise
else:
    logger.info("Data written successfully to Unity Catalog target table.")
