In [None]:
import logging
from datetime import datetime, timedelta
from pyspark.sql import functions as F
from pyspark.sql.types import StringType

# Create this module's logger, then configure root logging at INFO level
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Secret lookup helper: logs a failed lookup before propagating the error
def get_secret(scope, key):
    """Return the secret stored under ``key`` in the secret scope ``scope``.

    The lookup is delegated to ``dbutils.secrets.get``. If it raises, the
    key name, scope name and error text are logged at ERROR level and the
    original exception is re-raised unchanged.
    """
    try:
        secret_value = dbutils.secrets.get(scope, key)
    except Exception as e:
        logger.error(f"Error retrieving secret {key} from scope {scope}: {str(e)}")
        raise
    return secret_value

# Load data from Unity Catalog tables
def _read_source_table(table_name):
    """Return the ``catalog.source_db`` table named ``table_name`` as a DataFrame."""
    return spark.table(f"catalog.source_db.{table_name}")


try:
    # Lookup / reference inputs
    text_input_df = _read_source_table("text_input_channel")
    manual_date_df = _read_source_table("manual_date_input")
    oct_tc3_df = _read_source_table("oct_tc3")
    # Five weekly inputs, combined later by the union step
    week1_df = _read_source_table("dynamic_input_week1")
    week2_df = _read_source_table("dynamic_input_week2")
    week3_df = _read_source_table("dynamic_input_week3")
    week4_df = _read_source_table("dynamic_input_week4")
    week5_df = _read_source_table("dynamic_input_week5")
    logger.info("Data loaded from Unity Catalog tables successfully")
except Exception as e:
    # Log which stage failed, then let the original error propagate
    logger.error(f"Error loading data from Unity Catalog tables: {str(e)}")
    raise

# Transformation: Date Generation
# One-row DataFrame whose CurrentDate column holds datetime.now()
# (a naive, local-time timestamp taken when this statement runs).
current_date = datetime.now()
date_rows = [(current_date,)]
date_df = spark.createDataFrame(date_rows, ["CurrentDate"])
logger.info("Current date generated")

# Transformation: DateTime Format Conversion
# DateTime_Out is CurrentDate rendered as a "yyyy-MM-dd" string.
formatted_date = F.date_format("CurrentDate", "yyyy-MM-dd")
date_df = date_df.withColumn("DateTime_Out", formatted_date)
logger.info("DateTime format conversion applied")

# Transformation: GroupBy and Summarize
# Measures of oct_tc3 that are totalled per BILL_DTE. Each output column is
# named "Sum_<measure>", in the order listed here.
# NOTE(review): summarize_df is not referenced by any later step visible in
# this section of the file — confirm whether it should be joined downstream.
summed_measures = [
    "Invoices",
    "Invoice_Lines",
    "LANDED_COST",
    "EXT_FINAL_PRICE",
    "Trans_Charge_Amt",
    "RESTOCK_Fee",
    "Special_Hndl_Amt",
    "Vendor_Hndl_Amt",
    "MOC_Amt",
    "Fuel_Surcharge",
]
try:
    sum_exprs = [
        F.sum(measure).alias(f"Sum_{measure}") for measure in summed_measures
    ]
    summarize_df = oct_tc3_df.groupBy("BILL_DTE").agg(*sum_exprs)
    logger.info("Summarization applied successfully")
except Exception as e:
    logger.error(f"Error during summarization: {str(e)}")
    raise

# Transformation: Date Calculations
# Every derived column is built from the same base expressions over
# DateTime_Out, so they are named once and reused below.
today_str = F.date_format("DateTime_Out", "yyyy-MM-dd")
day_of_week = F.dayofweek("DateTime_Out")

# Anchor for the prior-week window: today minus its day-of-week number.
week_anchor = F.date_sub(today_str, day_of_week)
yesterday = F.date_sub(today_str, 1)
one_month_back = F.add_months(today_str, -1)
two_months_back = F.add_months(today_str, -2)
three_months_back = F.add_months(today_str, -3)

date_calculations_df = (
    date_df
    # Prior week: 7 days and 1 day before the anchor
    .withColumn("Prior_Week_Start", F.date_sub(week_anchor, 7))
    .withColumn("Prior_Week_End", F.date_sub(week_anchor, 1))
    .withColumn("Yesterday", yesterday)
    .withColumn("Today", today_str)
    # Rolling month windows: each one ends the day before the next one starts
    .withColumn("P1M_Start", one_month_back)
    .withColumn("P1M_End", yesterday)
    .withColumn("P2M_Start", two_months_back)
    .withColumn("P2M_End", F.date_sub(one_month_back, 1))
    .withColumn("P3M_Start", three_months_back)
    .withColumn("P3M_End", F.date_sub(two_months_back, 1))
)
logger.info("Date calculations applied")

# Transformation: Union
# Stack the five weekly inputs into a single DataFrame.
# Columns are matched by name (unionByName) instead of by position: a plain
# union() pairs columns purely by their order, so a weekly table whose
# columns are ordered differently would silently put values under the wrong
# column. With unionByName, a table whose column names do not line up raises
# here instead of producing misaligned rows.
try:
    union_df = (
        week1_df
        .unionByName(week2_df)
        .unionByName(week3_df)
        .unionByName(week4_df)
        .unionByName(week5_df)
    )
    logger.info("Union operation applied successfully")
except Exception as e:
    # Log which stage failed, then let the original error propagate
    logger.error(f"Error during union operation: {str(e)}")
    raise

# Transformation: Custom Calculations
# Rush_Order_Fee takes ADDTN_TRANS_FEE_OVRRIDE_ZSRO, with nulls replaced by 0.
try:
    rush_fee = F.coalesce(F.col("ADDTN_TRANS_FEE_OVRRIDE_ZSRO"), F.lit(0))
    union_df = union_df.withColumn("Rush_Order_Fee", rush_fee)
    logger.info("Custom calculations applied successfully")
except Exception as e:
    logger.error(f"Error during custom calculations: {str(e)}")
    raise

# Transformation: Join
# Left join: every unioned row is kept and matched to the text-input rows
# whose DIST_CHNL equals the row's DIST_CHNL_ID.
try:
    channel_match = union_df["DIST_CHNL_ID"] == text_input_df["DIST_CHNL"]
    join_df = union_df.join(text_input_df, on=channel_match, how="left")
    logger.info("Join operation applied successfully")
except Exception as e:
    logger.error(f"Error during join operation: {str(e)}")
    raise

# Transformation: Filter
# Keep only the joined rows whose null_yn flag equals 'Y'.
try:
    flagged_rows = F.col("null_yn") == "Y"
    filter_df = join_df.where(flagged_rows)
    logger.info("Filter operation applied successfully")
except Exception as e:
    logger.error(f"Error during filter operation: {str(e)}")
    raise

# Transformation: Update Nulls
# Replacement values for FNC columns that are null or an empty string.
fnc_defaults = (("FNC_ID", "OTH"), ("FNC_DESC", "OTHER"))
try:
    patched_df = filter_df
    for column_name, fallback in fnc_defaults:
        current_value = F.col(column_name)
        is_blank = current_value.isNull() | (current_value == "")
        patched_df = patched_df.withColumn(
            column_name,
            F.when(is_blank, fallback).otherwise(current_value),
        )
    update_nulls_df = patched_df
    logger.info("Update nulls operation applied successfully")
except Exception as e:
    logger.error(f"Error during update nulls operation: {str(e)}")
    raise

# Transformation: Total Shipping and Handling
# BIA_SHIP_HNDL_AMT adds up the six summed charge columns; COE_SHIP_HNDL_AMT
# adds the same six plus the rush-order fee and two vendor transport columns.
# NOTE(review): the Sum_* names match the aliases created on summarize_df,
# which is not joined into update_nulls_df in the code visible here — confirm
# these columns actually exist on the weekly inputs.
# NOTE(review): only Rush_Order_Fee is null-coalesced upstream; a null in any
# other component makes the whole total null — confirm that is intended.
bia_components = [
    "Sum_Trans_Charge_Amt",
    "Sum_RESTOCK_Fee",
    "Sum_Special_Hndl_Amt",
    "Sum_Vendor_Hndl_Amt",
    "Sum_MOC_Amt",
    "Sum_Fuel_Surcharge",
]
coe_components = bia_components + [
    "Rush_Order_Fee",
    "VENDR_TRANS_CHRG_FRT_ZTV1",
    "MARKUP_VENDOR_TRANS_FEE_AMT_ZMT1",
]
try:
    bia_total = F.expr(" + ".join(bia_components))
    coe_total = F.expr(" + ".join(coe_components))
    total_shipping_df = (
        update_nulls_df
        .withColumn("BIA_SHIP_HNDL_AMT", bia_total)
        .withColumn("COE_SHIP_HNDL_AMT", coe_total)
    )
    logger.info("Total shipping and handling calculations applied successfully")
except Exception as e:
    logger.error(f"Error during total shipping and handling calculations: {str(e)}")
    raise

# Transformation: Custom Formula
# Invoice_Sales is a straight copy of Sum_EXT_FINAL_PRICE.
# NOTE(review): Sum_EXT_FINAL_PRICE is an alias created on summarize_df, which
# is not joined into this DataFrame in the code visible here — confirm the
# column exists at this point.
try:
    invoice_sales = F.col("Sum_EXT_FINAL_PRICE")
    invoice_sales_df = total_shipping_df.withColumn("Invoice_Sales", invoice_sales)
    logger.info("Custom formula applied successfully")
except Exception as e:
    logger.error(f"Error during custom formula application: {str(e)}")
    raise

# Write to Unity Catalog target table
try:
    # NOTE(review): these three values look like placeholders — confirm the
    # real catalog / schema / table names before running.
    target_catalog = "catalog_name"
    target_schema = "schema_name"
    target_table = "table_name"
    schema_path = f"{target_catalog}.{target_schema}"
    table_path = f"{schema_path}.{target_table}"

    # Make sure the destination schema exists before writing into it
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_path}")
    logger.info(f"Schema {schema_path} ensured")

    # Save the final DataFrame as a Delta table, using overwrite mode
    delta_writer = invoice_sales_df.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable(table_path)
    logger.info(f"Data written to {table_path} successfully")
except Exception as e:
    logger.error(f"Error writing data to Unity Catalog target table: {str(e)}")
    raise
