# In [None]:
import logging
from datetime import datetime
import pyspark.sql.functions as F
from pyspark.sql import DataFrame

# Logging: one named logger for this file, with the root configured at INFO.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Function to load data from Unity Catalog tables
def load_data_from_catalog(table_name: str, *, log_count: bool = True) -> DataFrame:
    """Return the table ``table_name`` as a DataFrame and log that it was loaded.

    Relies on a global ``spark`` session that this file does not define
    (presumably injected by the notebook runtime -- confirm).

    Args:
        table_name: Name passed unchanged to ``spark.table``.
        log_count: When true (the default, matching the previous behaviour)
            the log line includes ``df.count()``. Counting is a Spark action
            that runs a job over the table, so pass ``False`` to log only
            the table name.

    Returns:
        The DataFrame returned by ``spark.table(table_name)``.

    Raises:
        Exception: Any error from the lookup or the count is logged with its
            traceback and re-raised unchanged.
    """
    try:
        df = spark.table(table_name)
        if log_count:
            logger.info(f"Loaded data from {table_name} with {df.count()} records.")
        else:
            logger.info(f"Loaded data from {table_name}.")
        return df
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which plain logger.error(str(e)) discarded.
        logger.exception(f"Error loading data from {table_name}: {str(e)}")
        raise

# Function to perform transformations
def perform_transformations(
    *,
    target_catalog: str = "catalog_name",
    target_schema: str = "schema_name",
    target_table: str = "table_name",
) -> None:
    """Run the node-by-node pipeline and overwrite the target Delta table.

    Loads eight source tables through ``load_data_from_catalog``, applies the
    transformations below in order, ensures the target schema exists and
    overwrites ``{target_catalog}.{target_schema}.{target_table}``.

    Args:
        target_catalog: Catalog of the output table.
        target_schema: Schema of the output table; created if missing.
        target_table: Name of the output table.

    The three identifiers are interpolated verbatim into a SQL statement and
    a table name, so they must come from trusted configuration.

    Raises:
        Exception: Any error raised while loading, transforming or writing is
            logged with its traceback and re-raised unchanged.

    NOTE(review): several steps reference columns that no earlier step in
    this function produces (each is flagged inline). On a runtime that
    resolves columns when a transformation is called, the function is
    expected to raise before anything is written. The intended lineage has
    to be confirmed against the original workflow and the source schemas.
    """
    try:
        # Node 69: TextInput Channel
        text_input_df = load_data_from_catalog("catalog.source_db.text_input_channel")

        # Node 24: Text Input Manual Date
        manual_date_df = load_data_from_catalog("catalog.source_db.manual_date_input")

        # Node 48: DbFileInput OCT_TC3
        oct_tc3_df = load_data_from_catalog("catalog.source_db.oct_tc3")

        # Node 18: Dynamic Input Week 1
        week1_df = load_data_from_catalog("catalog.source_db.dynamic_input_week1")

        # Node 77: Dynamic Input Week 2
        week2_df = load_data_from_catalog("catalog.source_db.dynamic_input_week2")

        # Node 89: Dynamic Input Week 5
        week5_df = load_data_from_catalog("catalog.source_db.dynamic_input_week5")

        # Node 86: Dynamic Input Week 4
        week4_df = load_data_from_catalog("catalog.source_db.dynamic_input_week4")

        # Node 84: Dynamic Input Week 3
        week3_df = load_data_from_catalog("catalog.source_db.dynamic_input_week3")

        # Node 19: DateTimeNow
        # Naive local timestamp; it is only logged, never attached to the data.
        current_date = datetime.now()
        logger.info(f"Current date generated: {current_date}")

        # Node 20: Format
        formatted_date = current_date.strftime("%Y-%m-%d %H:%M:%S")
        logger.info(f"Formatted date: {formatted_date}")

        # Node 101: Summarize
        # NOTE(review): summarize_df is not referenced again in this function,
        # yet Node 68 below reads Sum_EXT_FINAL_PRICE, which only this
        # aggregate produces -- a join back onto the main flow looks to be
        # missing; confirm against the workflow.
        summarize_df = oct_tc3_df.groupBy("BILL_DATE").agg(
            F.sum("Invoices").alias("Sum_Invoices"),
            F.sum("Invoice_Lines").alias("Sum_Invoice_Lines"),
            F.sum("LANDED_COST").alias("Sum_LANDED_COST"),
            F.sum("EXT_FINAL_PRICE").alias("Sum_EXT_FINAL_PRICE")
        )

        # Node 72: Start / End
        # NOTE(review): Prior_Week_Start is dropped again by the Node 73 select.
        date_calculation_df = manual_date_df.withColumn("Prior_Week_Start", F.date_sub(F.current_date(), 7))

        # Node 73: Alteryx Select
        select_df = date_calculation_df.select("DateTime_Out", "Start Date", "End Date")

        # Node 23: DateTime Conversion
        start_txt_df = select_df.withColumn("StartTXT", F.date_format("Start Date", "yyyy-MM-dd"))

        # Node 63: DateTime Conversion
        end_txt_df = start_txt_df.withColumn("EndTXT", F.date_format("End Date", "yyyy-MM-dd"))

        # Node 66: ReName
        # select() rather than selectExpr(): selectExpr parses each string as
        # a SQL expression, so a name containing a space such as "Run Date"
        # would be read as column `Run` aliased to `Date`.
        # NOTE(review): "Run Date" is not among the columns present here
        # (DateTime_Out, Start Date, End Date, StartTXT, EndTXT); presumably
        # DateTime_Out was meant to be renamed to it -- confirm.
        rename_df = end_txt_df.select("Run Date", "EndTXT", "StartTXT", "Start Date", "End Date")

        # Node 88: Formula: 2WK START AND END
        formula_df = rename_df.withColumn("START_2WK", F.date_sub(F.current_date(), 14))

        # Node 22: Select Tool
        # NOTE(review): at this point only Run Date, EndTXT, StartTXT,
        # Start Date, End Date and START_2WK exist; the other names listed are
        # never created in this function. select_tool_df is also unused below.
        select_tool_df = formula_df.select("Run Date", "StartDTE", "EndDTE", "Prior Week Start", "Prior Week End", "Yesterday", "Today", "StartTXT", "EndTXT", "RunDTE", "P1M_Start", "P2M_Start", "P2M_End", "P3M_End", "Start Date", "END_1WK", "START_2WK", "END_2WK", "START_3WK", "END_3WK", "START_4WK", "End Date", "End_4Wk", "START_5WK")

        # Node 78: Union
        # union() pairs columns by position, not by name -- assumes the five
        # weekly tables share one column order; confirm.
        union_df = week1_df.union(week2_df).union(week3_df).union(week4_df).union(week5_df)

        # Node 27: Alteryx Select
        alteryx_select_df = union_df.select("*")

        # Node 40: FIELD NAMES
        field_names_df = alteryx_select_df.selectExpr("SO_Date", "BILL_DATE", "Whs", "DIST_CHNL_ID", "FNC_ID", "FNC_DESC", "SOLDTO", "SHIPTO", "RFRNC_DOC_NUM", "Invoice_lines")

        # Node 67: MultiFieldFormula
        # NOTE(review): LANDED_COST is not in the Node 40 projection above, so
        # this reference cannot resolve as written.
        multifield_formula_df = field_names_df.withColumn("EXTND_LAND_CST", F.col("LANDED_COST") * 1.1)

        # Node 44: Cleanse
        cleanse_df = multifield_formula_df.dropDuplicates()

        # Node 60: Rush_Fee
        rush_fee_df = cleanse_df.withColumn("Rush_Order_Fee", F.lit(10))

        # Node 42: Summarize
        summarize_rush_fee_df = rush_fee_df.groupBy("BILL_DATE").agg(F.sum("Rush_Order_Fee").alias("Sum_Rush_Order_Fee"))

        # Node 46: Dynamic Rename
        # NOTE(review): the Node 42 aggregate outputs only BILL_DATE and
        # Sum_Rush_Order_Fee; none of the columns selected here exist on it.
        dynamic_rename_df = summarize_rush_fee_df.selectExpr("Invoice_Lines", "Rush_Order_Fee", "AVG_INVOICE_PRICE", "EXTND_FNL_PRICE1", "VBRP_BRGEW", "VBRP_VOLUM")

        # Node 56: Append Fields
        append_fields_df = dynamic_rename_df.withColumn("Additional_Field", F.lit("Additional_Value"))

        # Node 61: Total Shipping and Handling
        shipping_handling_df = append_fields_df.withColumn("BIA_SHIP_HNDL_AMT", F.col("Rush_Order_Fee") + F.col("EXTND_FNL_PRICE1"))

        # Node 68: Formula: Invoice_Sales
        # NOTE(review): neither Sum_Rush_Order_Fee nor Sum_EXT_FINAL_PRICE is
        # kept by the Node 46 projection.
        invoice_sales_df = shipping_handling_df.withColumn("Invoice_Sales", F.col("Sum_Rush_Order_Fee") + F.col("Sum_EXT_FINAL_PRICE"))

        # Node 70: Join
        # NOTE(review): DIST_CHNL_ID is not kept by the Node 46 projection.
        join_df = invoice_sales_df.join(text_input_df, "DIST_CHNL_ID", "inner")

        # Node 71: Union
        # NOTE(review): join_df also carries text_input_df's non-key columns,
        # so the two sides only line up positionally if text_input_df has no
        # column besides DIST_CHNL_ID -- confirm.
        final_union_df = join_df.union(invoice_sales_df)

        # Node 79: Formula
        formula_null_yn_df = final_union_df.withColumn("null_yn", F.when(F.col("Invoice_Sales").isNull(), 1).otherwise(0))

        # Node 80: Filter Tool
        # Keeps only rows whose Invoice_Sales is not null.
        filter_tool_df = formula_null_yn_df.filter(F.col("null_yn") == 0)

        # Node 81: UPDATE NULL
        update_null_df = filter_tool_df.withColumn("FNC_ID", F.when(F.col("FNC_ID").isNull(), "Unknown").otherwise(F.col("FNC_ID")))

        # Node 82: Union
        # NOTE(review): update_null_df is derived row-for-row from
        # filter_tool_df, so this union emits every row twice (once with
        # FNC_ID filled, once without) -- confirm that is intended.
        final_df = update_null_df.union(filter_tool_df)

        # Node 83: Select Tool
        # NOTE(review): several of these names (e.g. COE_SHIP_HNDL_AMT,
        # END_2WK, Sum_Invoice_Lines) are not produced on this branch.
        select_tool_final_df = final_df.select("SO_Date", "BILL_DATE", "Whs", "DIST_CHNL_ID", "FNC_ID", "FNC_DESC", "SOLDTO", "SHIPTO", "RFRNC_DOC_NUM", "Rush_Order_Fee", "Sum_Invoice_Lines", "Run Date", "StartTXT", "EndTXT", "Start Date", "START_2WK", "END_2WK", "End Date", "BIA_SHIP_HNDL_AMT", "COE_SHIP_HNDL_AMT", "Invoice_Sales", "DIST_CHNL", "DIST_CHNL_DESC")

        # Node 57: Alteryx Select
        # NOTE(review): RunDTE is not among the columns kept by Node 83.
        alteryx_select_final_df = select_tool_final_df.selectExpr("RunDTE", "SO_Date", "BILL_DATE", "Whs", "DIST_CHNL_ID", "FNC_ID", "FNC_DESC", "SOLDTO", "SHIPTO", "RFRNC_DOC_NUM", "Rush_Order_Fee", "COE_SHIP_HNDL_AMT")

        # Write to Unity Catalog target table
        full_table_name = f"{target_catalog}.{target_schema}.{target_table}"

        # Create schema if it doesn't exist
        spark.sql(f"CREATE SCHEMA IF NOT EXISTS {target_catalog}.{target_schema}")
        logger.info(f"Schema {target_catalog}.{target_schema} ensured")

        # Write to Unity Catalog target table (overwrite mode handles table replacement)
        alteryx_select_final_df.write.format("delta").mode("overwrite").saveAsTable(full_table_name)
        logger.info(f"Data written to {full_table_name}")

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which plain logger.error(str(e)) discarded.
        logger.exception(f"Error during transformations: {str(e)}")
        raise

# Execute transformations
# Module-level side effect: running this cell (or importing this file)
# calls perform_transformations() immediately with its default arguments.
perform_transformations()
