In [None]:
import logging
import psycopg2
from pyspark.sql import functions as F

# Logging: INFO-level output through basicConfig, one logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Fetch the connection settings for the external database from the "my_scope"
# secret scope. Keys are requested in the same order as the names they bind to.
db_user, db_password, db_host, db_port, db_name = (
    dbutils.secrets.get(scope="my_scope", key=secret_key)
    for secret_key in ("db_user", "db_password", "db_host", "db_port", "db_name")
)

# Open the connection to the external PostgreSQL database using the
# secret-backed settings (db_name, db_user, db_password, db_host, db_port).
# NOTE(review): nothing in the visible script uses or closes `conn` afterwards —
# confirm whether the connection is still needed, and close it if it is.
try:
    pg_settings = {
        "dbname": db_name,
        "user": db_user,
        "password": db_password,
        "host": db_host,
        "port": db_port,
    }
    conn = psycopg2.connect(**pg_settings)
    logger.info("Connected to PostgreSQL database successfully")
except Exception as exc:
    # Log the failure, then re-raise so the run stops here.
    logger.error(f"Error connecting to PostgreSQL database: {exc}")
    raise

# Read each source table from catalog.source_db and log how many records it holds.
try:
    source_frames = {}
    for table_name in (
        "text_input_channel",
        "manual_date",
        "oct_tc3",
        "week1",
        "week2",
        "week3",
        "week4",
        "week5",
    ):
        frame = spark.table(f"catalog.source_db.{table_name}")
        # count() is a Spark action, so each log line costs one job per table.
        logger.info(f"Loaded {table_name}_df with {frame.count()} records")
        source_frames[table_name] = frame

    # Bind the frames to the names the rest of the script refers to.
    text_input_channel_df = source_frames["text_input_channel"]
    manual_date_df = source_frames["manual_date"]
    oct_tc3_df = source_frames["oct_tc3"]
    week1_df = source_frames["week1"]
    week2_df = source_frames["week2"]
    week3_df = source_frames["week3"]
    week4_df = source_frames["week4"]
    week5_df = source_frames["week5"]
except Exception as exc:
    logger.error(f"Error loading data from Unity Catalog tables: {exc}")
    raise

# Union transformation: stack the five weekly tables, week1 first.
# NOTE(review): DataFrame.union matches columns by position, not by name —
# confirm that all weekly tables share the same column order.
try:
    union_df = week1_df
    for weekly_df in (week2_df, week3_df, week4_df, week5_df):
        union_df = union_df.union(weekly_df)
    logger.info(f"Union transformation completed with {union_df.count()} records")
except Exception as exc:
    logger.error(f"Error during union transformation: {exc}")
    raise

# Rename fields: map the source column names to the names used downstream.
try:
    field_renames = {
        "SO_AUDAT": "SO_Date",
        "FKDAT": "BILL_DATE",
        "WERKS": "Whs",
        "VTWEG": "DIST_CHNL_ID",
        "ZZFINCLASS": "FNC_ID",
        "BEZEK": "FNC_DESC",
        "SOLDTO_KUNNR": "SOLDTO",
        "SHIPTO_KUNNR": "SHIPTO",
        "VGBEL": "RFRNC_DOC_NUM",
        "lines": "Invoice_lines",
    }
    # Apply the renames one at a time, in the order listed above.
    renamed_df = union_df
    for source_name, target_name in field_renames.items():
        renamed_df = renamed_df.withColumnRenamed(source_name, target_name)
    logger.info("Field renaming completed")
except Exception as exc:
    logger.error(f"Error during field renaming: {exc}")
    raise

# MultiFieldFormula transformation: in every column, replace null or
# empty-string values with 0 and keep all other values as they are.
try:
    zero_filled_columns = []
    for column_name in renamed_df.columns:
        current_value = F.col(column_name)
        is_blank = current_value.isNull() | (current_value == "")
        zero_filled_columns.append(
            F.when(is_blank, 0).otherwise(current_value).alias(column_name)
        )
    formula_df = renamed_df.select(*zero_filled_columns)
    logger.info("MultiFieldFormula transformation completed")
except Exception as exc:
    logger.error(f"Error during MultiFieldFormula transformation: {exc}")
    raise

# Cleansing transformation: upper-case the value of every column, keeping
# each column's name.
try:
    upper_cased_columns = []
    for column_name in formula_df.columns:
        upper_cased_columns.append(F.upper(F.col(column_name)).alias(column_name))
    cleanse_df = formula_df.select(*upper_cased_columns)
    logger.info("Cleansing transformation completed")
except Exception as exc:
    logger.error(f"Error during cleansing transformation: {exc}")
    raise

# Rush_Fee calculation: expose ADDTN_TRANS_FEE_OVRRIDE_ZSRO under the name
# Rush_Order_Fee (the source column is kept as well).
try:
    rush_fee_source = F.col("ADDTN_TRANS_FEE_OVRRIDE_ZSRO")
    rush_fee_df = cleanse_df.withColumn("Rush_Order_Fee", rush_fee_source)
    logger.info("Rush_Fee calculation completed")
except Exception as exc:
    logger.error(f"Error during Rush_Fee calculation: {exc}")
    raise

# Summarize Rush_Order_Fee: total the fee across the whole frame.
# There are no grouping keys, so the result has exactly one column,
# Sum_Rush_Order_Fee.
# NOTE(review): later steps reference DIST_CHNL_ID, FNC_ID, SOLDTO, etc., which
# this aggregate does not carry forward — confirm the intended grouping.
try:
    rush_fee_total = F.sum(F.col("Rush_Order_Fee")).alias("Sum_Rush_Order_Fee")
    summarize_df = rush_fee_df.agg(rush_fee_total)
    logger.info("Summarize Rush_Order_Fee completed")
except Exception as exc:
    logger.error(f"Error during Summarize Rush_Order_Fee: {exc}")
    raise

# Dynamic Rename: drop the "Sum_" marker from every summarized column name.
# str.replace removes each occurrence of "Sum_" in the name, not only a
# leading one.
try:
    unprefixed_columns = [
        F.col(summed_name).alias(summed_name.replace("Sum_", ""))
        for summed_name in summarize_df.columns
    ]
    dynamic_rename_df = summarize_df.select(*unprefixed_columns)
    logger.info("Dynamic Rename completed")
except Exception as exc:
    logger.error(f"Error during Dynamic Rename: {exc}")
    raise

# Append Fields: left-join the channel lookup onto the summarized frame,
# keyed on DIST_CHNL_ID.
# NOTE(review): the visible upstream steps leave dynamic_rename_df with only
# the summarized fee column, so DIST_CHNL_ID may not exist on the left side —
# confirm whether a keyed join or an unconditional append was intended.
try:
    append_df = dynamic_rename_df.join(
        text_input_channel_df,
        on="DIST_CHNL_ID",
        how="left",
    )
    logger.info("Append Fields completed")
except Exception as exc:
    logger.error(f"Error during Append Fields: {exc}")
    raise

# Total Shipping and Handling calculations.
#   BIA_SHIP_HNDL_AMT = sum of the six base charge columns
#   COE_SHIP_HNDL_AMT = the same base sum plus the rush fee and two vendor
#                       transport columns
# NOTE(review): none of the visible upstream steps creates the Sum_* columns
# (the Dynamic Rename step strips "Sum_" from its outputs) — confirm that they
# come from the channel lookup, otherwise this step cannot resolve them.
try:
    base_charge_names = [
        "Sum_Trans_Charge_Amt",
        "Sum_RESTOCK_Fee",
        "Sum_Special_Hndl_Amt",
        "Sum_Vendor_Hndl_Amt",
        "Sum_MOC_Amt",
        "Sum_Fuel_Surcharge",
    ]
    # Fold left-to-right so the expression has the same shape as a chained "+".
    base_charge_total = F.col(base_charge_names[0])
    for charge_name in base_charge_names[1:]:
        base_charge_total = base_charge_total + F.col(charge_name)

    coe_charge_total = (
        base_charge_total
        + F.col("Rush_Order_Fee")
        + F.col("VENDR_TRANS_CHRG_FRT_ZTV1")
        + F.col("MARKUP_VENDOR_TRANS_FEE_AMT_ZMT1")
    )

    total_shipping_df = append_df.withColumn("BIA_SHIP_HNDL_AMT", base_charge_total)
    total_shipping_df = total_shipping_df.withColumn("COE_SHIP_HNDL_AMT", coe_charge_total)
    logger.info("Total Shipping and Handling calculations completed")
except Exception as exc:
    logger.error(f"Error during Total Shipping and Handling calculations: {exc}")
    raise

# Formula: Invoice_Sales — copy Sum_EXT_FINAL_PRICE into a column named
# Invoice_Sales.
# NOTE(review): no visible upstream step produces Sum_EXT_FINAL_PRICE — confirm
# where this column comes from.
try:
    invoice_sales_source = F.col("Sum_EXT_FINAL_PRICE")
    invoice_sales_df = total_shipping_df.withColumn("Invoice_Sales", invoice_sales_source)
    logger.info("Formula: Invoice_Sales completed")
except Exception as exc:
    logger.error(f"Error during Formula: Invoice_Sales: {exc}")
    raise

# Join transformation: left-join the channel lookup on DIST_CHNL_ID.
# NOTE(review): text_input_channel_df was already joined in the Append Fields
# step, so its non-key columns will appear twice here — confirm this is intended.
try:
    join_df = invoice_sales_df.join(
        text_input_channel_df,
        on="DIST_CHNL_ID",
        how="left",
    )
    logger.info("Join transformation completed")
except Exception as exc:
    logger.error(f"Error during Join transformation: {exc}")
    raise

# Union transformation: stack the joined frame on top of the pre-join frame.
# NOTE(review): join_df is a left join of invoice_sales_df, so it already holds
# every invoice_sales_df row; this union appears to repeat them. It is also
# positional and needs both sides to have the same number of columns — confirm
# the intent.
try:
    joined_rows = join_df
    pre_join_rows = invoice_sales_df
    final_union_df = joined_rows.union(pre_join_rows)
    logger.info("Final Union transformation completed")
except Exception as exc:
    logger.error(f"Error during Final Union transformation: {exc}")
    raise

# Formula: Identify primary key null — null_yn is 'Y' when any key column is
# null or an empty string, otherwise 'N'.
try:
    any_key_missing = None
    for key_column in ("FNC_ID", "Whs", "DIST_CHNL_ID", "SOLDTO", "SHIPTO"):
        key_value = F.col(key_column)
        # isNull() is OR-ed first, so this test is never NULL itself.
        this_key_missing = key_value.isNull() | (key_value == "")
        if any_key_missing is None:
            any_key_missing = this_key_missing
        else:
            any_key_missing = any_key_missing | this_key_missing

    null_identification_df = final_union_df.withColumn(
        "null_yn",
        F.when(any_key_missing, 'Y').otherwise('N'),
    )
    logger.info("Formula: Identify primary key null completed")
except Exception as exc:
    logger.error(f"Error during Formula: Identify primary key null: {exc}")
    raise

# Filter Tool: keep only the rows flagged as having a missing key (null_yn = 'Y').
try:
    has_missing_key = F.col("null_yn") == 'Y'
    filtered_df = null_identification_df.where(has_missing_key)
    logger.info(f"Filter Tool completed with {filtered_df.count()} records")
except Exception as exc:
    logger.error(f"Error during Filter Tool: {exc}")
    raise

# UPDATE NULL: on the flagged rows, substitute a default where FNC_ID or
# FNC_DESC is null or empty ('OTH' and 'OTHER' respectively).
try:
    update_null_df = filtered_df
    for column_name, default_value in (("FNC_ID", 'OTH'), ("FNC_DESC", 'OTHER')):
        existing_value = F.col(column_name)
        needs_default = existing_value.isNull() | (existing_value == "")
        update_null_df = update_null_df.withColumn(
            column_name,
            F.when(needs_default, default_value).otherwise(existing_value),
        )
    logger.info("UPDATE NULL completed")
except Exception as exc:
    logger.error(f"Error during UPDATE NULL: {exc}")
    raise

# Union transformation: recombine the repaired rows with the rows that needed
# no repair, so the output holds every input row exactly once.
try:
    # Rows with all key fields populated bypass the UPDATE NULL step. null_yn is
    # always 'Y' or 'N' (set via when/otherwise), so == 'N' is the exact
    # complement of the Filter Tool condition.
    complete_keys_df = null_identification_df.filter(F.col("null_yn") == 'N')

    # Unioning update_null_df with filtered_df would emit each flagged row twice
    # (repaired and unrepaired) and drop every unflagged row.
    final_df = update_null_df.union(complete_keys_df)
    logger.info(f"Final Union transformation completed with {final_df.count()} records")
except Exception as e:
    logger.error(f"Error during Final Union transformation: {e}")
    raise

# Select Tool: keep only the output columns, in this order.
try:
    output_columns = [
        "SO_Date",
        "BILL_DATE",
        "Whs",
        "DIST_CHNL_ID",
        "FNC_ID",
        "FNC_DESC",
        "SOLDTO",
        "SHIPTO",
        "RFRNC_DOC_NUM",
        "Rush_Order_Fee",
        "COE_SHIP_HNDL_AMT",
    ]
    selected_df = final_df.select(*output_columns)
    logger.info("Select Tool completed")
except Exception as exc:
    logger.error(f"Error during Select Tool: {exc}")
    raise

# Alteryx Select: Rename — rename "Run Date" to "RunDTE".
# NOTE(review): "Run Date" is not among the columns chosen by the Select Tool
# step, and withColumnRenamed does nothing when the source column is absent —
# confirm whether a run-date column is supposed to be added upstream.
try:
    old_name, new_name = "Run Date", "RunDTE"
    renamed_final_df = selected_df.withColumnRenamed(old_name, new_name)
    logger.info("Alteryx Select: Rename completed")
except Exception as exc:
    logger.error(f"Error during Alteryx Select: Rename: {exc}")
    raise

# Write output: save the final frame as a Delta table in Unity Catalog,
# creating the target schema first if it does not exist.
try:
    target_catalog = "catalog_name"
    target_schema = "schema_name"
    target_table = "table_name"

    # Build the qualified names once and reuse them in the SQL, write and logs.
    qualified_schema = f"{target_catalog}.{target_schema}"
    qualified_table = f"{qualified_schema}.{target_table}"

    # The schema must exist before saveAsTable can create the table in it.
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {qualified_schema}")
    logger.info(f"Schema {qualified_schema} ensured")

    # Overwrite mode replaces the contents of an existing table.
    delta_writer = renamed_final_df.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable(qualified_table)
    logger.info(f"Data written to {qualified_table} successfully")
except Exception as exc:
    logger.error(f"Error writing data to Unity Catalog target table: {exc}")
    raise
