# In [None]:
import logging
from datetime import datetime, timedelta
import pyspark.sql.functions as F
from pyspark.sql import DataFrame

# Logging setup: create this module's logger, then configure the root logger
# at INFO level. The two steps are independent, so their order does not matter.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Helper function to safely execute SQL and log errors
def execute_sql(query: str) -> DataFrame:
    """Run a SQL statement through the Spark session and return the result.

    The statement is logged before it is submitted. If ``spark.sql`` raises,
    the failure is logged at ERROR level together with its traceback and the
    original exception is re-raised unchanged.

    ``spark`` is not defined in this file; it is presumably the session object
    injected by the notebook runtime -- confirm for non-notebook execution.

    Args:
        query: SQL text passed verbatim to ``spark.sql``.

    Returns:
        The DataFrame returned by ``spark.sql``.

    Raises:
        Exception: Whatever ``spark.sql`` raised.
    """
    # Lazy %-style arguments: the message is only formatted if the record is emitted.
    logger.info("Executing SQL: %s", query)
    try:
        return spark.sql(query)
    except Exception as e:
        # logger.exception emits the same ERROR message as before and appends
        # the traceback, which the previous logger.error call discarded.
        logger.exception("Error executing SQL: %s, Error: %s", query, e)
        raise

# Load data from Unity Catalog tables
# Each source table is read with the same SELECT * statement, in the order
# listed, and unpacked positionally into its matching *_df variable.
try:
    (
        oct_tc3_df,
        week1_df,
        week2_df,
        week3_df,
        week4_df,
        week5_df,
    ) = [
        execute_sql(f"SELECT * FROM catalog.source_db.{source_table}")
        for source_table in ("oct_tc3", "week1", "week2", "week3", "week4", "week5")
    ]
except Exception as e:
    # Log the load failure at this level too, then let it propagate.
    logger.error(f"Error loading data from Unity Catalog: {str(e)}")
    raise

# Transformation: DateTimeNow
# Capture the run timestamp once (naive local time from datetime.now()).
current_date = datetime.now()
logger.info(f"Current Date: {current_date}")

# Transformation: Format DateTime
# format() with a strftime pattern delegates to datetime.strftime.
date_time_out = format(current_date, '%Y-%m-%d')
logger.info(f"Formatted DateTime: {date_time_out}")

# Transformation: Summarize
# Sum every measure column per BILL_DTE; each output column is named
# "Sum_<source column>".
_measure_columns = (
    "Invoices",
    "Invoice_Lines",
    "LANDED_COST",
    "EXT_FINAL_PRICE",
    "Trans_Charge_Amt",
    "RESTOCK_Fee",
    "Special_Hndl_Amt",
    "Vendor_Hndl_Amt",
    "MOC_Amt",
    "Fuel_Surcharge",
)
_sum_expressions = [
    F.sum(measure).alias(f"Sum_{measure}") for measure in _measure_columns
]
summarized_df = oct_tc3_df.groupBy("BILL_DTE").agg(*_sum_expressions)
logger.info("Summarization complete")

# Transformation: Date Calculation
# Every boundary below is a datetime derived from current_date, so it keeps
# current_date's time-of-day component.
# NOTE(review): if consumers expect calendar dates, normalise these to
# midnight or to .date() -- confirm against downstream usage.

# Prior week: weekday() is 0 for Monday, so this is the Monday of the previous
# week through the following Sunday.
prior_week_start = current_date - timedelta(days=current_date.weekday() + 7)
prior_week_end = prior_week_start + timedelta(days=6)
yesterday = current_date - timedelta(days=1)
today = current_date

# Prior months: each month's end is the day before the first day of the month
# that follows it. (Adding a fixed 31 days to the start overshoots into the
# next month for every month length, e.g. 1 Feb + 31 days = 3 or 4 Mar.)
_current_month_start = current_date.replace(day=1)
p1m_end = _current_month_start - timedelta(days=1)
p1m_start = p1m_end.replace(day=1)
p2m_end = p1m_start - timedelta(days=1)
p2m_start = p2m_end.replace(day=1)
p3m_end = p2m_start - timedelta(days=1)
p3m_start = p3m_end.replace(day=1)

logger.info(f"Date Calculations: Prior Week Start: {prior_week_start}, Prior Week End: {prior_week_end}, Yesterday: {yesterday}, Today: {today}")

# Transformation: Union
# Stack the five weekly frames in order, starting from week 1.
# NOTE(review): DataFrame.union matches columns by position, not by name --
# confirm all weekly tables share the same column order.
union_df = week1_df
for _weekly_df in (week2_df, week3_df, week4_df, week5_df):
    union_df = union_df.union(_weekly_df)
logger.info("Union of weekly data complete")

# Transformation: Custom Calculation
# Rush_Order_Fee is a copy of ADDTN_TRANS_FEE_OVRRIDE_ZSRO; it is then summed
# over the whole unioned frame (groupBy() with no keys yields a single row).
rush_order_fee_df = union_df.withColumn(
    "Rush_Order_Fee",
    F.col("ADDTN_TRANS_FEE_OVRRIDE_ZSRO"),
)
_rush_fee_total = F.sum("Rush_Order_Fee").alias("Sum_Rush_Order_Fee")
sum_rush_order_fee_df = rush_order_fee_df.groupBy().agg(_rush_fee_total)
logger.info("Rush Order Fee calculation complete")

# Transformation: Total Shipping and Handling
# Both amounts are fixed fractions of the summed transportation charge:
# 0.1 for BIA_SHIP_HNDL_AMT and 0.2 for COE_SHIP_HNDL_AMT.
_trans_charge_total = F.col("Sum_Trans_Charge_Amt")
total_shipping_handling_df = (
    summarized_df
    .withColumn("BIA_SHIP_HNDL_AMT", _trans_charge_total * 0.1)
    .withColumn("COE_SHIP_HNDL_AMT", _trans_charge_total * 0.2)
)
logger.info("Total Shipping and Handling calculation complete")

# Transformation: Invoice Sales
# Invoice_Sales is the summed extended final price scaled by 1.05.
_invoice_sales_amount = F.col("Sum_EXT_FINAL_PRICE") * 1.05
invoice_sales_df = summarized_df.withColumn("Invoice_Sales", _invoice_sales_amount)
logger.info("Invoice Sales calculation complete")

# Transformation: Join
# invoice_sales_df derives from summarized_df, which is grouped by BILL_DTE,
# so BILL_DTE is its only non-aggregate column. A using-column join on
# "DIST_CHNL_ID" cannot resolve because that column is absent on the left
# side; BILL_DTE is the only key both frames share.
# NOTE(review): this attaches each bill date's totals to every oct_tc3 row for
# that date -- confirm that is the intended grain of the output.
joined_df = invoice_sales_df.join(oct_tc3_df, "BILL_DTE", "inner")
logger.info("Join operation complete")

# Transformation: Filter
# Keep only rows whose DIST_CHNL_ID is populated.
_has_dist_channel = F.col("DIST_CHNL_ID").isNotNull()
filtered_df = joined_df.filter(_has_dist_channel)
logger.info("Filter operation complete")

# Transformation: Update Null
# Replace NULLs in FNC_ID and FNC_DESC (in that order) with the literal
# "Unknown"; non-null values pass through unchanged.
updated_df = filtered_df
for _nullable_column in ("FNC_ID", "FNC_DESC"):
    _current_value = F.col(_nullable_column)
    updated_df = updated_df.withColumn(
        _nullable_column,
        F.when(_current_value.isNull(), F.lit("Unknown")).otherwise(_current_value),
    )
logger.info("Update Null operation complete")

# Write to Unity Catalog target table
target_catalog = "catalog_name"
target_schema = "schema_name"
target_table = "table_name"

# Fully qualified identifiers, built once and reused below
_target_schema_fqn = f"{target_catalog}.{target_schema}"
_target_table_fqn = f"{_target_schema_fqn}.{target_table}"

# Ensure schema exists before creating table
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {_target_schema_fqn}")
logger.info(f"Schema {_target_schema_fqn} ensured")

# Write as a Delta table in overwrite mode
_delta_writer = updated_df.write.format("delta").mode("overwrite")
_delta_writer.saveAsTable(_target_table_fqn)
logger.info(f"Data written to {_target_table_fqn} successfully")
