In [None]:
import logging
from datetime import datetime
import pyspark.sql.functions as F
from pyspark.sql import DataFrame

# Module-scoped logger for this file; root logging is requested at INFO level
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Function to load data from Unity Catalog tables
def load_data_from_catalog(table_name: str) -> DataFrame:
    """Return the table ``table_name`` as a DataFrame via ``spark.table``.

    Relies on a ``spark`` session and a ``logger`` that are in scope at
    call time; ``spark`` is not defined in the visible source.

    Args:
        table_name: Name handed unchanged to ``spark.table``.

    Returns:
        The DataFrame produced by ``spark.table(table_name)``.

    Raises:
        Exception: Any error raised while resolving or counting the table is
            logged (with traceback) and re-raised unchanged.
    """
    try:
        df = spark.table(table_name)
        # df.count() launches a Spark job, so only pay for it when the INFO
        # record will actually be emitted.
        if logger.isEnabledFor(logging.INFO):
            logger.info(
                "Loaded data from %s with %d records.", table_name, df.count()
            )
        return df
    except Exception as e:
        # logger.exception keeps the original message text and adds the traceback.
        logger.exception("Error loading data from %s: %s", table_name, e)
        raise

# Log a stage's row count; the count is skipped when INFO logging is disabled
def _log_record_count(stage: str, df: "DataFrame") -> None:
    """Emit ``"<stage> data with <n> records."`` at INFO level.

    ``df.count()`` triggers a Spark job, so it is evaluated only when the
    message will actually be emitted.

    Args:
        stage: Leading word of the log message (e.g. ``"Joined"``).
        df: DataFrame whose rows are counted.
    """
    if logger.isEnabledFor(logging.INFO):
        logger.info("%s data with %d records.", stage, df.count())


# Function to perform transformations
def perform_transformations():
    """Run the ETL flow end to end and write the result as a Delta table.

    Steps, in order: load the source tables, aggregate ``oct_tc3`` sums,
    build a one-row current-date frame, join the channel input to the
    aggregate, union the five weekly inputs, keep rows whose ``null_yn`` is
    not null, rename / derive the invoice columns, ensure the target schema
    exists, and overwrite the target table.

    Raises:
        Exception: Any failure in the steps above is logged (with traceback)
            and re-raised unchanged.
    """
    try:
        # Load data from Unity Catalog tables
        text_input_df = load_data_from_catalog("catalog.source_db.text_input_channel")
        # NOTE(review): loaded but never referenced by a later step.
        manual_date_df = load_data_from_catalog("catalog.source_db.text_input_manual_date")
        oct_tc3_df = load_data_from_catalog("catalog.source_db.dbfileinput_oct_tc3")
        weekly_dfs = [
            load_data_from_catalog(f"catalog.source_db.dynamic_input_week{week}")
            for week in range(1, 6)
        ]

        # Summarize operation.
        # The join below matches on DIST_CHNL_ID, so the aggregate has to carry
        # that column; grouping by BILL_DTE alone drops it and the join can
        # never resolve its key.
        # NOTE(review): assumes oct_tc3 exposes DIST_CHNL_ID - confirm schema.
        measure_columns = [
            "Invoices",
            "Invoice_Lines",
            "LANDED_COST",
            "EXT_FINAL_PRICE",
            "Trans_Charge_Amt",
            "RESTOCK_Fee",
            "Special_Hndl_Amt",
            "Vendor_Hndl_Amt",
            "MOC_Amt",
            "Fuel_Surcharge",
        ]
        summarize_df = oct_tc3_df.groupBy("BILL_DTE", "DIST_CHNL_ID").agg(
            *[F.sum(name).alias(f"Sum_{name}") for name in measure_columns]
        )
        _log_record_count("Summarized", summarize_df)

        # DateTimeNow and DateTime Format
        # NOTE(review): date_df is built but not consumed by any later step.
        current_date = datetime.now().strftime('%Y-%m-%d')
        date_df = spark.createDataFrame([(current_date,)], ["CurrentDate"])
        date_df = date_df.withColumn(
            "DateTime_Out", F.date_format(F.col("CurrentDate"), "yyyy-MM-dd")
        )
        logger.info("Generated current date: %s", current_date)

        # Join operation
        # NOTE(review): joined_df is only counted; the written output below is
        # derived from the weekly union, not from this join.
        joined_df = text_input_df.join(
            summarize_df,
            text_input_df["DIST_CHNL"] == summarize_df["DIST_CHNL_ID"],
            "inner",
        )
        _log_record_count("Joined", joined_df)

        # Union operation (positional, same as chaining .union() calls)
        union_df = weekly_dfs[0]
        for week_df in weekly_dfs[1:]:
            union_df = union_df.union(week_df)
        _log_record_count("Unioned", union_df)

        # Filter operation
        filtered_df = union_df.filter(F.col("null_yn").isNotNull())
        _log_record_count("Filtered", filtered_df)

        # Rename operation
        # NOTE(review): Sum_Invoices / Sum_EXT_FINAL_PRICE are produced by the
        # aggregate above, while this frame comes from the weekly union -
        # confirm the weekly tables really carry these columns.
        renamed_df = filtered_df.withColumnRenamed("Sum_Invoices", "Invoices")
        logger.info("Renamed fields in data.")

        # Calculate Invoice Sales
        invoice_sales_df = renamed_df.withColumn(
            "Invoice_Sales", F.col("Sum_EXT_FINAL_PRICE") * 1.1
        )  # Example calculation
        logger.info("Calculated Invoice Sales.")

        # Write to Unity Catalog target table
        target_catalog = "catalog_name"
        target_schema = "schema_name"
        target_table = "table_name"
        qualified_schema = f"{target_catalog}.{target_schema}"
        qualified_table = f"{qualified_schema}.{target_table}"

        # Create schema if it doesn't exist
        spark.sql(f"CREATE SCHEMA IF NOT EXISTS {qualified_schema}")
        logger.info("Schema %s ensured", qualified_schema)

        # Overwrite mode replaces the table contents on every run
        invoice_sales_df.write.format("delta").mode("overwrite").saveAsTable(qualified_table)
        logger.info("Data written to %s", qualified_table)

    except Exception as e:
        # logger.exception keeps the original message text and adds the traceback.
        logger.exception("Error during transformations: %s", e)
        raise

# Execute the ETL workflow.
# This call is unguarded, so the whole pipeline (including the overwrite of the
# target table) runs every time this code is executed or imported.
perform_transformations()
