In [None]:
import logging
from pyspark.sql import functions as F

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _load_table(table_name, label):
    """Read a catalog table and log its row count and schema.

    Args:
        table_name: Fully qualified table name passed to ``spark.table``.
        label: Human-readable name used as the prefix of the log messages.

    Returns:
        The DataFrame returned by ``spark.table(table_name)``.
    """
    df = spark.table(table_name)
    # count() runs a Spark job; it is kept because the row count is part of
    # the log output this script produces.
    logger.info("%s loaded with %s records", label, df.count())
    logger.info("%s schema: %s", label, df.schema)
    return df


try:
    # Load data from Unity Catalog tables
    pes_prep_df = _load_table("genai_demo.jnj.pes_prep", "PES_prep")
    c19_ivl_data_df = _load_table("genai_demo.jnj.c19_ivl_data", "C19_ivl_data")
    c04_ekpo_df = _load_table("genai_demo.jnj.c04_ekpo", "C04_EKPO")
    # NOTE(review): c04_bseg_df and pjotr_df are loaded (and counted) but never
    # used by any later step in this script -- confirm whether a join against
    # them is missing or the loads can be dropped.
    c04_bseg_df = _load_table("genai_demo.jnj.c04_bseg", "C04_BSEG")
    pjotr_df = _load_table("genai_demo.jnj.pjotr_", "PJOTR_")
    pjotr_in_pes_df = _load_table("genai_demo.jnj.pjotr_in_pes", "PJOTR_in_PES")

    # Multi-Field Formula Transformation: trimmed copies of the two code columns
    transformed_df = (
        pes_prep_df
        .withColumn("_Business_unit_code", F.trim(F.col("Business_unit_code")))
        .withColumn("_LE_code", F.trim(F.col("LE_code")))
    )
    logger.info("Multi-Field Formula Transformation applied")

    # Select Transformation
    selected_df = transformed_df.select("_Business_unit_code", "_LE_code", "Invoice_amount_USD")
    logger.info("Select Transformation applied")

    # Verify column existence before join
    if "_LE_code" in selected_df.columns and "IVL_business_unit_code" in c19_ivl_data_df.columns:
        # Join Transformation
        joined_df = selected_df.join(
            c19_ivl_data_df,
            selected_df["_LE_code"] == c19_ivl_data_df["IVL_business_unit_code"],
            "inner",
        )
        logger.info("Join Transformation applied with %s records", joined_df.count())
    else:
        logger.error("Join columns do not exist in the DataFrames")
        # Fall back to an empty DataFrame with the same schema so the
        # remaining steps still run.
        joined_df = spark.createDataFrame([], selected_df.schema)

    # Project the EKPO rows onto the same three columns, in the same order,
    # as the joined data: union() below matches columns by position.
    ekpo_aligned_df = c04_ekpo_df.selectExpr(
        "PO_and_PO_line as _Business_unit_code",
        "PO_line_SLoc as _LE_code",
        "cast(null as double) as Invoice_amount_USD",
    )

    # Reduce joined_df to the same three columns
    joined_df = joined_df.selectExpr("_Business_unit_code", "_LE_code", "Invoice_amount_USD")

    # Union Transformation
    union_df = joined_df.union(ekpo_aligned_df)
    logger.info("Union Transformation applied with %s records", union_df.count())

    # Formula Transformation
    formula_df = union_df.withColumn("Calculated_Field", F.expr("Invoice_amount_USD * 1.1"))
    logger.info("Formula Transformation applied")

    # Filter Transformation
    # The EKPO rows carry a NULL Invoice_amount_USD, so their Calculated_Field
    # is NULL and they never satisfy this predicate.
    filtered_df = formula_df.filter(F.col("Calculated_Field") > 1000)
    logger.info("Filter Transformation applied with %s records", filtered_df.count())

    # Summarize Transformation
    # NOTE(review): at this point filtered_df only has the columns
    # _Business_unit_code, _LE_code, Invoice_amount_USD and Calculated_Field,
    # so the "PJOTR" grouping key cannot be resolved and this step fails.
    # The intended key is not determinable from this script -- confirm it
    # (or the missing join that should supply a PJOTR column) before relying
    # on the outputs below.
    summarized_df = filtered_df.groupBy("PJOTR").agg(F.sum("Calculated_Field").alias("Total_Spend"))
    logger.info("Summarize Transformation applied with %s records", summarized_df.count())

    # Custom Calculations
    custom_calculations_df = summarized_df.withColumn("Custom_Calc", F.expr("Total_Spend / 100"))
    logger.info("Custom Calculations applied")

    # Output C03_pjotr.yxdb
    target_catalog = "genai_demo"
    target_schema = "jnj"
    target_table_c03_pjotr = "c03_pjotr"

    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {target_catalog}.{target_schema}")
    logger.info("Schema %s.%s ensured", target_catalog, target_schema)

    c03_pjotr_target = f"{target_catalog}.{target_schema}.{target_table_c03_pjotr}"
    custom_calculations_df.write.format("delta").mode("overwrite").saveAsTable(c03_pjotr_target)
    logger.info("Data written to %s", c03_pjotr_target)

    # Output PJOTR_in_PES.yxdb
    target_table_pjotr_in_pes = "pjotr_in_pes"

    # NOTE(review): this overwrites genai_demo.jnj.pjotr_in_pes with the data
    # that was just read from that same table, unchanged -- confirm this
    # round-trip is intended.
    pjotr_in_pes_target = f"{target_catalog}.{target_schema}.{target_table_pjotr_in_pes}"
    pjotr_in_pes_df.write.format("delta").mode("overwrite").saveAsTable(pjotr_in_pes_target)
    logger.info("Data written to %s", pjotr_in_pes_target)

except Exception as e:
    # Log with the full traceback and re-raise so a failed run is reported as
    # failed instead of finishing "successfully" without writing its outputs.
    logger.exception("An error occurred: %s", e)
    raise
