In [None]:
import logging
from pyspark.sql import functions as F
from pyspark.sql import DataFrame

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define helper functions for transformations
def multi_field_formula_transformation(df: DataFrame) -> DataFrame:
    """Strip placeholder tokens from the business-unit and FMRC code columns.

    For ``Business_unit_code`` and ``FMRC_code``, every occurrence of ``#``,
    ``UNMAPPED`` or ``NULL`` is removed (the pattern matches anywhere inside
    the value, not only when it is the whole value) and the result is trimmed
    of surrounding whitespace.

    Args:
        df: DataFrame containing both columns.

    Returns:
        DataFrame with the two columns replaced by their cleaned versions.
    """
    logger.info("Applying multi-field formula transformations")
    placeholder_pattern = "#|UNMAPPED|NULL"
    for column_name in ("Business_unit_code", "FMRC_code"):
        without_placeholders = F.regexp_replace(F.col(column_name), placeholder_pattern, "")
        df = df.withColumn(column_name, F.trim(without_placeholders))
    return df

def select_transformation(df: DataFrame, fields: list) -> DataFrame:
    """Project the DataFrame down to the requested columns.

    Args:
        df: Input DataFrame.
        fields: Columns to keep, passed positionally to ``DataFrame.select``.

    Returns:
        DataFrame containing only the requested columns.
    """
    logger.info(f"Selecting fields: {fields}")
    projected = df.select(*fields)
    return projected

def join_transformation(df1: DataFrame, df2: DataFrame, join_keys: list) -> DataFrame:
    """Inner-join two DataFrames on a list of shared column names.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame.
        join_keys: Column names present on both sides to join on.

    Returns:
        The joined DataFrame after dropping ``df2``'s reference to each key.
    """
    logger.info(f"Joining DataFrames on keys: {join_keys}")
    joined = df1.join(df2, on=join_keys, how="inner")
    # Drop the right-hand side's key columns one at a time so that each key
    # is intended to appear only once in the result.
    right_key_columns = [df2[key_name] for key_name in join_keys]
    for right_key in right_key_columns:
        joined = joined.drop(right_key)
    return joined

def union_transformation(df1: DataFrame, df2: DataFrame, *, by_name: bool = False) -> DataFrame:
    """Union two DataFrames.

    By default the union is positional (``DataFrame.union``): the i-th column
    of ``df2`` is appended under the i-th column of ``df1`` regardless of the
    column names, so inputs whose columns are ordered differently are silently
    misaligned. Pass ``by_name=True`` to match columns by name instead.

    Args:
        df1: First DataFrame; its column names are used for the result.
        df2: Second DataFrame.
        by_name: When True, resolve columns by name via ``unionByName``.
            Defaults to False, which preserves the positional behaviour.

    Returns:
        DataFrame containing the rows of both inputs (duplicates are kept).
    """
    logger.info("Unioning DataFrames")
    if by_name:
        return df1.unionByName(df2)
    return df1.union(df2)

def formula_transformation(df: DataFrame) -> DataFrame:
    """Add the ``Custom_Calculation`` column.

    The column is the SQL expression ``field1 + field2 * field3``; the
    multiplication binds tighter than the addition.

    Args:
        df: DataFrame containing ``field1``, ``field2`` and ``field3``.

    Returns:
        DataFrame with the extra ``Custom_Calculation`` column.
    """
    logger.info("Applying formula transformations")
    custom_calculation = F.expr("field1 + field2 * field3")
    return df.withColumn("Custom_Calculation", custom_calculation)

def filter_transformation(df: DataFrame, condition: str) -> DataFrame:
    """Keep only the rows that satisfy a SQL predicate.

    Args:
        df: Input DataFrame.
        condition: SQL expression string evaluated per row.

    Returns:
        DataFrame restricted to the rows for which ``condition`` holds.
    """
    logger.info(f"Filtering DataFrame with condition: {condition}")
    # ``where`` is an alias of ``filter``.
    matching_rows = df.where(condition)
    return matching_rows

def summarize_transformation(df: DataFrame, group_by_fields: list, agg_exprs: dict) -> DataFrame:
    """Group the DataFrame and aggregate each group.

    Args:
        df: Input DataFrame.
        group_by_fields: Column names to group by.
        agg_exprs: Mapping of column name to aggregate function name,
            passed unchanged to ``GroupedData.agg``.

    Returns:
        One row per group with the grouping columns and the aggregates.
    """
    logger.info(f"Summarizing DataFrame by fields: {group_by_fields}")
    grouped = df.groupBy(*group_by_fields)
    return grouped.agg(agg_exprs)

# Load data from Unity Catalog tables with error handling
def load_table(table_name: str) -> DataFrame:
    """Read a table through the active Spark session, logging the outcome.

    Args:
        table_name: Name of the table to read, as accepted by ``spark.table``.

    Returns:
        The table as a DataFrame.

    Raises:
        Exception: Whatever ``spark.table`` raised; it is logged and re-raised
            unchanged.
    """
    try:
        loaded = spark.table(table_name)
    except Exception as e:
        logger.error(f"Error loading table {table_name}: {e}")
        raise
    else:
        logger.info(f"Successfully loaded table: {table_name}")
        return loaded

# Loader kept under a separate name for existing call sites; it currently
# behaves exactly like load_table (no fallback is implemented).
def load_table_with_fallback(table_name: str) -> DataFrame:
    """Load a table, logging the outcome and re-raising on failure.

    Despite the name, no fallback source is consulted: a failure is logged
    and the original exception propagates. The function exists as the single
    place to add fallback handling later without touching its callers.

    Args:
        table_name: Name of the table to read, as accepted by ``spark.table``.

    Returns:
        The table as a DataFrame.

    Raises:
        Exception: Whatever the underlying load raised, unchanged.
    """
    # Delegate rather than duplicate the try / log / re-raise logic.
    return load_table(table_name)

# Load data from Unity Catalog tables
# Every load goes through a helper that logs the outcome and re-raises on
# failure, so an unreadable table stops the run at this point.
pes_prep_df = load_table("catalog.source_db.PES_prep")
# NOTE(review): load_table_with_fallback currently behaves like load_table;
# no alternative source is tried if this table cannot be read.
c19_ivl_data_df = load_table_with_fallback("catalog.source_db.C19_IVL_data")  # Ensure correct case sensitivity
c04_ekpo_df = load_table("catalog.source_db.C04_EKPO")
# NOTE(review): c04_bseg_df and pjotr_df are not referenced again in the
# visible part of this file - confirm they are still needed.
c04_bseg_df = load_table("catalog.source_db.C04_BSEG")
pjotr_df = load_table("catalog.source_db.PJOTR_")
pjotr_in_pes_df = load_table("catalog.source_db.PJOTR_in_PES")

# Apply transformations
# Run the transformation pipeline: clean -> project -> join -> union ->
# derive -> filter -> aggregate. Any failure is logged and re-raised.
try:
    pes_prep_df = multi_field_formula_transformation(pes_prep_df)

    join_keys = ["LE_code"]
    selected_fields = ["field1", "field2", "field3"]  # Example fields
    # The projection must keep the join keys; otherwise the join below cannot
    # resolve them on the left side and fails.
    projected_fields = selected_fields + [key for key in join_keys if key not in selected_fields]
    pes_prep_df = select_transformation(pes_prep_df, projected_fields)

    joined_df = join_transformation(pes_prep_df, c19_ivl_data_df, join_keys)

    # NOTE(review): this union is positional - it assumes c04_ekpo_df has the
    # same columns in the same order as joined_df. Confirm against the schemas.
    unioned_df = union_transformation(joined_df, c04_ekpo_df)

    transformed_df = formula_transformation(unioned_df)

    filter_condition = "Invoice_source_system = 'AP' OR left(Invoice_source_system, 3) = 'ARB'"
    filtered_df = filter_transformation(transformed_df, filter_condition)

    # NOTE(review): "Spend" and "Records" are both grouped by and aggregated,
    # so each group holds a single Spend/Records value. Grouping by "PJOTR"
    # alone is presumably what was intended - confirm before changing, since
    # it alters the output schema.
    group_by_fields = ["PJOTR", "Spend", "Records"]
    agg_exprs = {"Spend": "sum", "Records": "count"}
    summarized_df = summarize_transformation(filtered_df, group_by_fields, agg_exprs)

    logger.info("Transformations applied successfully")
except Exception as e:
    logger.error(f"Error during transformations: {e}")
    raise

# Output to Unity Catalog tables
# Persist the results as Delta tables, creating the target schema if needed.
# Any failure is logged and re-raised.
try:
    target_catalog, target_schema = "catalog_name", "schema_name"
    target_table_c03_pjotr = "C03_pjotr"
    target_table_pjotr_in_pes = "PJOTR_in_PES"

    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {target_catalog}.{target_schema}")
    logger.info(f"Schema {target_catalog}.{target_schema} ensured")

    # Aggregated result: replaces the existing table contents.
    summary_writer = summarized_df.write.format("delta").mode("overwrite")
    summary_writer.saveAsTable(f"{target_catalog}.{target_schema}.{target_table_c03_pjotr}")
    logger.info(f"Data written to {target_catalog}.{target_schema}.{target_table_c03_pjotr} successfully")

    # pjotr_in_pes_df is written as-is, also replacing existing contents.
    passthrough_writer = pjotr_in_pes_df.write.format("delta").mode("overwrite")
    passthrough_writer.saveAsTable(f"{target_catalog}.{target_schema}.{target_table_pjotr_in_pes}")
    logger.info(f"Data written to {target_catalog}.{target_schema}.{target_table_pjotr_in_pes} successfully")
except Exception as e:
    logger.error(f"Error writing data to Unity Catalog tables: {e}")
    raise
