# In [None]:  (notebook cell marker left over from export)
import logging
import psycopg2
from pyspark.sql import functions as F
from pyspark.sql.types import StringType

# Create this module's logger, then configure root logging at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Read the external-database connection settings from the "my_scope" secret
# scope so that no credential is hard-coded in the notebook.
def _mysql_secret(secret_key):
    """Return the secret stored under ``secret_key`` in the "my_scope" scope."""
    return dbutils.secrets.get(scope="my_scope", key=secret_key)


mysql_user = _mysql_secret("mysql_user")
mysql_password = _mysql_secret("mysql_password")
mysql_host = _mysql_secret("mysql_host")
mysql_db = _mysql_secret("mysql_db")

# Function to connect to MySQL and fetch data
def fetch_mysql_data(query, params=None):
    """Execute ``query`` on the configured external database and return all rows.

    A new connection is opened for every call using the module-level
    ``mysql_host``, ``mysql_db``, ``mysql_user`` and ``mysql_password``
    values, and it is always closed again, even when the query fails.

    NOTE(review): the connection is made with ``psycopg2``, which is a
    PostgreSQL driver, while the function and secret names refer to MySQL.
    Confirm which engine is actually targeted; switching drivers would need
    a new dependency and is therefore not done here.

    Args:
        query: SQL text to execute.
        params: Optional sequence or mapping of values bound to the
            placeholders in ``query``. Passed unchanged to
            ``cursor.execute``; prefer this over building SQL with string
            formatting. Defaults to ``None`` (no bound parameters).

    Returns:
        The result of ``cursor.fetchall()``.

    Raises:
        Exception: Any error raised while connecting or querying is logged
            (with traceback) and re-raised unchanged.
    """
    try:
        conn = psycopg2.connect(
            host=mysql_host,
            database=mysql_db,
            user=mysql_user,
            password=mysql_password,
        )
        try:
            # The cursor context manager closes the cursor on exit, including
            # when execute()/fetchall() raises.
            with conn.cursor() as cursor:
                cursor.execute(query, params)
                return cursor.fetchall()
        finally:
            # Leaving a psycopg2 connection's own `with` block does not close
            # it, so close explicitly to avoid leaking connections on errors.
            conn.close()
    except Exception as e:
        logger.exception("Error fetching data from MySQL: %s", e)
        raise

# Read every source DataFrame from the catalog.source_db schema. Any failure
# is logged and re-raised so the notebook stops at the load step.
def _source_table(table_name):
    """Return the DataFrame for ``catalog.source_db.<table_name>``."""
    return spark.table("catalog.source_db." + table_name)


try:
    pjotr_df = _source_table("pjotr")
    # text_input is read through a SQL SELECT rather than spark.table.
    text_input_df = spark.sql("SELECT * FROM catalog.source_db.text_input")
    bseg_df = _source_table("c04_bseg")
    ekpo_df = _source_table("c04_ekpo")
    pjotr_in_pes_df = _source_table("pjotr_in_pes")
    ivl_data_df = _source_table("c19_ivl_data")
    mysql_editable_df = _source_table("mysql_editable")
    pjotr_yxdb_df = _source_table("pjotr_yxdb")
    pes_prep_df = _source_table("pes_prep")
except Exception as load_error:
    logger.error(f"Error loading data from Unity Catalog: {load_error}")
    raise

# Apply transformations: build the summarised result from the loaded source
# DataFrames and overwrite the Delta target table. Any failure is logged with
# its traceback and re-raised.
try:
    # Node 495: Field Selection
    selected_fields_df = pes_prep_df.select(
        "PO business unit code", "PO LE code", "PO MRC code", "PO site code",
        "PO source system", "Posting date", "Region", "Sector", "Site code",
        "Vision sourced data",
    )

    # Node 404: Rename fields
    renamed_df = (
        pjotr_df
        .withColumnRenamed("pjotr_id", "PJOTR ID")
        .withColumnRenamed("pjotr", "PJOTR")
        .withColumnRenamed("pjotr_status", "PJOTR status")
        .withColumnRenamed("pjotr_in_pes", "PJOTR in PES")
        .withColumnRenamed("procurement_sector", "Procurement sector")
    )

    # Node 409: Custom Calculations (adds constant-valued columns)
    # NOTE(review): these columns are named le_code / le_name / mrc_code /
    # mrc_name, while the join and final summary below reference "LE code",
    # "LE name", "MRC code" and "MRC name". Those must already exist in the
    # pjotr table for the later steps to resolve -- confirm the intended names.
    custom_calculations_df = (
        renamed_df
        .withColumn("procurement_country_code", F.lit("US"))
        .withColumn("notes", F.lit("Note"))
        .withColumn("porg", F.lit("POrg"))
        .withColumn("le_code", F.lit("LE Code"))
        .withColumn("le_name", F.lit("LE Name"))
        .withColumn("mrc_code", F.lit("MRC Code"))
        .withColumn("mrc_name", F.lit("MRC Name"))
    )

    # Node 248: MultiField Formula (prefixes three code columns)
    # NOTE(review): selected_fields_df comes from the explicit select in
    # Node 495, which does not include "Business unit code", "FMRC code" or
    # "FSID code" (nor the "LE code" grouped on in Node 100). As written these
    # references cannot resolve; confirm the intended input frame or column
    # names (e.g. the "PO ..." columns) before relying on this step.
    transformed_df = (
        selected_fields_df
        .withColumn("Business unit code", F.concat(F.lit("BU_"), F.col("Business unit code")))
        .withColumn("FMRC code", F.concat(F.lit("FMRC_"), F.col("FMRC code")))
        .withColumn("FSID code", F.concat(F.lit("FSID_"), F.col("FSID code")))
    )

    # Node 100: Summarize (row count per LE code)
    summarized_df = transformed_df.groupBy("LE code").agg(
        F.count("LE code").alias("LE_code_count")
    )

    # Node 386: Join
    # Join on the column name instead of an equality expression so the result
    # keeps a single "LE code" column. With an expression join both sides'
    # "LE code" columns survive, and the groupBy("LE code", ...) in Node 223
    # fails with an ambiguous-reference error.
    joined_df = custom_calculations_df.join(summarized_df, on="LE code", how="inner")

    # Node 470: Filter
    filtered_df = joined_df.filter(joined_df["BU code"] == "BU_001")

    # Node 389: Union
    # union() replaces the deprecated unionAll() alias; both keep duplicates.
    # NOTE(review): filtered_df is a subset of joined_df, so every "BU_001"
    # row appears twice here and is double-counted in the sums below --
    # confirm this is intended.
    union_df = filtered_df.union(joined_df)

    # Node 223: Summarize
    # NOTE(review): F.first() without an explicit ordering returns an
    # arbitrary row's value per group.
    final_summarized_df = union_df.groupBy("LE code", "MRC code").agg(
        F.first("LE name").alias("First_LE_name"),
        F.first("MRC name").alias("First_MRC_name"),
        F.sum("Spend").alias("Total_Spend"),
        F.sum("Records").alias("Total_Records"),
    )

    # Write to Unity Catalog target table
    target_catalog = "catalog_name"
    target_schema = "schema_name"
    target_table = "final_table"

    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {target_catalog}.{target_schema}")
    logger.info(f"Schema {target_catalog}.{target_schema} ensured")

    # mode("overwrite") replaces any existing contents of the target table.
    final_summarized_df.write.format("delta").mode("overwrite").saveAsTable(
        f"{target_catalog}.{target_schema}.{target_table}"
    )
    logger.info(f"Data written to {target_catalog}.{target_schema}.{target_table}")

except Exception as e:
    # logger.exception records the traceback alongside the message.
    logger.exception("Error during transformation or writing: %s", e)
    raise
