In [None]:
import logging
from pyspark.sql import functions as F
from pyspark.sql.types import StringType

# Module-level logger shared by the ETL helpers below. basicConfig() only
# installs a root handler when the root logger has none yet, so in a runtime
# that pre-configures logging this call leaves the existing setup as it is.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Function to load data from MySQL
def load_mysql_data(query, connection_details):
    """Run ``query`` against the database and return the rows as a DataFrame.

    NOTE(review): the function name and log message say MySQL, but the
    connection is opened with ``psycopg2``, which is a PostgreSQL adapter.
    Confirm which engine is actually behind these connection details.

    Args:
        query: SQL text handed to ``cursor.execute`` unchanged.
        connection_details: Keyword arguments forwarded to
            ``psycopg2.connect``.

    Returns:
        The DataFrame built by ``spark.createDataFrame`` from every fetched
        row, with column names taken from ``cursor.description``.

    Raises:
        Exception: Any failure (import, connect, execute, DataFrame creation)
            is logged and then re-raised unchanged.
    """
    try:
        # Imported lazily so the driver is only required when this loader runs.
        from contextlib import closing

        import psycopg2

        # closing() guarantees close() on both objects even when execute() or
        # fetchall() raises; previously a failure there leaked the connection.
        # A plain ``with conn:`` is not used because psycopg2 treats it as a
        # transaction scope and does not close the connection on exit.
        with closing(psycopg2.connect(**connection_details)) as conn:
            with closing(conn.cursor()) as cursor:
                cursor.execute(query)
                data = cursor.fetchall()
                columns = [desc[0] for desc in cursor.description]
        return spark.createDataFrame(data, schema=columns)
    except Exception as e:
        logger.error(f"Error loading data from MySQL: {e}")
        raise

# Load data from Unity Catalog tables
def load_unity_catalog_table(table_name):
    """Return the table ``table_name`` as a DataFrame and log its row count.

    Args:
        table_name: Table identifier passed to ``spark.table``.

    Returns:
        The DataFrame returned by ``spark.table``.

    Raises:
        Exception: Any failure while resolving or counting the table is
            logged and re-raised unchanged.
    """
    try:
        df = spark.table(table_name)
        # count() is a Spark action, so this log line executes a job over
        # the table purely to report its size.
        logger.info(f"Loaded {df.count()} records from {table_name}")
    except Exception as e:
        logger.error(f"Error loading data from Unity Catalog table {table_name}: {e}")
        raise
    else:
        return df

# Load data from external files
def load_external_file(file_path):
    """Read ``file_path`` with the ``"yxdb"`` data source and log its row count.

    NOTE(review): ``"yxdb"`` is presumably provided by a custom data source
    installed on the cluster; confirm it is available before relying on this.

    Args:
        file_path: Location passed to the reader's ``load``.

    Returns:
        The DataFrame produced by the reader.

    Raises:
        Exception: Any failure while reading or counting is logged and
            re-raised unchanged.
    """
    try:
        reader = spark.read.format("yxdb")
        df = reader.load(file_path)
        # count() is a Spark action: the file is read once here for the log.
        logger.info(f"Loaded {df.count()} records from {file_path}")
    except Exception as e:
        logger.error(f"Error loading data from file {file_path}: {e}")
        raise
    return df

# Transformation logic
def apply_transformations(df):
    """Add whitespace-trimmed copies of selected columns to ``df``.

    For each source column a new column prefixed with ``_`` is added (or
    replaced, if it already exists) holding the trimmed value. The resulting
    row count is logged.

    Args:
        df: Input DataFrame; must contain ``BusinessUnitCode`` and
            ``FMRCCode``.

    Returns:
        The DataFrame with ``_BusinessUnitCode`` and ``_FMRCCode`` added.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    # (target column, source column) pairs, applied in this order.
    trimmed_columns = (
        ("_BusinessUnitCode", "BusinessUnitCode"),
        ("_FMRCCode", "FMRCCode"),
    )
    try:
        for target, source in trimmed_columns:
            df = df.withColumn(target, F.trim(F.col(source)))
        # Add more transformations as needed
        logger.info(f"Applied transformations, resulting in {df.count()} records")
    except Exception as e:
        logger.error(f"Error applying transformations: {e}")
        raise
    else:
        return df

# Join operation
def perform_join(df1, df2, join_condition):
    """Inner-join ``df1`` with ``df2`` and log the size of the result.

    Args:
        df1: Left-hand DataFrame.
        df2: Right-hand DataFrame.
        join_condition: Join expression passed as the ``on`` argument of
            ``DataFrame.join``.

    Returns:
        The joined DataFrame.

    Raises:
        Exception: Any failure while joining or counting is logged and
            re-raised unchanged.
    """
    try:
        joined_df = df1.join(df2, on=join_condition, how="inner")
        # count() is a Spark action, so the join is executed here for the log.
        logger.info(f"Performed join, resulting in {joined_df.count()} records")
    except Exception as e:
        logger.error(f"Error performing join: {e}")
        raise
    return joined_df

# Write data to Unity Catalog table
def write_to_unity_catalog(df, catalog, schema, table):
    """Overwrite the Delta table ``catalog.schema.table`` with ``df``.

    The schema is created first if it does not exist yet.

    Args:
        df: DataFrame to persist.
        catalog: Catalog name; interpolated into SQL as-is.
        schema: Schema name; interpolated into SQL as-is.
        table: Table name within the schema.

    Raises:
        Exception: Any failure while creating the schema or writing is
            logged and re-raised unchanged.
    """
    try:
        spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{schema}")
        # "overwrite" replaces whatever the target table currently holds.
        delta_writer = df.write.format("delta").mode("overwrite")
        delta_writer.saveAsTable(f"{catalog}.{schema}.{table}")
        logger.info(f"Written data to {catalog}.{schema}.{table}")
    except Exception as e:
        logger.error(f"Error writing data to Unity Catalog table {catalog}.{schema}.{table}: {e}")
        raise

# Main ETL workflow
def etl_workflow():
    """Run the full pipeline: load, transform, join, and write the outputs.

    Steps, in order:
      1. Read connection details from the ``mysql`` secret scope and load
         ``pjotr_prod`` through ``load_mysql_data``.
      2. Load the Unity Catalog source tables.
      3. Apply the column transformations to the database extract.
      4. Inner-join the result with ``pjotr_in_pes``.
      5. Write the joined DataFrame to each output table.

    Raises:
        Exception: Any failure in any step is logged and re-raised unchanged.
    """
    secret_keys = ("host", "database", "user", "password")
    source_tables = (
        "catalog.source_db.text_input",
        "catalog.source_db.c04_bseg",
        "catalog.source_db.c04_ekpo",
        "catalog.source_db.pjotr_in_pes",
        "catalog.source_db.c19_ivl_data",
        "catalog.source_db.mysql_editable",
        "catalog.source_db.pjotr",
        "catalog.source_db.pes_prep",
    )
    output_tables = (
        "c03_pjotr",
        "c03_pjotr_midway",
        "c03_unmapped_to_pjotr",
        "c03_pjotr_final",
        "pjotr_in_pes",
    )

    try:
        # Secrets get their own handler so a missing scope/key produces a
        # specific log line before the generic workflow error below.
        try:
            mysql_connection_details = {
                key: dbutils.secrets.get("mysql", key) for key in secret_keys
            }
        except Exception as e:
            logger.error(f"Error retrieving MySQL connection details from Databricks secrets: {e}")
            raise

        mysql_df = load_mysql_data("SELECT * FROM pjotr_prod", mysql_connection_details)

        # NOTE(review): only pjotr_in_pes is referenced below; the other
        # tables are loaded (and counted for the log) but never used here.
        loaded_tables = {name: load_unity_catalog_table(name) for name in source_tables}
        pes_df = loaded_tables["catalog.source_db.pjotr_in_pes"]

        transformed_df = apply_transformations(mysql_df)

        # NOTE(review): apply_transformations only adds _BusinessUnitCode and
        # _FMRCCode, so "_LECode" must already be a column of the pjotr_prod
        # result for this lookup to succeed - TODO confirm.
        joined_df = perform_join(transformed_df, pes_df, transformed_df["_LECode"] == pes_df["LECode"])

        # The same joined DataFrame is written to every output table.
        for output_table in output_tables:
            write_to_unity_catalog(joined_df, "catalog_name", "schema_name", output_table)

    except Exception as e:
        logger.error(f"Error in ETL workflow: {e}")
        raise

# Run the ETL workflow as soon as this cell/module is executed. Any exception
# raised inside etl_workflow() is logged there and then propagates from here.
etl_workflow()

# Corrected Code Explanation:
# 1. The error was due to missing secrets in the Databricks environment. Ensure that the "mysql" secret scope exists and defines the keys "host", "database", "user", and "password".
# 2. Added error handling for secret retrieval to log specific errors related to missing secrets.
# 3. The rest of the code remains unchanged as it was successfully reviewed and approved.
# 4. Ensure that the Unity Catalog source tables and the target catalog exist before running; the target schema is created automatically if it is missing.
# 5. Added logging to capture successful operations and errors for better observability.
