# In [None]:
import logging
import psycopg2
from pyspark.sql import functions as F

# Set up logging
# Configure the root logger to emit INFO and above, then create a module-level
# logger named after this module for the steps below.
# NOTE: logging.basicConfig() does nothing when the root logger already has
# handlers configured (which may be the case in a hosted notebook runtime —
# TODO confirm), so the INFO level is only applied on a fresh root logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Function to connect to PostgreSQL and fetch data
def fetch_data_from_postgres(query, params=None):
    """Run ``query`` against PostgreSQL and return every result row.

    Connection settings are read from the Databricks secret scope
    ``my_scope`` (keys ``pg_host``, ``pg_port``, ``pg_dbname``, ``pg_user``
    and ``pg_password``).

    Args:
        query: SQL text to execute.
        params: Optional sequence or mapping of values that the driver binds
            to the placeholders in ``query``. Prefer this over formatting
            values into the SQL string. Defaults to ``None`` (no bound
            values), which matches calling ``cursor.execute(query)``.

    Returns:
        The rows returned by ``cursor.fetchall()``.

    Raises:
        Exception: Any error raised while reading secrets, connecting or
            querying is logged and then re-raised unchanged.
    """
    try:
        # Retrieve credentials securely
        pg_host = dbutils.secrets.get(scope="my_scope", key="pg_host")
        pg_port = dbutils.secrets.get(scope="my_scope", key="pg_port")
        pg_dbname = dbutils.secrets.get(scope="my_scope", key="pg_dbname")
        pg_user = dbutils.secrets.get(scope="my_scope", key="pg_user")
        pg_password = dbutils.secrets.get(scope="my_scope", key="pg_password")

        # Connect to PostgreSQL
        conn = psycopg2.connect(
            host=pg_host,
            port=pg_port,
            dbname=pg_dbname,
            user=pg_user,
            password=pg_password
        )
        # The nested try/finally blocks guarantee that the cursor and the
        # connection are released even when execute()/fetchall() raises;
        # previously a failing query leaked both.
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(query, params)
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()
    except Exception as e:
        logger.error(f"Error fetching data from PostgreSQL: {e}")
        raise

# Load data from Unity Catalog source tables
try:
    # Three-part name: <catalog>.<schema>.<table>
    source_df = spark.table("catalog_name.source_db.source_table")
    logger.info("Source data loaded successfully from Unity Catalog")
except Exception as e:
    logger.error(f"Error loading source data: {e}")
    # Best-effort diagnostics: report the session's current schema to help
    # spot a wrong table name. The lookup is guarded because a failure here
    # would otherwise replace the original load error and skip the re-raise.
    try:
        current_schema = spark.sql("SELECT current_schema()").collect()[0][0]
        logger.error(f"Current schema is: {current_schema}. Please verify the table name and schema.")
    except Exception as diag_error:
        logger.warning(f"Could not determine the current schema: {diag_error}")
    # Re-raise the original load failure so the run stops here.
    raise

# Perform transformations
try:
    # Step 1: keep only the rows whose column_name is greater than 100
    filtered_df = source_df.where(F.col("column_name") > 100)
    logger.info("Data filtered successfully")

    # Step 2: inner-join with the dimension table on key_column, hinting
    # Spark to broadcast the dimension side (assumed small — TODO confirm)
    dim_df = spark.table("catalog_name.source_db.dimension_table")
    transformed_df = filtered_df.join(
        other=F.broadcast(dim_df),
        on="key_column",
        how="inner",
    )
    logger.info("Data joined successfully")

    # Step 3: sum value_column per group_column into total_value
    total_value_expr = F.sum("value_column").alias("total_value")
    aggregated_df = transformed_df.groupBy("group_column").agg(total_value_expr)
    logger.info("Data aggregated successfully")

    # Step 4: mark the aggregate for caching.
    # NOTE(review): cache() is lazy and only pays off if aggregated_df is
    # read more than once — confirm it is reused beyond the single write.
    aggregated_df.cache()
except Exception as err:
    logger.error(f"Error during data transformation: {err}")
    raise

# Write transformed data to Unity Catalog target table
try:
    target_catalog = "catalog_name"
    target_schema = "schema_name"
    target_table = "table_name"

    # Build the qualified names once and reuse them in SQL and log messages
    schema_fqn = f"{target_catalog}.{target_schema}"
    table_fqn = f"{schema_fqn}.{target_table}"

    # The target schema must exist before the table can be created in it
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_fqn}")
    logger.info(f"Schema {schema_fqn} ensured")

    # Save as a Delta table; "overwrite" mode replaces any existing contents
    delta_writer = aggregated_df.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable(table_fqn)
    logger.info(f"Data written successfully to {table_fqn}")
except Exception as err:
    logger.error(f"Error writing data to target table: {err}")
    raise

# Corrected Code Explanation:
# 1. The error message indicated that the table or view could not be found. This was likely due to incorrect catalog or schema names.
# 2. The code now includes a try-except block to handle potential errors when loading the source data from Unity Catalog.
# 3. The schema creation step ensures that the target schema exists before attempting to write the data.
# 4. Enhanced logging provides more context about the current schema and potential issues with table names.
# 5. The catalog, schema, and table names used above are assumed to be correct; if the "table or view not found" error persists, verify them against the objects that actually exist in Unity Catalog.
