In [None]:
import logging
from pyspark.sql import functions as F
from pyspark.sql.types import StringType, DateType, DecimalType
import psycopg2
from datetime import datetime, timedelta

# Module-level logger; INFO and above is emitted through the root configuration.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Pull the PostgreSQL connection settings from the "my_scope" secret scope.
# The secrets are fetched in the same order as the names on the left-hand side,
# so each variable receives the secret stored under the key of the same name.
db_user, db_password, db_host, db_port, db_name = (
    dbutils.secrets.get(scope="my_scope", key=secret_key)
    for secret_key in ("db_user", "db_password", "db_host", "db_port", "db_name")
)

# Connect to PostgreSQL database.
# On success this leaves two module-level handles, `conn` and `cursor`, open for
# the rest of the script. On failure the error is logged and re-raised so the
# run stops here.
conn = None
try:
    conn = psycopg2.connect(
        dbname=db_name,
        user=db_user,
        password=db_password,
        host=db_host,
        port=db_port,
    )
    cursor = conn.cursor()
    logger.info("Connected to PostgreSQL database.")
except Exception as e:
    logger.error(f"Error connecting to PostgreSQL: {e}")
    # connect() may have succeeded before cursor() failed; in that case the
    # session must be released here because nothing else will close it.
    if conn is not None:
        conn.close()
    raise

# Read the two source tables from Unity Catalog (sales first, then line totals).
# Any failure is logged and re-raised, so nothing downstream runs on missing input.
try:
    sales_df, line_totals_df = (
        spark.table(source_table)
        for source_table in (
            "catalog.source_db.sales",
            "catalog.source_db.line_totals",
        )
    )
    logger.info("Loaded data from Unity Catalog tables.")
except Exception as load_error:
    logger.error("Error loading data from Unity Catalog: %s", load_error)
    raise

# Transformation: DateTime Conversion
# Parse the order date and the FKDAT column with the yyyy-MM-dd pattern.
sales_df = sales_df.withColumn("SO_AUDAT", F.to_date("SO_AUDAT", "yyyy-MM-dd"))
line_totals_df = line_totals_df.withColumn("FKDAT", F.to_date("FKDAT", "yyyy-MM-dd"))

# Transformation: Field Selection and Renaming
# The select keeps ONLY the columns listed here, so the source column for
# Rush_Order_Fee has to be projected in this step; referencing it after the
# select would fail because it would no longer exist on sales_df.
sales_df = sales_df.select(
    F.col("SO_AUDAT").alias("Sales_Order_Date"),
    F.col("WERKS").alias("Plant"),
    F.col("VTWEG").alias("Distribution_Channel"),
    F.col("ZZFINCLASS").alias("Financial_Class"),
    F.col("SOLDTO_KUNNR").alias("Sold_To_Customer"),
    F.col("SHIPTO_KUNNR").alias("Ship_To_Customer"),
    F.col("VGBEL").alias("Delivery_Note_Number"),
    # Custom calculation: the rush order fee is a straight copy of this column.
    F.col("ADDTN_TRANS_FEE_OVRRIDE_ZSRO").alias("Rush_Order_Fee"),
)

# Multi-Field Formula: Replace null values
# This runs BEFORE the total below is computed: adding a null to anything
# yields null, so a single missing charge would otherwise wipe out the whole
# total (and a later fill would turn it into 0 instead of the partial sum).
sales_df = sales_df.fillna(0)
line_totals_df = line_totals_df.fillna(0)

# Custom Calculations
# Total shipping and handling is the sum of the six individual charge columns.
line_totals_df = line_totals_df.withColumn(
    "Total_Shipping_Handling",
    F.col("Sum_Trans_Charge_Amt") + F.col("Sum_RESTOCK_Fee") + F.col("Sum_Special_Hndl_Amt") +
    F.col("Sum_Vendor_Hndl_Amt") + F.col("Sum_MOC_Amt") + F.col("Sum_Fuel_Surcharge"),
)
# Keep the guarantee that the total itself is never null, even if a component
# column was left unfilled above.
line_totals_df = line_totals_df.fillna(0, subset=["Total_Shipping_Handling"])

# Join and Union
# Inner join: only rows whose Delivery_Note_Number has a matching VGBEL in
# line_totals_df are kept.
combined_df = sales_df.join(
    line_totals_df,
    sales_df["Delivery_Note_Number"] == line_totals_df["VGBEL"],
    "inner",
)

# Filter: Exclude records with null or empty primary key fields.
# The comparison is parenthesised on purpose: Python's `&` binds tighter than
# `!=`, so without the parentheses the expression is read as
# `(isNotNull() & col) != ""` rather than as two conditions joined by AND.
filtered_df = combined_df.filter(
    F.col("FNC_ID").isNotNull() & (F.col("FNC_ID") != "")
)

# Dynamic Rename: Remove prefixes
# NOTE(review): str.replace removes "Sum_" wherever it appears in a column
# name, not only at the start, and a stripped name could collide with an
# existing column - confirm against the real line_totals schema.
renamed_df = filtered_df.select(
    [F.col(c).alias(c.replace("Sum_", "")) for c in filtered_df.columns]
)

# Write to Unity Catalog target table
# The table is replaced with a single Delta overwrite instead of DROP + write:
# the overwrite is one transaction, so a failed write leaves the previous
# contents in place rather than no table at all, and the table's Delta history
# is kept. overwriteSchema lets the replace succeed when the column set of
# renamed_df differs from the existing table.
try:
    (
        renamed_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog.target_db.final_output")
    )
    logger.info("Data written to Unity Catalog target table.")
except Exception as e:
    logger.error(f"Error writing data to Unity Catalog: {e}")
    raise

# Release the PostgreSQL handles: the cursor first, then the connection.
# NOTE(review): the earlier stages re-raise on failure, so this cleanup is only
# reached on a fully successful run - consider a try/finally around the pipeline.
for pg_resource in (cursor, conn):
    pg_resource.close()
logger.info("PostgreSQL connection closed.")
