In [None]:
import logging
import psycopg2
from pyspark.sql import functions as F

# Configure logging once for the notebook and grab a module-scoped logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# PostgreSQL connection settings are read from the Databricks secret scope
# so that no credential is hard-coded in the notebook.
def _secret(key):
    """Return the value stored under *key* in the ``my_scope`` secret scope."""
    return dbutils.secrets.get(scope="my_scope", key=key)


db_host = _secret("db_host")
db_port = _secret("db_port")
db_name = _secret("db_name")
db_user = _secret("db_user")
db_password = _secret("db_password")

# Function to connect to PostgreSQL and fetch data
def fetch_data_from_postgresql(query, params=None):
    """Run *query* against the configured PostgreSQL database and return all rows.

    Connection settings are taken from the module-level ``db_host``,
    ``db_port``, ``db_name``, ``db_user`` and ``db_password`` values.

    Args:
        query: SQL text to execute.
        params: Optional sequence or mapping of values that the driver binds
            to the placeholders in *query*. Prefer this over formatting
            values into the SQL string whenever a value is not a trusted
            constant. Defaults to ``None`` (no bound values).

    Returns:
        Whatever ``cursor.fetchall()`` returns for the executed query.

    Raises:
        Exception: Any error raised while connecting, executing, fetching or
            closing is logged and re-raised unchanged.
    """
    try:
        conn = psycopg2.connect(
            host=db_host,
            port=db_port,
            database=db_name,
            user=db_user,
            password=db_password,
        )
        # The nested try/finally blocks guarantee that the cursor and the
        # connection are released even when execute()/fetchall() raises.
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(query, params)
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()
    except Exception as e:
        logger.error(f"Error fetching data from PostgreSQL: {e}")
        raise

# Load the source DataFrames from Unity Catalog
def _source_table(table_name):
    """Return the DataFrame for *table_name* in ``catalog.source_db``."""
    return spark.table(f"catalog.source_db.{table_name}")


try:
    text_input_df = _source_table("text_input_channel")
    manual_date_df = _source_table("manual_date_input")
    oct_tc3_df = _source_table("oct_tc3")
    week1_df = _source_table("week1")
    week2_df = _source_table("week2")
    week3_df = _source_table("week3")
    week4_df = _source_table("week4")
    week5_df = _source_table("week5")
except Exception as exc:
    logger.error(f"Error loading data from Unity Catalog tables: {exc}")
    raise
else:
    logger.info("Data loaded from Unity Catalog tables")

# Transformation: Cleanse
def cleanse(df):
    """Return *df* with every string column converted to upper case.

    Only columns whose Spark type is ``string`` are rewritten. Applying
    ``F.upper`` to a numeric, date or timestamp column would implicitly cast
    it to string, so those columns are passed through unchanged and keep
    their original types.

    The projection is built with a single ``select`` instead of one
    ``withColumn`` call per column, which keeps the query plan flat on wide
    tables.

    Args:
        df: Input Spark DataFrame.

    Returns:
        A DataFrame with the same column names and order as *df*.
    """
    projected = [
        F.upper(F.col(name)).alias(name) if dtype == "string" else F.col(name)
        for name, dtype in df.dtypes
    ]
    return df.select(projected)

# Apply cleansing transformation
try:
    cleansed_df = cleanse(oct_tc3_df)
except Exception as exc:
    logger.error(f"Error during cleansing transformation: {exc}")
    raise
else:
    logger.info("Cleansing transformation applied")

# Transformation: Rush_Fee
# Copy the override fee into the column name used by the later steps.
try:
    rush_fee_source = F.col("ADDTN_TRANS_FEE_OVRRIDE_ZSRO")
    rush_fee_df = cleansed_df.withColumn("Rush_Order_Fee", rush_fee_source)
except Exception as exc:
    logger.error(f"Error during Rush_Fee transformation: {exc}")
    raise
else:
    logger.info("Rush_Fee transformation applied")

# Transformation: Summarize
# Ungrouped aggregation: the result is a single row holding the total fee.
try:
    rush_fee_total = F.sum("Rush_Order_Fee").alias("Sum_Rush_Order_Fee")
    summarize_df = rush_fee_df.agg(rush_fee_total)
except Exception as exc:
    logger.error(f"Error during Summarize transformation: {exc}")
    raise
else:
    logger.info("Summarize transformation applied")

# Transformation: Dynamic Rename
def dynamic_rename(df):
    """Return *df* with the leading ``Sum_`` removed from matching column names.

    Only the prefix is stripped, so ``Sum_A_Sum_B`` becomes ``A_Sum_B``.
    Columns that do not start with ``Sum_`` are left unchanged.

    Args:
        df: DataFrame-like object exposing ``columns`` and
            ``withColumnRenamed``.

    Returns:
        The DataFrame with the renamed columns.
    """
    prefix = "Sum_"
    for name in df.columns:
        if name.startswith(prefix):
            # Slice instead of str.replace so that later occurrences of
            # "Sum_" inside the name are preserved.
            df = df.withColumnRenamed(name, name[len(prefix):])
    return df

# Apply dynamic rename transformation
try:
    renamed_df = dynamic_rename(summarize_df)
except Exception as exc:
    logger.error(f"Error during Dynamic Rename transformation: {exc}")
    raise
else:
    logger.info("Dynamic Rename transformation applied")

# Transformation: Append Fields
# NOTE(review): this step is implemented as a by-name union of the renamed
# summary with the five weekly tables, which requires all of them to expose
# the same column set - confirm that a union (rather than appending the
# summary columns to each weekly row) is what the source workflow intends.
try:
    stacked_df = renamed_df
    for weekly_df in (week1_df, week2_df, week3_df, week4_df, week5_df):
        stacked_df = stacked_df.unionByName(weekly_df)
    appended_df = stacked_df
except Exception as exc:
    logger.error(f"Error during Append Fields transformation: {exc}")
    raise
else:
    logger.info("Append Fields transformation applied")

# Transformation: Total Shipping and Handling
# BIA_SHIP_HNDL_AMT is the sum of the two shipping-and-handling columns.
try:
    shipping_total = F.col("EXTND_SHPNG_HNDLNG") + F.col("COE_SHIP_HNDL_AMT")
    total_shipping_df = appended_df.withColumn("BIA_SHIP_HNDL_AMT", shipping_total)
except Exception as exc:
    logger.error(f"Error during Total Shipping and Handling transformation: {exc}")
    raise
else:
    logger.info("Total Shipping and Handling transformation applied")

# Transformation: Formula: Invoice_Sales
# NOTE(review): no visible step creates "Sum_EXT_FINAL_PRICE"; it is
# presumably supplied by the weekly tables - confirm.
try:
    invoice_sales_value = F.col("Sum_EXT_FINAL_PRICE")
    invoice_sales_df = total_shipping_df.withColumn("Invoice_Sales", invoice_sales_value)
except Exception as exc:
    logger.error(f"Error during Formula: Invoice_Sales transformation: {exc}")
    raise
else:
    logger.info("Formula: Invoice_Sales transformation applied")

# Transformation: Join
# Inner join against the channel lookup on both channel key columns.
try:
    channel_keys = ["DIST_CHNL_ID", "DIST_CHNL"]
    joined_df = invoice_sales_df.join(text_input_df, on=channel_keys, how="inner")
except Exception as exc:
    logger.error(f"Error during Join transformation: {exc}")
    raise
else:
    logger.info("Join transformation applied")

# Transformation: Union
# Stack the manually entered date rows under the joined rows, matching
# columns by name.
try:
    union_df = joined_df.unionByName(manual_date_df)
except Exception as exc:
    logger.error(f"Error during Union transformation: {exc}")
    raise
else:
    logger.info("Union transformation applied")

# Transformation: Formula
# Flag rows without a distribution channel id: "Y" when null, "N" otherwise.
try:
    channel_missing = F.col("DIST_CHNL_ID").isNull()
    null_flag = F.when(channel_missing, "Y").otherwise("N")
    formula_df = union_df.withColumn("null_yn", null_flag)
except Exception as exc:
    logger.error(f"Error during Formula transformation: {exc}")
    raise
else:
    logger.info("Formula transformation applied")

# Transformation: Filter Tool
# Keep only the rows flagged as missing a channel id.
try:
    is_flagged = F.col("null_yn") == "Y"
    filtered_df = formula_df.where(is_flagged)
except Exception as exc:
    logger.error(f"Error during Filter Tool transformation: {exc}")
    raise
else:
    logger.info("Filter Tool transformation applied")

# Transformation: UPDATE NULL
# Replace null function id / description values with their fallback labels.
try:
    null_replacements = {"FNC_ID": "OTH", "FNC_DESC": "OTHER"}
    patched_df = filtered_df
    for target_column, fallback in null_replacements.items():
        current_value = F.col(target_column)
        patched_df = patched_df.withColumn(
            target_column,
            F.when(current_value.isNull(), fallback).otherwise(current_value),
        )
    update_null_df = patched_df
except Exception as exc:
    logger.error(f"Error during UPDATE NULL transformation: {exc}")
    raise
else:
    logger.info("UPDATE NULL transformation applied")

# Transformation: Union
# update_null_df still carries the helper column "null_yn" added by the
# Formula step, which joined_df does not have. A strict by-name union rejects
# mismatched column sets, so missing columns are allowed here; "null_yn" is
# filled with nulls for the rows coming from joined_df.
try:
    final_union_df = update_null_df.unionByName(joined_df, allowMissingColumns=True)
    logger.info("Final Union transformation applied")
except Exception as e:
    logger.error(f"Error during Final Union transformation: {e}")
    raise

# Transformation: Select Tool
# Project the unioned data down to the columns kept by the Select Tool step.
try:
    select_tool_columns = [
        "SO_Date",
        "BILL_DATE",
        "Whs",
        "DIST_CHNL_ID",
        "FNC_ID",
        "FNC_DESC",
        "SOLDTO",
        "SHIPTO",
        "RFRNC_DOC_NUM",
        "Rush_Order_Fee",
        "Sum_Invoice_Lines",
        "Run Date",
        "StartTXT",
        "EndTXT",
        "Start Date",
        "START_2WK",
        "END_2WK",
        "End Date",
        "BIA_SHIP_HNDL_AMT",
        "COE_SHIP_HNDL_AMT",
        "Invoice_Sales",
        "DIST_CHNL",
        "DIST_CHNL_DESC",
    ]
    select_tool_df = final_union_df.select(*select_tool_columns)
except Exception as exc:
    logger.error(f"Error during Select Tool transformation: {exc}")
    raise
else:
    logger.info("Select Tool transformation applied")

# Transformation: Alteryx Select
# select_tool_df has no "RunDTE" column - the Select Tool step only yields
# "Run Date" - so selecting "RunDTE" by name cannot be resolved. The column is
# therefore derived by renaming "Run Date".
# NOTE(review): presumably the source Select tool renamed "Run Date" to
# "RunDTE"; confirm against the original workflow.
try:
    alteryx_select_df = select_tool_df.select(
        F.col("Run Date").alias("RunDTE"),
        "SO_Date",
        "BILL_DATE",
        "Whs",
        "DIST_CHNL_ID",
        "FNC_ID",
        "FNC_DESC",
        "SOLDTO",
        "SHIPTO",
        "RFRNC_DOC_NUM",
        "Rush_Order_Fee",
        "COE_SHIP_HNDL_AMT",
    )
    logger.info("Alteryx Select transformation applied")
except Exception as e:
    logger.error(f"Error during Alteryx Select transformation: {e}")
    raise

# Output to Unity Catalog
try:
    target_catalog = "catalog_name"
    target_schema = "schema_name"
    target_table = "output_table"
    qualified_schema = f"{target_catalog}.{target_schema}"
    qualified_table = f"{qualified_schema}.{target_table}"

    # The schema must exist before the table can be created in it
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {qualified_schema}")
    logger.info(f"Schema {qualified_schema} ensured")

    # Overwrite mode replaces any existing contents of the Delta target table
    alteryx_select_df.write.saveAsTable(qualified_table, format="delta", mode="overwrite")
    logger.info(f"Data written to {qualified_table}")
except Exception as exc:
    logger.error(f"Error writing data to Unity Catalog: {exc}")
    raise

# Output to CSV
# The data is collapsed to one partition before writing so that a single
# part file is produced; Spark creates csv_path as a directory holding it.
try:
    csv_path = "/mnt/output/CHresult.csv"
    single_partition_df = alteryx_select_df.coalesce(1)
    csv_writer = single_partition_df.write.mode("overwrite").option("header", True)
    csv_writer.csv(csv_path)
except Exception as exc:
    logger.error(f"Error writing data to CSV: {exc}")
    raise
else:
    logger.info(f"Data written to CSV at {csv_path}")
