In [None]:
import logging
from datetime import datetime
import psycopg2
from pyspark.sql import functions as F

# Logging: one logger for this script, root handler configured at INFO
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Step 1: Load Data Sources
# Reads every input the later steps use: two catalog tables, one JDBC table,
# and the five weekly dynamic-input tables. Any failure is logged with its
# traceback and re-raised so the run stops here.
try:
    # Node 69: static text input
    dist_chnl_df = spark.table("catalog.source_db.dist_chnl")

    # Node 24: manual date input
    manual_date_df = spark.table("catalog.source_db.manual_date")

    # Node 48: OCT_TC3, read through Spark's JDBC data source.
    # Connection details come from the "sql_server_scope" secret scope.
    # NOTE(review): the scope and variable names say SQL Server, but the URL
    # below uses the PostgreSQL JDBC scheme -- confirm which database this is.
    sql_server_conn_info = {
        key: dbutils.secrets.get("sql_server_scope", key)
        for key in ("host", "database", "user", "password")
    }
    sql_query = "SELECT * FROM OCT_TC3"
    jdbc_url = (
        f"jdbc:postgresql://{sql_server_conn_info['host']}"
        f"/{sql_server_conn_info['database']}"
    )
    # The JDBC reader opens and closes its own connections, so no separate
    # driver-side connection is created (one would stay open and unused).
    oct_tc3_df = (
        spark.read.format("jdbc")
        .option("url", jdbc_url)
        .option("query", sql_query)
        .option("user", sql_server_conn_info["user"])
        .option("password", sql_server_conn_info["password"])
        .load()
    )

    # Nodes 18, 77, 89, 86, 84: one dynamic-input table per week, kept in
    # week1..week5 order because Step 2 unions them in list order.
    dynamic_week_dfs = [
        spark.table(f"catalog.source_db.dynamic_input_{week_node}")
        for week_node in ("week1", "week2", "week3", "week4", "week5")
    ]

    logger.info("Data sources loaded successfully.")
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error loading data sources: %s", e)
    raise

# Step 2: Implement Transformations
# Builds the frames for each workflow node. Only the union -> rename ->
# fill -> cleanse chain feeds output_preparation_df; the date and summarize
# frames are defined here but are not consumed by the output in this script.
# NOTE(review): confirm whether those branches are needed downstream.
try:
    # Node 19: DateTimeNow -- single-row frame with the driver's current time
    current_date_df = spark.createDataFrame([(datetime.now(),)], ["CurrentDate"])

    # Node 20: Format
    formatted_date_df = current_date_df.withColumn(
        "DateTime_Out", F.date_format("CurrentDate", "yyyy-MM-dd HH:mm:ss")
    )

    # Node 101: Summarize -- total INVOICES per BILL_DATE
    summarize_df = oct_tc3_df.groupBy("BILL_DATE").agg(
        F.sum("INVOICES").alias("Sum_Invoices")
    )

    # Node 72: Start / End -- shift both dates back seven days
    start_end_df = (
        manual_date_df
        .withColumn("Prior_Week_Start", F.date_sub("Start_Date", 7))
        .withColumn("Prior_Week_End", F.date_sub("End_Date", 7))
    )

    # Node 73: Alteryx Select -- keeps only the original dates, so the
    # Prior_Week_* columns added above are dropped here
    select_df = start_end_df.select("Start_Date", "End_Date")

    # Node 23 & 63: DateTime Conversion -- dates rendered as yyyy-MM-dd text
    datetime_conversion_df = (
        select_df
        .withColumn("StartTXT", F.date_format("Start_Date", "yyyy-MM-dd"))
        .withColumn("EndTXT", F.date_format("End_Date", "yyyy-MM-dd"))
    )

    # Node 66: ReName
    rename_df = datetime_conversion_df.withColumnRenamed("StartTXT", "Run_Date")

    # Node 88: Formula: 2WK START AND END
    # Run_Date is the yyyy-MM-dd text produced above; date_sub is applied to
    # that text column directly.
    formula_df = (
        rename_df
        .withColumn("END_1WK", F.date_sub("Run_Date", 7))
        .withColumn("START_2WK", F.date_sub("Run_Date", 14))
    )

    # Node 22: Select Tool -- keeps Run_Date only
    select_tool_df = formula_df.select("Run_Date")

    # Node 78: Union -- stack the weekly inputs in list order.
    # NOTE(review): union() matches columns by position, not by name; confirm
    # all weekly tables share the same column order.
    union_df = dynamic_week_dfs[0]
    for df in dynamic_week_dfs[1:]:
        union_df = union_df.union(df)

    # Node 27: Alteryx Select -- all columns pass through
    alteryx_select_df = union_df.select("*")

    # Node 40: FIELD NAMES -- no-op when there is no "Unknown" column
    field_names_df = alteryx_select_df.withColumnRenamed("Unknown", "Renamed_Unknown")

    # Node 67: MultiFieldFormula -- a string fill value only affects string columns
    multifield_formula_df = field_names_df.fillna("N/A")

    # Node 44: Cleanse -- upper-case string columns only. Running upper() over
    # every column would push numeric/date columns through a string function
    # and lose their original type, so non-string columns pass through as-is.
    cleanse_df = multifield_formula_df.select(
        [
            F.upper(F.col(name)).alias(name) if dtype == "string" else F.col(name)
            for name, dtype in multifield_formula_df.dtypes
        ]
    )

    # Node 60: Output Preparation
    output_preparation_df = cleanse_df

    logger.info("Transformations implemented successfully.")
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error during transformations: %s", e)
    raise

# Step 3: Write Output
# Ensures the target schema exists, then replaces the target Delta table with
# output_preparation_df. Any failure is logged with its traceback and re-raised.
try:
    target_catalog = "catalog_name"
    target_schema = "schema_name"
    target_table = "output_table"
    qualified_schema = f"{target_catalog}.{target_schema}"
    qualified_table = f"{qualified_schema}.{target_table}"

    # Create the schema first so saveAsTable does not fail on a missing schema
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {qualified_schema}")
    logger.info("Schema %s ensured", qualified_schema)

    # Overwrite mode replaces the table's existing contents.
    # NOTE(review): no overwriteSchema option is set -- confirm how a change
    # in column types should be handled when the table already exists.
    output_preparation_df.write.format("delta").mode("overwrite").saveAsTable(
        qualified_table
    )
    logger.info("Output written successfully.")
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error writing output: %s", e)
    raise
