In [0]:
from pyspark.sql import functions as F
import json

In [0]:
# Parameters extraction
# The "Parameters" widget holds JSON text; once decoded it is iterated and each
# element is read with .get(), so it is expected to be a list of objects.
Parameters = json.loads(dbutils.widgets.get("Parameters"))

# Defaults stay in place when no entry names the "Products" table.
ProcessInstanceId = ProcessQueueId = StageId = 0
TableName = ""

# Scan every entry; if several match, the values of the last one are kept.
for entry in Parameters:
    if entry.get("TableName") != "Products":
        continue
    ProcessInstanceId = int(entry.get("ProcessInstanceId"))
    ProcessQueueId = int(entry.get("ProcessQueueId"))
    StageId = int(entry.get("StageId"))
    TableName = str(entry.get("TableName"))

In [0]:
# Mark current table as InProgress
# Only StageId 3 is accepted by this notebook; any other value aborts the run
# before the control table is modified.
if StageId != 3:
    raise Exception(f"Stage Id is not relavent to R2B-transformation for table: {TableName}")

# Stamp the matching control.processqueue row with the new status and the
# current time as its start time.
in_progress_sql = f"""
        update control.processqueue
        set ProcessStatus = 'InProgress',
            ProcessStartTime = current_timestamp()
        where StageId = {StageId}
            and ProcessInstanceId = {ProcessInstanceId}
            and ProcessQueueId = {ProcessQueueId}
            and TableName = '{TableName}';
    """
spark.sql(in_progress_sql)

In [0]:

# Write tables to Silver
# `status` records whether the load finished: it starts as False and is only
# flipped to True once the write has returned without raising.
status = False

try:
    # Bronze source table
    src_df = spark.table("workspace.bronze.products")

    # Four source columns passed through as-is, followed by two added columns:
    # the load time and a constant source-system label.
    silver_columns = [
        "ProductId",
        "ProductName",
        "ProductCategory",
        "ProductPrice",
        F.current_timestamp().alias("LoadTimestamp"),
        F.lit("Source_CSV").alias("SourceSystem"),
    ]
    final_df = src_df.select(*silver_columns)

    # Replace (or create) the Delta target, schema included;
    # mirrors CREATE OR REPLACE TABLE ... AS SELECT
    silver_writer = final_df.write.format("delta").mode("overwrite")
    silver_writer = silver_writer.option("overwriteSchema", "true")
    silver_writer.saveAsTable("workspace.silver.products")
except Exception as e:
    # The error is printed rather than re-raised; the outcome is carried
    # forward through `status` instead.
    print(f"Error: {e}")
    status = False
else:
    status = True


In [0]:
# Mark file as Success/Failed
# Both outcomes write the same columns to the control.processqueue row; only
# the status label differs, so the UPDATE statement is built once from it.
# Any falsy `status` is treated as a failure, so the row is never left without
# a final status.
succeeded = bool(status)
final_status = "Succeeded" if succeeded else "Failed"

# ProcessDuration is the gap between now and ProcessStartTime in seconds,
# divided by 60 and cast to BIGINT (i.e. stored as whole minutes).
spark.sql(f"""
    UPDATE control.processqueue
    SET
        ProcessStatus = '{final_status}',
        ProcessEndTime = current_timestamp(),
        ProcessDuration = CAST(
            (unix_timestamp(current_timestamp()) - unix_timestamp(ProcessStartTime)) / 60
            AS BIGINT
        )
    WHERE
        StageId = {StageId}
        AND ProcessInstanceId = {ProcessInstanceId}
        AND ProcessQueueId = {ProcessQueueId}
        AND TableName = '{TableName}'
""")

if succeeded:
    print(f"{TableName} Marked as Successful")
else:
    print(f"{TableName} Marked as Failed")
    # Raised only after the control row has been updated, so the failure is
    # recorded before the notebook run itself is failed.
    raise Exception(f"Hard failure: {TableName} Failure detected")