In [0]:
# # SILVER CLEANUP ONLY: Tables + Checkpoints

# dbutils.widgets.text(name = "env", defaultValue = '', label = "Enter the environment in lower case")
# env = dbutils.widgets.get("env")

# print(f"Starting SILVER CLEANUP for environment: {env}")

# # Load commons for checkpoint base path
# %run "./commons"

# # --------------------------------
# # 1. DELETE SILVER CHECKPOINTS
# # --------------------------------

# print("Deleting SILVER streaming checkpoints...")

# silver_checkpoint_paths = [
#     f"{checkpoint}/SilverOrdersLoad/Checkpt",
#     f"{checkpoint}/SilverOrderDetailsLoad/Checkpt"
# ]

# for path in silver_checkpoint_paths:
#     try:
#         dbutils.fs.rm(path, recurse=True)
#         print(f"Deleted checkpoint: {path}")
#     except:
#         print(f"Checkpoint not found (skipped): {path}")

# # --------------------------------
# # 2. DROP SILVER TABLES ONLY
# # --------------------------------

# print("Dropping SILVER tables...")

# silver_tables = [
#     "silver_orders",
#     "silver_orderdetails"
# ]

# for table in silver_tables:
#     spark.sql(f"DROP TABLE IF EXISTS `{env}_catalog`.`silver`.`{table}`")
#     print(f"Dropped silver table: {table}")

# print("SILVER CLEANUP COMPLETED!")

In [0]:
# Notebook parameter: the environment prefix used to build catalog names of
# the form `<env>_catalog` in the cells below.
dbutils.widgets.text(name = "env", defaultValue = '', label = 'Enter the environment in lower case')
env = dbutils.widgets.get("env").strip()

# The widget defaults to an empty string, which would otherwise resolve to a
# catalog literally named `_catalog`; stop here with a clear message instead.
if not env:
    raise ValueError("Widget 'env' is empty: enter the environment in lower case before running this notebook.")

In [0]:
%run "./commons"

In [0]:
from pyspark.sql.functions import current_timestamp, col, to_date

## Read the two fact tables (Orders, OrderDetails)

In [0]:
def read_bronze_table(environment, table_name):
    """Open a streaming read on a table in the ``bronze`` schema.

    Args:
        environment: Environment prefix; the table is looked up in the
            catalog named ``<environment>_catalog``.
        table_name: Name of the table inside the ``bronze`` schema.
            Surrounding backticks, if already present, are tolerated.

    Returns:
        The streaming DataFrame returned by ``spark.readStream.table``.
    """
    # Quote every part of the identifier, including the table name, so the
    # quoting matches write_to_silver and names with special characters work.
    bare_name = table_name.strip("`")
    qualified_name = f"`{environment}_catalog`.`bronze`.`{bare_name}`"

    print(f"Reading Bronze table {table_name}: ", end = '')
    df = spark.readStream.table(qualified_name)
    print("Success!")

    return df

In [0]:
# Open one streaming reader per Bronze fact table for the selected environment.
df_bronze_orders = read_bronze_table(environment=env, table_name="raw_orders")
df_bronze_orderdetails = read_bronze_table(environment=env, table_name="raw_orderdetails")

## Clean & transform the two fact tables (Orders, OrderDetails)

In [0]:
def transform_orders(df):
    """Derive the Silver Orders frame from the Bronze Orders frame.

    The input is passed through ``remove_Dups`` and then ``handle_NULLs``
    (given every column name of the de-duplicated frame). Both helpers are
    defined outside this cell -- presumably in the ``./commons`` notebook;
    confirm their exact behaviour there. Two columns are then added:

    * ``OrderDateParsed``: ``OrderDate`` parsed with ``to_date`` using the
      pattern ``yyyy/MM/dd``.
    * ``Transformed_Time``: ``current_timestamp()``.

    Args:
        df: DataFrame holding the Bronze Orders data.

    Returns:
        The DataFrame with both columns appended.
    """
    print("Transforming Orders: Parsing OrderDate, adding Transformed_Time: ", end = '')

    deduped = remove_Dups(df)
    cleaned = handle_NULLs(deduped, deduped.schema.names)

    parsed_order_date = to_date(col("OrderDate"), "yyyy/MM/dd")
    result = (
        cleaned
        .withColumn("OrderDateParsed", parsed_order_date)
        .withColumn("Transformed_Time", current_timestamp())
    )

    print("Success!")
    return result


def transform_orderdetails(df):
    """Derive the Silver OrderDetails frame from the Bronze OrderDetails frame.

    The input is passed through ``remove_Dups`` and then ``handle_NULLs``
    (given every column name of the de-duplicated frame). Both helpers are
    defined outside this cell -- presumably in the ``./commons`` notebook;
    confirm their exact behaviour there. Two columns are then added:

    * ``LineSalesAmount``: ``OrderItemQuantity * PerUnitPrice``.
    * ``Transformed_Time``: ``current_timestamp()``.

    Args:
        df: DataFrame holding the Bronze OrderDetails data.

    Returns:
        The DataFrame with both columns appended.
    """
    print("Transforming OrderDetails: computing LineSalesAmount, Transformed_Time: ", end = '')

    deduped = remove_Dups(df)
    cleaned = handle_NULLs(deduped, deduped.schema.names)

    # Per-line revenue: quantity ordered times the unit price.
    line_sales_amount = col("OrderItemQuantity") * col("PerUnitPrice")
    result = (
        cleaned
        .withColumn("LineSalesAmount", line_sales_amount)
        .withColumn("Transformed_Time", current_timestamp())
    )

    print("Success!")
    return result


def write_to_silver(streaming_df, environment, table_name, chk_subdir):
    """Append a streaming DataFrame to a Delta table in the ``silver`` schema.

    The stream is written in append mode with the ``availableNow`` trigger,
    and this function blocks on ``awaitTermination()`` before returning.

    Args:
        streaming_df: Streaming DataFrame to persist.
        environment: Environment prefix; the target catalog is
            ``<environment>_catalog``.
        table_name: Target table name inside the ``silver`` schema.
        chk_subdir: Sub-directory under the checkpoint base path for this
            stream's checkpoint.

    Returns:
        None.
    """
    print(f"Writing Silver table {table_name}: ", end='')

    # `checkpoint` is not defined in this cell; presumably it is the base
    # checkpoint path provided by the ./commons notebook -- confirm there.
    checkpoint_path = f"{checkpoint}/{chk_subdir}/Checkpt"
    query_name = f"Silver_{table_name}_WriteStream"
    target_table = f"`{environment}_catalog`.`silver`.`{table_name}`"

    writer = streaming_df.writeStream.format("delta")
    writer = writer.option("checkpointLocation", checkpoint_path)
    writer = writer.outputMode("append").queryName(query_name)
    writer = writer.trigger(availableNow=True)

    # toTable() returns the query handle; wait on it so "Success!" is only
    # printed once the write has terminated.
    writer.toTable(target_table).awaitTermination()

    print("Success!")

In [0]:
# Build the Silver frames from the two Bronze streams read above.
df_orders_silver = transform_orders(df=df_bronze_orders)
df_orderdetails_silver = transform_orderdetails(df=df_bronze_orderdetails)

In [0]:
# Persist each transformed stream to its Silver table; every stream keeps its
# own checkpoint sub-directory.
write_to_silver(
    streaming_df=df_orders_silver,
    environment=env,
    table_name="silver_orders",
    chk_subdir="SilverOrdersLoad",
)
write_to_silver(
    streaming_df=df_orderdetails_silver,
    environment=env,
    table_name="silver_orderdetails",
    chk_subdir="SilverOrderDetailsLoad",
)

## Display sample data in silver tables

In [0]:
# Show the first 10 rows of each Silver table as a quick visual check.
for silver_table in ("silver_orders", "silver_orderdetails"):
    sample_query = f"SELECT * FROM `{env}_catalog`.`silver`.`{silver_table}` LIMIT 10"
    display(spark.sql(sample_query))

In [0]:
# Show the row count of each Silver table.
for silver_table in ("silver_orders", "silver_orderdetails"):
    count_query = f"SELECT COUNT(*) FROM `{env}_catalog`.`silver`.`{silver_table}`"
    display(spark.sql(count_query))