In [0]:



import dlt
from pyspark.sql.types import StructType, StructField, StringType, LongType, DoubleType, TimestampType
from pyspark.sql.functions import col, to_timestamp, current_timestamp


# Confirmation message printed after the import statements in this cell have run.
print("Libraries and functions loaded successfully.")


In [0]:

# Streaming sales schema (same layout as the Bronze data).
# Assembled column by column with StructType.add; every column is nullable.
sales_schema = (
    StructType()
    .add("transaction_id", StringType(), True)
    .add("product_id", LongType(), True)
    .add("store_id", LongType(), True)
    .add("quantity_sold", LongType(), True)
    .add("sale_amount", DoubleType(), True)
    # Declared as a string in this schema, not as a timestamp type.
    .add("transaction_time", StringType(), True)
)

print("Schema defined successfully.")


In [0]:
# Input location for the streaming read: the DLT table function loads
# Parquet files from this mounted Bronze directory.
bronze_streaming_path = "/mnt/realtimedeai/bronze/streaming/"


In [0]:

@dlt.table(
    name="streaming_sales",
    comment="Auto-ingested streaming sales data from Bronze to Silver using DLT"
)
def streaming_sales():
    """Build the ``streaming_sales`` table from the Bronze Parquet stream.

    Reads Parquet files under ``bronze_streaming_path`` as a stream using
    ``sales_schema``, keeps only valid rows, de-duplicates them on
    ``transaction_id``, parses ``transaction_time`` into a timestamp and adds
    a ``processed_time`` column holding the processing timestamp.

    Returns:
        A streaming DataFrame with the columns of ``sales_schema``
        (``transaction_time`` converted to a timestamp) plus
        ``processed_time``.
    """
    # A row is valid when both measures are strictly positive and all three
    # identifiers are present. Comparisons against NULL evaluate to NULL, so
    # rows with a NULL quantity or amount are rejected as well.
    is_valid_sale = (
        (col("quantity_sold") > 0) &
        (col("sale_amount") > 0) &
        (col("transaction_id").isNotNull()) &
        (col("product_id").isNotNull()) &
        (col("store_id").isNotNull())
    )

    return (
        spark.readStream
            .format("parquet")
            .schema(sales_schema)
            .load(bronze_streaming_path)
            # Validate BEFORE de-duplicating. Streaming dropDuplicates keeps
            # the first row seen for a key; if that row were invalid it would
            # occupy the slot, get filtered out afterwards, and every later
            # valid copy of the same transaction would be silently discarded.
            # Filtering first also keeps invalid/NULL keys out of the state store.
            .filter(is_valid_sale)
            # NOTE(review): without a watermark the de-duplication state keeps
            # every transaction_id ever seen; consider withWatermark if state
            # growth becomes a problem.
            .dropDuplicates(["transaction_id"])
            # NOTE(review): strings that do not match this pattern are expected
            # to yield NULL (or an error under ANSI mode) - confirm the producer
            # always emits six fractional digits.
            .withColumn("transaction_time", to_timestamp(col("transaction_time"), "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"))
            .withColumn("processed_time", current_timestamp())
    )
