In [0]:
%python
import dlt
from pyspark.sql.functions import col, current_timestamp
 
@dlt.view
def business_da_src():
    """Streaming change feed of Dallas businesses for the SCD type 2 load.

    Reads the silver Dallas food-inspection table as a stream, drops rows
    without a restaurant name, and projects the restaurant name (also exposed
    as ``business_key``), the street address, and an ``event_ts`` column
    stamped with ``current_timestamp()``.

    NOTE(review): ``event_ts`` is the processing time, not a timestamp carried
    by the source rows -- confirm this is the intended sequencing column.
    """
    silver_stream = spark.readStream.table(
        "workspace.food_inspection_project.silver_food_inspection_dallas"
    )
    named_rows = silver_stream.filter(col("restaurant_name").isNotNull())

    return named_rows.select(
        col("restaurant_name").alias("business_key"),
        col("restaurant_name"),
        col("street_address"),
        current_timestamp().alias("event_ts"),
    )
 

# Target streaming table that receives the versioned (SCD type 2) rows.
dlt.create_streaming_table(name="business_da_type2_stage")

# Merge the business_da_src feed into the stage table, one history per
# business_key, ordered by event_ts. Null values in incoming rows do not
# overwrite existing values (ignore_null_updates).
dlt.apply_changes(
    source="business_da_src",
    target="business_da_type2_stage",
    keys=["business_key"],
    sequence_by="event_ts",
    ignore_null_updates=True,
    stored_as_scd_type=2,
)
 
@dlt.table(name="dim_business_da_scd2")
def dim_business_da_scd2():
    """Business dimension (SCD type 2) with dimension-style column names.

    Reads ``business_da_type2_stage`` and returns it with:

    * ``__START_AT`` renamed to ``effective_start_dt``
    * ``__END_AT`` renamed to ``effective_end_dt``
    * ``is_current`` -- boolean, true for the open-ended (latest) version of
      each ``business_key``

    The stage table is already keyed by ``business_key``, so no key rename is
    required here.
    """
    stage_df = dlt.read("business_da_type2_stage")

    return (
        stage_df
        # apply_changes(stored_as_scd_type=2) maintains only __START_AT and
        # __END_AT; there is no __IS_CURRENT column to rename. A version is
        # current exactly when it has not been closed, i.e. __END_AT is null.
        .withColumn("is_current", col("__END_AT").isNull())
        .withColumnRenamed("__START_AT", "effective_start_dt")
        .withColumnRenamed("__END_AT", "effective_end_dt")
    )

In [0]:
@dlt.table(name="dim_business_da_scd2_v2")
def dim_business_da_scd2_v2():
    """Business dimension (SCD type 2) with a global row number.

    Reads ``business_da_type2_stage`` and returns it with:

    * ``row_num`` -- 1-based position of the row when all rows are ordered by
      ``business_key`` and then ``__START_AT``
    * ``__START_AT`` renamed to ``effective_start_dt``
    * ``__END_AT`` renamed to ``effective_end_dt``
    * ``is_current`` -- boolean, true for the open-ended (latest) version of
      each ``business_key``

    The function carries the same name as the table it defines so it does not
    shadow the ``dim_business_da_scd2`` definition in the same notebook.
    """
    from pyspark.sql.functions import col, row_number
    from pyspark.sql.window import Window

    stage_df = dlt.read("business_da_type2_stage")

    # Deliberately unpartitioned: the numbering runs across the whole table,
    # not per business_key. NOTE(review): Spark evaluates a window without
    # partitionBy in a single partition -- acceptable only while this
    # dimension stays small.
    global_order = Window.orderBy("business_key", "__START_AT")

    return (
        stage_df
        .withColumn("row_num", row_number().over(global_order))
        # apply_changes(stored_as_scd_type=2) maintains only __START_AT and
        # __END_AT; there is no __IS_CURRENT column to rename. A version is
        # current exactly when it has not been closed, i.e. __END_AT is null.
        .withColumn("is_current", col("__END_AT").isNull())
        .withColumnRenamed("__START_AT", "effective_start_dt")
        .withColumnRenamed("__END_AT", "effective_end_dt")
    )