In [0]:
import dlt
from pyspark import pipelines as dp
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
def _with_scd_audit_columns(df):
    """Append the audit columns shared by every ``silver_*_cdf_type2_stage`` view.

    Columns are added in this order:
      * ``start_dt``  - copy of the row's ``_commit_timestamp``.
      * ``end_dt``    - NULL cast to ``timestamp``.
      * ``is_active`` - the string literal ``"Y"``.
      * ``load_dt``   - ``current_timestamp()``.

    Args:
        df: DataFrame that already exposes a ``_commit_timestamp`` column.
            NOTE(review): that column is normally produced by a Delta change
            data feed read; confirm the upstream ``*_type2_stage`` datasets
            carry it, since no ``readChangeFeed`` option is set in this file.

    Returns:
        The input DataFrame with the four audit columns appended.
    """
    return (
        df.withColumn("start_dt", col("_commit_timestamp"))
        .withColumn("end_dt", lit(None).cast("timestamp"))
        .withColumn("is_active", lit("Y"))
        .withColumn("load_dt", current_timestamp())
    )


@dlt.view
def silver_patients_cdf_type2_stage():
    """Stream ``LIVE.patients_type2_stage`` with the shared audit columns added."""
    return _with_scd_audit_columns(spark.readStream.table("LIVE.patients_type2_stage"))

@dlt.view
def silver_providers_cdf_type2_stage():
    """Stream ``LIVE.providers_type2_stage`` with the shared audit columns added."""
    return _with_scd_audit_columns(spark.readStream.table("LIVE.providers_type2_stage"))

@dlt.view
def silver_encounters_cdf_type2_stage():
    """Stream ``LIVE.encounters_type2_stage`` with the shared audit columns added."""
    return _with_scd_audit_columns(spark.readStream.table("LIVE.encounters_type2_stage"))

@dlt.view
def silver_conditions_cdf_type2_stage():
    """Stream ``LIVE.conditions_type2_stage`` with the shared audit columns added."""
    return _with_scd_audit_columns(spark.readStream.table("LIVE.conditions_type2_stage"))

In [0]:
# Gold patients table: upserts rows from the silver patients view, keyed on
# patient_id.
# NOTE(review): the target is named "*_type2" but is stored as SCD type 1
# (latest row per key only, no history) - confirm that is intended.
dlt.create_streaming_table("gold_patients_cdf_streaming_type2")
dlt.apply_changes(
    target = "gold_patients_cdf_streaming_type2",
    source = "silver_patients_cdf_type2_stage",
    keys = ["patient_id"],
    # start_dt is the view's copy of _commit_timestamp, so events are ordered
    # by commit time.
    sequence_by = "start_dt",
    ignore_null_updates = True,
    # Treat change-feed delete events as deletes, matching the providers,
    # encounters and conditions flows. Without this predicate a deleted
    # patient row is upserted into the target and never removed.
    apply_as_deletes = expr("_change_type = 'delete'"),
    # Change-feed / SCD bookkeeping columns that must not land in the target.
    except_column_list = ["_change_type", "_commit_version", "__START_AT", "__END_AT"],
    stored_as_scd_type = 1
)

# Gold providers table: SCD type 1 upsert from the silver providers view,
# one row per doctor_id, ordered by _commit_timestamp.
dlt.create_streaming_table("gold_providers_cdf_streaming_type2")
dlt.apply_changes(
    source="silver_providers_cdf_type2_stage",
    target="gold_providers_cdf_streaming_type2",
    keys=["doctor_id"],
    sequence_by=struct("_commit_timestamp"),
    stored_as_scd_type=1,
    # Rows flagged as deletes remove the matching target row.
    apply_as_deletes=expr("_change_type = 'delete'"),
    ignore_null_updates=True,
    # Bookkeeping columns kept out of the target table.
    except_column_list=[
        "_change_type",
        "_commit_version",
        "__START_AT",
        "__END_AT",
    ],
)

# Gold encounters table: SCD type 1 upsert from the silver encounters view,
# one row per visit_id, ordered by _commit_timestamp.
dlt.create_streaming_table("gold_encounters_cdf_streaming_type2")
dlt.apply_changes(
    source="silver_encounters_cdf_type2_stage",
    target="gold_encounters_cdf_streaming_type2",
    keys=["visit_id"],
    sequence_by=struct("_commit_timestamp"),
    stored_as_scd_type=1,
    # Rows flagged as deletes remove the matching target row.
    apply_as_deletes=expr("_change_type = 'delete'"),
    ignore_null_updates=True,
    # Bookkeeping columns kept out of the target table.
    except_column_list=[
        "_change_type",
        "_commit_version",
        "__START_AT",
        "__END_AT",
    ],
)

# Gold conditions table: SCD type 1 upsert from the silver conditions view,
# one row per (visit_id, DiagnosisCode) pair, ordered by _commit_timestamp.
dlt.create_streaming_table("gold_conditions_cdf_streaming_type2")
dlt.apply_changes(
    source="silver_conditions_cdf_type2_stage",
    target="gold_conditions_cdf_streaming_type2",
    keys=["visit_id", "DiagnosisCode"],
    sequence_by=struct("_commit_timestamp"),
    stored_as_scd_type=1,
    # Rows flagged as deletes remove the matching target row.
    apply_as_deletes=expr("_change_type = 'delete'"),
    ignore_null_updates=True,
    # Bookkeeping columns kept out of the target table.
    except_column_list=[
        "_change_type",
        "_commit_version",
        "__START_AT",
        "__END_AT",
    ],
)