In [22]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import DeltaTable
from pyspark.sql.window import Window

#### *Incremental Load Date*

In [23]:
# --- Load configuration -------------------------------------------------
# Schema the incremental rows are read from, and schema the fact table is written to.
source_schema = "silver_sales"
target_schema = "gold_sales"

# Column compared against the watermark to pick up changed rows.
cdc_col = "ModifiedDate"

# Optional watermark override; leave empty to derive it from the target table.
backdate = ""

In [24]:
# Resolve the watermark used to select incremental rows from the source.
#   * target table missing        -> initial-load watermark (load everything)
#   * target exists, backdate set -> use the manual override
#   * target exists, no backdate  -> MAX(cdc_col) currently in the target
initial_load_date = '1900-01-01 00:00:00'

if not spark.catalog.tableExists(f"{target_schema}.FactOrderDetail"):
    last_load_date = initial_load_date
elif backdate:
    last_load_date = backdate
else:
    max_loaded = spark.sql(f"""
                            SELECT MAX({cdc_col}) FROM {target_schema}.FactOrderDetail
                            """).collect()[0][0]
    # MAX over an empty table yields NULL (None here). Interpolating None into
    # the downstream filter would render `> 'None'` and select no rows, so fall
    # back to the initial-load watermark instead.
    last_load_date = max_loaded if max_loaded is not None else initial_load_date

last_load_date

In [25]:
# Select only the source rows whose CDC column is newer than the watermark.
incremental_query = f"""
                    SELECT * FROM {source_schema}.salesorderdetail 
                    WHERE {cdc_col} > '{last_load_date}'
                    """
df_src = spark.sql(incremental_query)

# Expose the increment to Spark SQL under the view name "src".
df_src.createOrReplaceTempView("src")

# Row count of the increment, shown as the cell output.
df_src.count()

In [26]:
# Shape the incremental rows (temp view "src") into the fact layout and look up
# the product surrogate key from the product dimension in the target schema.
#
# The join is a LEFT JOIN, so a source row with no product version whose
# [StartDate, EndDate) range contains its ModifiedDate is kept with a NULL
# DimProductKey.
# NOTE(review): the range predicate assumes dimproduct.EndDate is populated for
# the current version; a NULL EndDate would never satisfy `<` - confirm.
df_fact_src = spark.sql(f"""
                SELECT
                    sh.SalesOrderID,
                    sh.SalesOrderDetailID,
                    sh.CarrierTrackingNumber,
                    sh.OrderQty,
                    dp.DimProductKey,
                    sh.SpecialOfferID,
                    sh.UnitPrice,
                    sh.UnitPriceDiscount,
                    sh.LineTotal,
                    sh.ModifiedDate
                FROM src sh
                LEFT JOIN {target_schema}.dimproduct dp
                    ON sh.ProductID = dp.ProductID
                    AND sh.ModifiedDate >= dp.StartDate
                    AND sh.ModifiedDate < dp.EndDate
                """)

In [27]:
# Upsert the prepared fact rows into the target fact table, creating the table
# on the first run. The same qualified name is used for the existence check,
# the merge target and the initial write so they can never point at different
# tables when target_schema changes.
fact_table = f"{target_schema}.FactOrderDetail"

if spark.catalog.tableExists(fact_table):
    # Rows are matched on the (SalesOrderID, SalesOrderDetailID) pair.
    merge_keys = (
        "trg.SalesOrderID = src.SalesOrderID "
        "AND trg.SalesOrderDetailID = src.SalesOrderDetailID"
    )
    dlt_obj = DeltaTable.forName(spark, fact_table)
    (
        dlt_obj.alias("trg")
        .merge(df_fact_src.alias("src"), merge_keys)
        # Only overwrite a matched row when the incoming row is strictly newer.
        .whenMatchedUpdateAll(condition=f"src.{cdc_col} > trg.{cdc_col}")
        .whenNotMatchedInsertAll()
        .execute()
    )
else:
    # First load: write the rows as a Delta table at the given storage path
    # and register it under the target schema.
    (
        df_fact_src.write.format('delta')
        .mode('append')
        .option('path', 'abfss://gold@dlcontoso.dfs.core.windows.net/sales/FactOrderDetail')
        .saveAsTable(fact_table)
    )