### IMPORT LIBRARIES

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import *
from delta.tables import DeltaTable

### READ DATA

In [0]:
# Read the raw source feed: CSV files with a header row, letting Spark infer
# the column types from the data.
csv_reader = (
    spark.read.format('csv')
    .option("header", "true")
    .option("inferSchema", "true")
)
src_df = csv_reader.load('/FileStore/SCDsrc')

# Preview the loaded rows in the notebook output.
src_df.display()

### TRANSFORM DATA

In [0]:
# Keep only the columns the dimension needs, then drop rows without a product
# id (p_id is the key used to match rows during the merge).
dim_columns = ['p_id', 'p_name', 'p_category', 'update_dt']
has_product_id = col('p_id').isNotNull()

src_df = src_df.select(dim_columns).where(has_product_id)
src_df.display()

### CREATE WIDGET

In [0]:
# Text widget "initial_run" (default "0") that selects the load mode.
# NOTE: the write step compares this value to "1": "1" runs the Delta MERGE
# against the existing table, any other value (including the default)
# overwrites the table with the current source data.
dbutils.widgets.text("initial_run","0")

### WRITE DATA INTO DELTA TABLE

In [0]:
# Load the product dimension (Type 2 slowly changing dimension layout:
# start_date / end_date / flag columns track row versions).
#
# NOTE: despite the widget's name, the value "1" selects the incremental MERGE
# branch; any other value (including the default "0") rebuilds the table from
# the current source data.
dim_path = '/FileStore/SCDdest/prod_dim'

if dbutils.widgets.get("initial_run") == "1":
    # Incremental run against the existing Delta table.
    deltatable = DeltaTable.forPath(spark, dim_path)

    (
        deltatable.alias('dest')
        .merge(
            src_df.alias('src'),
            # Only the currently active version of each product is a match.
            "dest.p_id = src.p_id and dest.flag = 'Y' and dest.end_date is null",
        )
        .whenMatchedUpdate(
            # `<=>` is null-safe equality. A plain `!=` evaluates to NULL when
            # either side is NULL, so a change to or from NULL would never
            # expire the active row.
            condition=(
                "not (src.p_name <=> dest.p_name) "
                "or not (src.p_category <=> dest.p_category)"
            ),
            # Expire the active row. A matched row can only be updated here,
            # so the replacement active row has to be inserted separately.
            set={
                "end_date": col('src.update_dt').cast('date'),
                "flag": lit('N'),
            },
        )
        .whenNotMatchedInsert(
            # Source rows with no active version become new active rows.
            values={
                "p_id": col('src.p_id'),
                "p_name": col('src.p_name'),
                "p_category": col('src.p_category'),
                "update_dt": col('src.update_dt'),
                "start_date": col('src.update_dt'),
                "end_date": lit(None).cast("date"),
                "flag": lit("Y"),
            }
        )
        .execute()
    )

else:
    # Full (re)build: every source row becomes an open-ended active row.
    initial_df = (
        src_df.withColumn("start_date", col('update_dt'))
        .withColumn("end_date", lit(None).cast("date"))
        .withColumn("flag", lit("Y"))
    )

    (
        initial_df.write.format('delta')
        .mode('overwrite')
        .option('path', dim_path)
        .saveAsTable('Product_dim')
    )

In [0]:
# Register the filtered source DataFrame as the temp view `src_table` so the
# SQL cells can query it.
src_df.createOrReplaceTempView('src_table')

In [0]:
%sql
-- Insert a new active row for every source product that currently has no
-- active (flag = 'Y') row in the dimension, i.e. products whose previous
-- version was just expired by the merge.
--
-- The anti join against active rows replaces the former
-- `LEFT JOIN ... WHERE p.flag = 'N'`, which matched every historical row:
-- it inserted one copy per expired version and re-inserted unchanged
-- products on every later run.
INSERT INTO Product_dim
SELECT
  s.p_id,
  s.p_name,
  s.p_category,
  s.update_dt,
  s.update_dt AS start_date,
  CAST(NULL AS DATE) AS end_date,
  'Y' AS flag
FROM src_table s
LEFT ANTI JOIN Product_dim p
  ON s.p_id = p.p_id
  AND p.flag = 'Y'

### CHECK DATA

In [0]:
%sql
-- Inspect the dimension: rows ordered by product, latest update first.
SELECT *
FROM Product_dim
ORDER BY
  p_id ASC,
  update_dt DESC

### DELETE SOURCE FILES AFTER EACH RUN

In [0]:
# Remove the source directory that was read at the start of the notebook.
# The second argument (True) is passed as dbutils.fs.rm's recurse flag.
# NOTE(review): presumably this keeps the next run from reprocessing the same
# files — confirm that the upstream process recreates the directory.
dbutils.fs.rm('/FileStore/SCDsrc',True)