# DAY 5 : Delta Lake Advanced

In [0]:
from pyspark.sql import functions as F
from delta.tables import DeltaTable

df = spark.read.table("workspace.ecommerce.oct_events_delta")

# Create your incremental updates
incremental_updates = df.limit(10).withColumn("price", F.col("price") * 1.2).withColumn("updated_at", F.current_timestamp())

# Display to verify
display(incremental_updates)

event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session,updated_at
2019-10-15T16:25:52.000Z,view,21405695,2053013561579406073,electronics.clocks,casio,92.052,553727928,dc6c4641-15f9-4058-8b87-450527fda255,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,17300959,2053013553853497655,,montale,138.99599999999998,544842378,8ac19fd1-697b-4f4c-9120-d06af2d104f3,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,10101001,2053013553283072273,,,29.928,521785953,f5fd184f-f065-4949-bf50-0ad8b2618359,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,13300090,2053013557166998015,,vegas,256.992,548015241,e513c9ed-1b3e-4d7e-ad09-39ae3615b5e6,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,28711659,2053013565748544479,apparel.shoes,rieker,50.964,541534802,c9545511-4c80-45c6-ab93-4fee0e4a9b44,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,28101552,2053013564918072245,,erkaplan,315.072,524393933,0099a166-f8c2-4d8d-94c1-1d4e0db53669,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,5100564,2053013553341792533,electronics.clocks,samsung,311.976,560542953,cd92b6d5-3075-4e09-8b47-16649617f4c5,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,1307401,2053013558920217191,computers.notebook,asus,359.64,536294163,6b8e22bc-a6d3-43e8-9819-4c710adfac8f,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,1801881,2053013554415534427,electronics.video.tv,samsung,592.752,514736175,06419ad2-4af4-457f-9ff2-f87fe6279829,2026-01-14T14:38:10.644Z
2019-10-15T16:25:52.000Z,view,1005203,2053013555631882655,electronics.smartphone,xiaomi,144.528,527054725,02687311-a3a9-4a74-be66-11d18d003afa,2026-01-14T14:38:10.644Z


In [0]:
# Load the DeltaTable object needed for the Merge command
target_table = DeltaTable.forName(spark, "workspace.ecommerce.oct_events_delta")

(target_table.alias("target")
  .merge(
    incremental_updates.alias("source"),
    "target.product_id = source.product_id AND target.event_time = source.event_time"
  )
  .whenMatchedUpdate(set={"price": "source.price"})
  .whenNotMatchedInsertAll()
  .execute())

print("Incremental Merge Complete.")

Incremental Merge Complete.


In [0]:
%sql
DESCRIBE HISTORY workspace.ecommerce.oct_events_delta

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
3,2026-01-14T14:38:36.000Z,76058570651149,gomesrohit92@gmail.com,MERGE,"Map(predicate -> [""((product_id#13259 = product_id#13175) AND (event_time#13257 = event_time#13173))""], clusterBy -> [], matchedPredicates -> [{""actionType"":""update""}], statsOnLoad -> false, notMatchedBySourcePredicates -> [], notMatchedPredicates -> [{""actionType"":""insert""}])",,List(4418899387102987),0114-143607-x7gggmsd-v2n,2.0,WriteSerializable,False,"Map(numTargetRowsCopied -> 0, numTargetRowsDeleted -> 0, numTargetFilesAdded -> 1, numTargetBytesAdded -> 3350, numTargetBytesRemoved -> 0, numTargetDeletionVectorsAdded -> 1, numTargetRowsMatchedUpdated -> 10, executionTimeMs -> 8083, materializeSourceTimeMs -> 693, numTargetRowsInserted -> 0, numTargetRowsMatchedDeleted -> 0, numTargetDeletionVectorsUpdated -> 0, scanTimeMs -> 3076, numTargetRowsUpdated -> 10, numOutputRows -> 10, numTargetDeletionVectorsRemoved -> 0, numTargetRowsNotMatchedBySourceUpdated -> 0, numTargetChangeFilesAdded -> 0, numSourceRows -> 10, numTargetFilesRemoved -> 0, numTargetRowsNotMatchedBySourceDeleted -> 0, rewriteTimeMs -> 4175)",,Databricks-Runtime/17.3.x-aarch64-photon-scala2.13
2,2026-01-12T18:25:39.000Z,76058570651149,gomesrohit92@gmail.com,MERGE,"Map(predicate -> [""((product_id#14436 = product_id#13850) AND (event_time#14434 = event_time#13848))""], clusterBy -> [], matchedPredicates -> [{""actionType"":""update""}], statsOnLoad -> false, notMatchedBySourcePredicates -> [], notMatchedPredicates -> [{""actionType"":""insert""}])",,List(2217144857195530),0112-181520-515by3hp-v2n,1.0,WriteSerializable,False,"Map(numTargetRowsCopied -> 0, numTargetRowsDeleted -> 0, numTargetFilesAdded -> 1, numTargetBytesAdded -> 3034, numTargetBytesRemoved -> 0, numTargetDeletionVectorsAdded -> 1, numTargetRowsMatchedUpdated -> 5, executionTimeMs -> 7225, materializeSourceTimeMs -> 909, numTargetRowsInserted -> 0, numTargetRowsMatchedDeleted -> 0, numTargetDeletionVectorsUpdated -> 0, scanTimeMs -> 3013, numTargetRowsUpdated -> 5, numOutputRows -> 5, numTargetDeletionVectorsRemoved -> 0, numTargetRowsNotMatchedBySourceUpdated -> 0, numTargetChangeFilesAdded -> 0, numSourceRows -> 5, numTargetFilesRemoved -> 0, numTargetRowsNotMatchedBySourceDeleted -> 0, rewriteTimeMs -> 3100)",,Databricks-Runtime/17.3.x-aarch64-photon-scala2.13
1,2026-01-12T18:25:27.000Z,76058570651149,gomesrohit92@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(2217144857195530),0112-181520-515by3hp-v2n,0.0,WriteSerializable,False,"Map(numFiles -> 43, numRemovedFiles -> 43, numRemovedBytes -> 1405244778, numDeletionVectorsRemoved -> 0, numOutputRows -> 42448764, numOutputBytes -> 1405244778)",,Databricks-Runtime/17.3.x-aarch64-photon-scala2.13
0,2026-01-12T18:21:41.000Z,76058570651149,gomesrohit92@gmail.com,CREATE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(2217144857195530),0112-181520-515by3hp-v2n,,WriteSerializable,True,"Map(numFiles -> 43, numOutputRows -> 42448764, numOutputBytes -> 1405244778)",,Databricks-Runtime/17.3.x-aarch64-photon-scala2.13
