### Ingest sample order data into a Spark DataFrame.

In [0]:
# Ingest sample order data into a Spark DataFrame

from pyspark.sql.types import *

# Declare the schema explicitly instead of relying on inferSchema: inference
# needs an extra pass over the file and makes the column types depend on the
# data. order_timestamp is read as a plain string here; the next cell parses
# it with the "dd-MM-yyyy HH:mm" pattern.
orders_schema = StructType([
    StructField("order_id", StringType(), True),
    StructField("order_timestamp", StringType(), True),
    StructField("customer_id", StringType(), True),
    StructField("country", StringType(), True),
    StructField("amount", DoubleType(), True),
    StructField("currency", StringType(), True),
    StructField("status", StringType(), True),
])

# The first line of the file is a header row, so it is skipped rather than
# loaded as data.
data = (
    spark.read.format("csv")
    .option("header", True)
    .schema(orders_schema)
    .load("/Volumes/main/default/input-data/orders_1000.csv")
)

In [0]:
from pyspark.sql.functions import to_timestamp

# Overwrite order_timestamp with a real timestamp, parsing the existing values
# with the day-first pattern "dd-MM-yyyy HH:mm".
data = data.withColumn("order_timestamp", to_timestamp(data["order_timestamp"], "dd-MM-yyyy HH:mm"))


In [0]:
# Print the column names and types to confirm order_timestamp is now a timestamp.
data.printSchema()

root
 |-- order_id: string (nullable = true)
 |-- order_timestamp: timestamp (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- country: string (nullable = true)
 |-- amount: double (nullable = true)
 |-- currency: string (nullable = true)
 |-- status: string (nullable = true)



### Add a derived column order_date (date only from order_timestamp).

In [0]:
# Derive order_date: the calendar date taken from order_timestamp.

# NOTE(review): this wildcard import brings in Spark functions named like
# Python builtins (sum, min, max, round, ...), and a later cell relies on it
# for `lit`. Prefer explicit imports once all uses are known.
from pyspark.sql.functions import *

data = data.withColumn("order_date", to_date(data["order_timestamp"]))

In [0]:
# Print the schema again to confirm the new order_date column has type date.
data.printSchema()

root
 |-- order_id: string (nullable = true)
 |-- order_timestamp: timestamp (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- country: string (nullable = true)
 |-- amount: double (nullable = true)
 |-- currency: string (nullable = true)
 |-- status: string (nullable = true)
 |-- order_date: date (nullable = true)



### Write the DataFrame as a Delta table partitioned by country and order_date.

In [0]:
# Save the orders as the Delta table main.default.orders, replacing any
# existing contents, partitioned by country and order_date.
# NOTE: with only ~1000 rows this partitioning yields hundreds of tiny files
# (see numFiles in the DESCRIBE DETAIL output further down); the small-file
# section of this notebook builds on that.
(
    data.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("country", "order_date")
    .saveAsTable("main.default.orders")
)

### Verify the partition structure in the storage path.

In [0]:
%sql

-- List the (country, order_date) partition values that exist in the table
SHOW PARTITIONS main.default.orders;

country,order_date
AU,2024-05-23
CA,2024-10-24
US,2024-07-27
IN,2024-05-04
AU,2024-10-13
AU,2024-12-10
CA,2024-05-19
US,2024-01-01
AU,2024-01-28
IN,2024-02-26


In [0]:
%sql

-- Inspect table-level metadata: partitionColumns, numFiles and sizeInBytes
DESCRIBE DETAIL main.default.orders;

format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,20bd279b-379a-493f-b7e7-c9f261d12565,main.default.orders,,,2025-12-01T15:37:00.000Z,2025-12-01T15:45:18.000Z,"List(country, order_date)",List(),777,1388835,"Map(delta.parquet.compression.codec -> zstd, delta.enableDeletionVectors -> true)",3,7,"List(appendOnly, deletionVectors, invariants)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False


In [0]:
%sql
-- Preview up to 20 rows. There is no ORDER BY, so which rows are returned is not guaranteed.
SELECT * FROM main.default.orders LIMIT 20;

order_id,order_timestamp,customer_id,country,amount,currency,status,order_date
ORD00039,2024-02-29T01:56:00.000Z,CUST3128,US,949.17,USD,CREATED,2024-02-29
ORD00764,2024-02-29T16:19:00.000Z,CUST2135,US,459.2,USD,CANCELLED,2024-02-29
ORD00914,2024-02-29T18:52:00.000Z,CUST6642,US,987.67,USD,CREATED,2024-02-29
ORD00076,2024-12-21T03:32:00.000Z,CUST6887,IN,950.11,INR,PAID,2024-12-21
ORD00119,2024-12-21T01:58:00.000Z,CUST9606,IN,673.08,INR,PAID,2024-12-21
ORD00516,2024-04-07T14:19:00.000Z,CUST5702,CA,409.77,CAD,CANCELLED,2024-04-07
ORD00547,2024-04-07T01:49:00.000Z,CUST1973,CA,345.85,CAD,CREATED,2024-04-07
ORD00254,2024-09-11T10:59:00.000Z,CUST9093,US,508.63,USD,CREATED,2024-09-11
ORD00405,2024-09-11T02:54:00.000Z,CUST2078,US,315.82,USD,PAID,2024-09-11
ORD00029,2024-12-24T01:11:00.000Z,CUST5595,AU,229.46,AUD,CREATED,2024-12-24


### Run queries that demonstrate partition pruning

In [0]:
# Partition pruning, case 1: filter on the partition column `country` only and
# print the extended plan (parsed, analyzed, optimized, physical).
spark.sql("""
SELECT * FROM main.default.orders
WHERE country = 'UK'
""").explain(extended=True)

== Parsed Logical Plan ==
'Project [*]
+- 'Filter ('country = UK)
   +- 'UnresolvedRelation [main, default, orders], [], false

== Analyzed Logical Plan ==
order_id: string, order_timestamp: timestamp, customer_id: string, country: string, amount: double, currency: string, status: string, order_date: date
Project [order_id#14005, order_timestamp#14006, customer_id#14007, country#14008, amount#14009, currency#14010, status#14011, order_date#14012]
+- Filter (country#14008 = UK)
   +- SubqueryAlias main.default.orders
      +- Relation main.default.orders[order_id#14005,order_timestamp#14006,customer_id#14007,country#14008,amount#14009,currency#14010,status#14011,order_date#14012] parquet

== Optimized Logical Plan ==
Filter (isnotnull(country#14008) AND (country#14008 = UK))
+- Relation main.default.orders[order_id#14005,order_timestamp#14006,customer_id#14007,country#14008,amount#14009,currency#14010,status#14011,order_date#14012] parquet

== Physical Plan ==
*(1) ColumnarToRow
+- Phot

In [0]:
# Partition pruning, case 2: filter on both partition columns (country and
# order_date) and print the extended plan.
spark.sql("""
SELECT * FROM main.default.orders
WHERE country = 'AU'
  AND order_date = DATE('2024-08-23')
""").explain(extended=True)


== Parsed Logical Plan ==
'Project [*]
+- 'Filter (('country = AU) AND ('order_date = 'DATE(2024-08-23)))
   +- 'UnresolvedRelation [main, default, orders], [], false

== Analyzed Logical Plan ==
order_id: string, order_timestamp: timestamp, customer_id: string, country: string, amount: double, currency: string, status: string, order_date: date
Project [order_id#14054, order_timestamp#14055, customer_id#14056, country#14057, amount#14058, currency#14059, status#14060, order_date#14061]
+- Filter ((country#14057 = AU) AND (order_date#14061 = cast(2024-08-23 as date)))
   +- SubqueryAlias main.default.orders
      +- Relation main.default.orders[order_id#14054,order_timestamp#14055,customer_id#14056,country#14057,amount#14058,currency#14059,status#14060,order_date#14061] parquet

== Optimized Logical Plan ==
Filter (((isnotnull(order_date#14061) AND (order_date#14061 = 2024-08-23)) AND isnotnull(country#14057)) AND (country#14057 = AU))
+- Relation main.default.orders[order_id#14054,orde

### Demonstrate Delta Lake Time Travel

In [0]:
%sql
-- List the table's commit history (one row per version) before making changes
DESCRIBE HISTORY main.default.orders;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2025-12-01T15:45:18.000Z,77045635878552,sakethkarumudi@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [""country"",""order_date""], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.parquet.compression.codec"":""zstd"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(75496836158763),1201-152550-mh1pe68-v2n,1.0,WriteSerializable,False,"Map(numFiles -> 777, numRemovedFiles -> 5, numRemovedBytes -> 17971, numDeletionVectorsRemoved -> 0, numOutputRows -> 1000, numOutputBytes -> 1388835)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
1,2025-12-01T15:42:06.000Z,77045635878552,sakethkarumudi@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [""country"",""order_date""], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.parquet.compression.codec"":""zstd"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(75496836158763),1201-152550-mh1pe68-v2n,0.0,WriteSerializable,False,"Map(numFiles -> 5, numRemovedFiles -> 5, numRemovedBytes -> 17971, numDeletionVectorsRemoved -> 0, numOutputRows -> 1000, numOutputBytes -> 17971)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
0,2025-12-01T15:37:04.000Z,77045635878552,sakethkarumudi@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [""country"",""order_date""], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.parquet.compression.codec"":""zstd"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(75496836158763),1201-152550-mh1pe68-v2n,,WriteSerializable,False,"Map(numFiles -> 5, numRemovedFiles -> 0, numRemovedBytes -> 0, numDeletionVectorsRemoved -> 0, numOutputRows -> 1000, numOutputBytes -> 17971)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13


In [0]:
# Update the data so that a new table version is committed: AU orders that are
# still CREATED are moved to PROCESSING. The returned DataFrame has a single
# num_affected_rows column.

spark.sql("""
UPDATE main.default.orders
SET status = 'PROCESSING'
WHERE status = 'CREATED' AND country = 'AU'
""")

DataFrame[num_affected_rows: bigint]

In [0]:
%sql

-- Check the history again: the UPDATE on status/country should now be listed
-- as a new version on top of the earlier table writes.
DESCRIBE HISTORY main.default.orders;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
11,2025-12-01T16:06:14.000Z,77045635878552,sakethkarumudi@gmail.com,OPTIMIZE,"Map(predicate -> [], auto -> false, clusterBy -> [], zOrderBy -> [""customer_id""], batchId -> 0)",,List(75496836158763),1201-152550-mh1pe68-v2n,9.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 835, numRemovedBytes -> 1668702, p25FileSize -> 2357, numDeletionVectorsRemoved -> 86, conflictDetectionTimeMs -> 106, minFileSize -> 2355, numAddedFiles -> 400, maxFileSize -> 2519, p75FileSize -> 2382, p50FileSize -> 2381, numAddedBytes -> 957078)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
10,2025-12-01T16:05:35.000Z,77045635878552,sakethkarumudi@gmail.com,OPTIMIZE,"Map(predicate -> [], auto -> false, clusterBy -> [], zOrderBy -> [""customer_id""], batchId -> 1)",,List(75496836158763),1201-152550-mh1pe68-v2n,9.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 507, numRemovedBytes -> 1013379, p25FileSize -> 2357, numDeletionVectorsRemoved -> 46, minFileSize -> 2356, numAddedFiles -> 249, maxFileSize -> 2558, p75FileSize -> 2382, p50FileSize -> 2371, numAddedBytes -> 595790)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
9,2025-12-01T16:02:37.000Z,77045635878552,sakethkarumudi@gmail.com,OPTIMIZE,"Map(predicate -> [""(((((((((((((((((((((((((((((((('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-12-10 as date))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-02-21 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-01-03 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-08-26 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-06-28 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-07-15 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-03-08 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-04-17 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-03-20 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-08-24 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-04-03 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-11-02 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-11-25 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-02-18 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-08-01 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-08-17 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-11-25 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-09-09 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-01-16 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-08-03 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-05-08 as date)))) OR (('country <=> cast(UK as string)) AND 
('order_date <=> cast(2024-07-17 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-07-21 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-09-24 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-02-05 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-12-30 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-01-31 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-10-09 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-04-01 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-05-22 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-02-13 as date))))""], auto -> true, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(75496836158763),1201-152550-mh1pe68-v2n,8.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 2488, p25FileSize -> 2382, numDeletionVectorsRemoved -> 1, minFileSize -> 2382, numAddedFiles -> 1, maxFileSize -> 2382, p75FileSize -> 2382, p50FileSize -> 2382, numAddedBytes -> 2382)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
8,2025-12-01T16:02:33.000Z,77045635878552,sakethkarumudi@gmail.com,DELETE,"Map(predicate -> [""(amount#17554 < 100.0)""])",,List(75496836158763),1201-152550-mh1pe68-v2n,7.0,WriteSerializable,False,"Map(numRemovedFiles -> 128, numRemovedBytes -> 251782, numCopiedRows -> 0, numDeletionVectorsAdded -> 61, numDeletionVectorsRemoved -> 5, numAddedChangeFiles -> 0, executionTimeMs -> 1679, numDeletionVectorsUpdated -> 1, numDeletedRows -> 196, scanTimeMs -> 1086, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 584)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
7,2025-12-01T16:02:05.000Z,77045635878552,sakethkarumudi@gmail.com,OPTIMIZE,"Map(predicate -> [""((((((((((((((((((((((((((((((((((('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-03-18 as date))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-02-24 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-02-29 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-09-30 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-06-13 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-01-09 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-03-17 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-06-15 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-03-28 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-07-04 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-04-17 as date)))) OR (('country <=> cast(US as string)) AND ('order_date <=> cast(2024-03-29 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-12-16 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-07-11 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-09-07 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-01-28 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-09-03 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-09-24 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-09-20 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-11-04 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-01-26 as date)))) OR (('country <=> cast(IN as string)) AND 
('order_date <=> cast(2024-03-21 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-10-31 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-06-17 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-06-06 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-03-13 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-09-21 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-11-13 as date)))) OR (('country <=> cast(UK as string)) AND ('order_date <=> cast(2024-12-29 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-12-21 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-09-30 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-01-31 as date)))) OR (('country <=> cast(IN as string)) AND ('order_date <=> cast(2024-04-08 as date)))) OR (('country <=> cast(CA as string)) AND ('order_date <=> cast(2024-06-18 as date))))""], auto -> true, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(75496836158763),1201-152550-mh1pe68-v2n,6.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 3, numRemovedBytes -> 6539, p25FileSize -> 2488, numDeletionVectorsRemoved -> 2, minFileSize -> 2488, numAddedFiles -> 1, maxFileSize -> 2488, p75FileSize -> 2488, p50FileSize -> 2488, numAddedBytes -> 2488)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
6,2025-12-01T16:02:01.000Z,77045635878552,sakethkarumudi@gmail.com,UPDATE,"Map(predicate -> [""(amount#16596 > 900.0)""])",,List(75496836158763),1201-152550-mh1pe68-v2n,5.0,WriteSerializable,False,"Map(numRemovedFiles -> 129, numRemovedBytes -> 252217, numCopiedRows -> 0, numDeletionVectorsAdded -> 65, numDeletionVectorsRemoved -> 2, numAddedChangeFiles -> 0, executionTimeMs -> 5600, numDeletionVectorsUpdated -> 0, scanTimeMs -> 1152, numAddedFiles -> 97, numUpdatedRows -> 202, numAddedBytes -> 231335, rewriteTimeMs -> 4412)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
5,2025-12-01T15:57:47.000Z,77045635878552,sakethkarumudi@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(75496836158763),1201-152550-mh1pe68-v2n,4.0,WriteSerializable,True,"Map(numFiles -> 777, numOutputRows -> 1000, numOutputBytes -> 1697840)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
4,2025-12-01T15:53:48.000Z,77045635878552,sakethkarumudi@gmail.com,OPTIMIZE,"Map(predicate -> [""(((((((((((((((((('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-12-16 as date))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-12-10 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-09-24 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-12-24 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-09-07 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-02-27 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-03-13 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-06-27 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-05-10 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-06-10 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-02-07 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-06-13 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-06-01 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-04-17 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-11-04 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-09-22 as date)))) OR (('country <=> cast(AU as string)) AND ('order_date <=> cast(2024-01-31 as date))))""], auto -> true, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(75496836158763),1201-152550-mh1pe68-v2n,3.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 2, numRemovedBytes -> 3641, p25FileSize -> 1870, numDeletionVectorsRemoved -> 1, minFileSize -> 1870, numAddedFiles -> 1, maxFileSize -> 1870, p75FileSize -> 1870, p50FileSize -> 1870, numAddedBytes -> 
1870)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
3,2025-12-01T15:53:43.000Z,77045635878552,sakethkarumudi@gmail.com,UPDATE,"Map(predicate -> [""((status#14270 = CREATED) AND (country#14267 = AU))""])",,List(75496836158763),1201-152550-mh1pe68-v2n,2.0,WriteSerializable,False,"Map(numRemovedFiles -> 47, numRemovedBytes -> 83381, numCopiedRows -> 0, numDeletionVectorsAdded -> 17, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 7049, numDeletionVectorsUpdated -> 0, scanTimeMs -> 1820, numAddedFiles -> 64, numUpdatedRows -> 72, numAddedBytes -> 114645, rewriteTimeMs -> 5205)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
2,2025-12-01T15:45:18.000Z,77045635878552,sakethkarumudi@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [""country"",""order_date""], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.parquet.compression.codec"":""zstd"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(75496836158763),1201-152550-mh1pe68-v2n,1.0,WriteSerializable,False,"Map(numFiles -> 777, numRemovedFiles -> 5, numRemovedBytes -> 17971, numDeletionVectorsRemoved -> 0, numOutputRows -> 1000, numOutputBytes -> 1388835)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13


In [0]:
%sql

-- Delta table time travel: read the table as it was at version 2.
-- NOTE(review): the version number is hard-coded. Every re-run of the
-- overwrite cell adds a version, so confirm the intended version against
-- DESCRIBE HISTORY before relying on this query.
SELECT * FROM main.default.orders VERSION AS OF 2 LIMIT 20;

order_id,order_timestamp,customer_id,country,amount,currency,status,order_date
ORD00039,2024-02-29T01:56:00.000Z,CUST3128,US,949.17,USD,CREATED,2024-02-29
ORD00764,2024-02-29T16:19:00.000Z,CUST2135,US,459.2,USD,CANCELLED,2024-02-29
ORD00914,2024-02-29T18:52:00.000Z,CUST6642,US,987.67,USD,CREATED,2024-02-29
ORD00076,2024-12-21T03:32:00.000Z,CUST6887,IN,950.11,INR,PAID,2024-12-21
ORD00119,2024-12-21T01:58:00.000Z,CUST9606,IN,673.08,INR,PAID,2024-12-21
ORD00516,2024-04-07T14:19:00.000Z,CUST5702,CA,409.77,CAD,CANCELLED,2024-04-07
ORD00547,2024-04-07T01:49:00.000Z,CUST1973,CA,345.85,CAD,CREATED,2024-04-07
ORD00254,2024-09-11T10:59:00.000Z,CUST9093,US,508.63,USD,CREATED,2024-09-11
ORD00405,2024-09-11T02:54:00.000Z,CUST2078,US,315.82,USD,PAID,2024-09-11
ORD00029,2024-12-24T01:11:00.000Z,CUST5595,AU,229.46,AUD,CREATED,2024-12-24


### Demonstrate Schema Evolution

In [0]:
# Build a frame whose schema is wider than the table's: a constant
# payment_method and an all-null string column coupon_code.
df = (
    data
    .withColumn("payment_method", lit("CARD"))
    .withColumn("coupon_code", lit(None).cast("string"))
)


In [0]:
# Merge schema: append df to the existing table and let Delta add the two new
# columns to the table schema instead of rejecting the write.
# NOTE(review): df is built from the same orders that were already written to
# the table, so this append inserts them a second time, and re-running the
# cell appends them yet again.
(
    df.write
    .format("delta")
    .mode("append")
    .option("mergeSchema", "true")
    .saveAsTable("main.default.orders")
)

In [0]:
%sql
-- Confirm the merged schema: payment_method and coupon_code should now be listed
DESCRIBE EXTENDED main.default.orders;

col_name,data_type,comment
order_id,string,
order_timestamp,timestamp,
customer_id,string,
country,string,
amount,double,
currency,string,
status,string,
order_date,date,
payment_method,string,
coupon_code,string,


In [0]:
%sql
-- Preview the table after the schema-merging append. LIMIT keeps the result
-- bounded instead of returning the whole table, matching the earlier preview
-- query. There is no ORDER BY, so which rows are returned is not guaranteed.
SELECT * FROM main.default.orders LIMIT 20;

### Demonstrate Updates & Deletes using Delta

In [0]:
%sql
-- Set status to CANCELLED for every order with amount above 900,
-- whatever its current status is.
UPDATE main.default.orders
SET status = 'CANCELLED'
WHERE amount > 900;

num_affected_rows
202


In [0]:
%sql
-- Delete every order with amount below 100
DELETE FROM main.default.orders
WHERE amount < 100;

num_affected_rows
196


### Optimize the table

In [0]:
%sql

-- Optimize the table: compact its data files and Z-order them by customer_id
OPTIMIZE main.default.orders
ZORDER BY (customer_id);

path,metrics
,"List(649, 1342, List(2355, 2558, 2392.7087827426813, 649, 1552868), List(1745, 2463, 1998.5700447093889, 1342, 2682081), 715, List(minCubeSize(107374182400), List(0, 0), List(1408, 2839347), 0, List(1342, 2682081), 649, null), null, 0, 2, 1408, 66, false, 0, 0, 1764605058742, 1764605174283, 8, 649, null, List(132, 143), null, 10, 10, 101859, 0, null)"


In [0]:
%sql
-- Re-check table metadata after OPTIMIZE; compare numFiles and sizeInBytes
-- with the earlier DESCRIBE DETAIL output.
DESCRIBE DETAIL main.default.orders;

format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,20bd279b-379a-493f-b7e7-c9f261d12565,main.default.orders,,,2025-12-01T15:37:00.000Z,2025-12-01T16:06:14.000Z,"List(country, order_date)",List(),715,1710134,"Map(delta.parquet.compression.codec -> zstd, delta.enableDeletionVectors -> true)",3,7,"List(appendOnly, deletionVectors, invariants)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False


### Show how small file problems can occur with too many partitions and how OPTIMIZE helps.

In [0]:
%sql

-- Run a plain OPTIMIZE (no ZORDER) to compact small files in the table
OPTIMIZE main.default.orders;

path,metrics
,"List(0, 0, List(null, null, 0.0, 0, 0), List(null, null, 0.0, 0, 0), 715, null, null, 0, 0, 715, 715, true, 0, 0, 1764606508923, 1764606510727, 8, 0, null, List(0, 0), null, 10, 10, 0, 0, null)"


In [0]:
%sql

-- Compare numFiles / sizeInBytes with the DESCRIBE DETAIL taken before this OPTIMIZE.
-- NOTE(review): numFiles only drops if OPTIMIZE actually rewrote files; if the
-- table was already compacted (e.g. by the earlier OPTIMIZE ... ZORDER BY run)
-- the numbers stay the same. Query performance is not measured in this notebook.
DESCRIBE DETAIL main.default.orders;

format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,20bd279b-379a-493f-b7e7-c9f261d12565,main.default.orders,,,2025-12-01T15:37:00.000Z,2025-12-01T16:06:14.000Z,"List(country, order_date)",List(),715,1710134,"Map(delta.parquet.compression.codec -> zstd, delta.enableDeletionVectors -> true)",3,7,"List(appendOnly, deletionVectors, invariants)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False
