# 🧠 Databricks Delta Tables 101 (Managed Tables Only)
End-to-end learning notebook for Delta Tables: ACID operations, Time Travel, CDC, and Streaming with checkpoints.

In [0]:
%sql
-- Workspace setup: make sure the demo catalog and the delta101 schema exist,
-- then make demo.delta101 the session default so later cells can refer to
-- tables by their bare name.
CREATE CATALOG IF NOT EXISTS demo;
CREATE SCHEMA IF NOT EXISTS demo.delta101;

USE CATALOG demo;
USE SCHEMA delta101;

In [0]:
%sql
-- (Re)create the managed Delta table used throughout the notebook.
-- CREATE OR REPLACE discards any previous definition and data of sales_delta,
-- so re-running this cell always starts from the five-column schema below.
CREATE OR REPLACE TABLE sales_delta (
  id BIGINT,
  ts TIMESTAMP,
  product STRING,
  qty INT,
  price DECIMAL(10,2)
) USING DELTA;

-- Seed three rows. The column list is spelled out so the values are bound by
-- name rather than by position in the table definition.
INSERT INTO sales_delta (id, ts, product, qty, price) VALUES
  (1, current_timestamp(), 'Pencil', 5, 1.20),
  (2, current_timestamp(), 'Pen', 2, 2.50),
  (3, current_timestamp(), 'Notebook', 1, 4.90);

-- Inspect the freshly loaded table in a deterministic order.
SELECT * FROM sales_delta ORDER BY id;

id,ts,product,qty,price
1,2025-10-27T04:20:10.817Z,Pencil,5,1.2
2,2025-10-27T04:20:10.817Z,Pen,2,2.5
3,2025-10-27T04:20:10.817Z,Notebook,1,4.9


In [0]:
%sql
-- Row-level DML on a Delta table: each statement below is its own commit.

-- 1) Set the quantity of order 1 to 10.
UPDATE sales_delta SET qty = 10 WHERE id = 1;

-- 2) Remove every 'Pen' row.
DELETE FROM sales_delta WHERE product = 'Pen';

-- 3) Upsert a single 'Pen' row keyed on id. After the DELETE above there is no
--    row with id = 2, so the NOT MATCHED branch inserts it.
--    Columns are listed explicitly instead of using UPDATE SET * / INSERT *:
--    the star forms require the source to provide every target column, so they
--    stop working once the table gains a column the source does not have.
--    The source literals are cast to the target column types.
MERGE INTO sales_delta AS t
USING (
  SELECT
    CAST(2 AS BIGINT) AS id,
    current_timestamp() AS ts,
    'Pen' AS product,
    3 AS qty,
    CAST(2.50 AS DECIMAL(10,2)) AS price
) AS s
ON t.id = s.id
WHEN MATCHED THEN UPDATE SET
  t.ts = s.ts,
  t.product = s.product,
  t.qty = s.qty,
  t.price = s.price
WHEN NOT MATCHED THEN INSERT (id, ts, product, qty, price)
  VALUES (s.id, s.ts, s.product, s.qty, s.price);

-- Inspect the result in a deterministic order.
SELECT * FROM sales_delta ORDER BY id;

id,ts,product,qty,price
1,2025-10-27T04:20:10.817Z,Pencil,10,1.2
2,2025-10-27T04:22:02.955Z,Pen,3,2.5
3,2025-10-27T04:20:10.817Z,Notebook,1,4.9


In [0]:
%sql
-- Show the commit log of sales_delta: one row per table version with the
-- operation that produced it and its metrics.
DESCRIBE HISTORY sales_delta;
-- Time travel: read the table as it was at commit version 0.
-- NOTE(review): in the history captured in this notebook, version 0 is the
-- CREATE OR REPLACE TABLE commit and the first INSERT is version 1, so this
-- query returns no rows there; VERSION AS OF 1 would show the three seed rows.
-- Confirm against your own DESCRIBE HISTORY output.
SELECT * FROM sales_delta VERSION AS OF 0;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
9,2025-10-27T04:09:46.000Z,73874938602462,rajendravechalapu@gmail.com,RESTORE,"Map(version -> 0, timestamp -> null)",,List(546745205863369),1027-035855-z3gfqxmp-v2n,8.0,Serializable,False,"Map(numRestoredFiles -> 0, removedFilesSize -> 3636, numRemovedFiles -> 2, restoredFilesSize -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numOfFilesAfterRestore -> 0, tableSizeAfterRestore -> 0)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
8,2025-10-27T04:07:39.000Z,73874938602462,rajendravechalapu@gmail.com,MERGE,"Map(predicate -> [""(id#12861L = cast(id#12851 as bigint))""], clusterBy -> [], matchedPredicates -> [{""actionType"":""update""}], statsOnLoad -> true, notMatchedBySourcePredicates -> [], notMatchedPredicates -> [{""actionType"":""insert""}])",,List(546745205863369),1027-035855-z3gfqxmp-v2n,7.0,WriteSerializable,False,"Map(numTargetRowsCopied -> 0, numTargetRowsDeleted -> 0, numTargetFilesAdded -> 1, numTargetBytesAdded -> 1380, numTargetBytesRemoved -> 0, numTargetDeletionVectorsAdded -> 0, numTargetRowsMatchedUpdated -> 0, executionTimeMs -> 2237, materializeSourceTimeMs -> 3, numTargetRowsInserted -> 1, numTargetRowsMatchedDeleted -> 0, numTargetDeletionVectorsUpdated -> 0, scanTimeMs -> 945, numTargetRowsUpdated -> 0, numOutputRows -> 1, numTargetDeletionVectorsRemoved -> 0, numTargetRowsNotMatchedBySourceUpdated -> 0, numTargetChangeFilesAdded -> 0, numSourceRows -> 1, numTargetFilesRemoved -> 0, numTargetRowsNotMatchedBySourceDeleted -> 0, rewriteTimeMs -> 1182)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
7,2025-10-27T04:07:02.000Z,73874938602462,rajendravechalapu@gmail.com,OPTIMIZE,"Map(predicate -> [], auto -> true, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(546745205863369),1027-035855-z3gfqxmp-v2n,6.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 2301, p25FileSize -> 2256, numDeletionVectorsRemoved -> 1, minFileSize -> 2256, numAddedFiles -> 1, maxFileSize -> 2256, p75FileSize -> 2256, p50FileSize -> 2256, numAddedBytes -> 2256)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
6,2025-10-27T04:06:59.000Z,73874938602462,rajendravechalapu@gmail.com,DELETE,"Map(predicate -> [""(product#12474 = Pen)""])",,List(546745205863369),1027-035855-z3gfqxmp-v2n,5.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 1625, numDeletionVectorsUpdated -> 0, numDeletedRows -> 1, scanTimeMs -> 1104, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 520)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
5,2025-10-27T04:06:21.000Z,73874938602462,rajendravechalapu@gmail.com,OPTIMIZE,"Map(predicate -> [], auto -> true, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(546745205863369),1027-035855-z3gfqxmp-v2n,4.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 2, numRemovedBytes -> 4449, p25FileSize -> 2301, numDeletionVectorsRemoved -> 1, minFileSize -> 2301, numAddedFiles -> 1, maxFileSize -> 2301, p75FileSize -> 2301, p50FileSize -> 2301, numAddedBytes -> 2301)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
4,2025-10-27T04:06:19.000Z,73874938602462,rajendravechalapu@gmail.com,UPDATE,"Map(predicate -> [""(id#11870L = 1)""])",,List(546745205863369),1027-035855-z3gfqxmp-v2n,3.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 2457, numDeletionVectorsUpdated -> 0, scanTimeMs -> 962, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 2132, rewriteTimeMs -> 1494)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
3,2025-10-27T04:06:01.000Z,73874938602462,rajendravechalapu@gmail.com,OPTIMIZE,"Map(predicate -> [], auto -> true, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(546745205863369),1027-035855-z3gfqxmp-v2n,2.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 2, numRemovedBytes -> 3618, p25FileSize -> 2317, numDeletionVectorsRemoved -> 1, minFileSize -> 2317, numAddedFiles -> 1, maxFileSize -> 2317, p75FileSize -> 2317, p50FileSize -> 2317, numAddedBytes -> 2317)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
2,2025-10-27T04:05:58.000Z,73874938602462,rajendravechalapu@gmail.com,UPDATE,"Map(predicate -> [""(id#11254L = 1)""])",,List(546745205863369),1027-035855-z3gfqxmp-v2n,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 4130, numDeletionVectorsUpdated -> 0, scanTimeMs -> 2117, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 2132, rewriteTimeMs -> 1983)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
1,2025-10-27T03:59:21.000Z,73874938602462,rajendravechalapu@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> true, partitionBy -> [])",,List(546745205863369),1027-035855-z3gfqxmp-v2n,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 3, numOutputBytes -> 1486)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
0,2025-10-27T03:59:14.000Z,73874938602462,rajendravechalapu@gmail.com,CREATE OR REPLACE TABLE,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true"",""delta.enableRowTracking"":""true"",""delta.rowTracking.materializedRowIdColumnName"":""_row-id-col-4875d927-75c6-4590-9820-d86b27e25dbd"",""delta.rowTracking.materializedRowCommitVersionColumnName"":""_row-commit-version-col-2d94d1fa-0a8c-4960-9037-c49ea55c2b12""}, statsOnLoad -> false)",,List(546745205863369),1027-035855-z3gfqxmp-v2n,,WriteSerializable,True,Map(),,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13


In [0]:
%sql
-- Schema change through DDL: extend sales_delta with a STRING column named
-- channel. Only the table schema is altered here; no rows are written.
ALTER TABLE sales_delta
  ADD COLUMN channel STRING;



[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-5448948360445371>, line 1[0m
[0;32m----> 1[0m get_ipython()[38;5;241m.[39mrun_cell_magic([38;5;124m'[39m[38;5;124msql[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124m'[39m, [38;5;124m"[39m[38;5;124m-- Add a new column [39m[38;5;124m'[39m[38;5;124mchannel[39m[38;5;124m'[39m[38;5;124m of type STRING to the sales_delta table[39m[38;5;130;01m\n[39;00m[38;5;124mALTER TABLE sales_delta ADD COLUMN channel STRING;[39m[38;5;130;01m\n[39;00m[38;5;130;01m\n[39;00m[38;5;124m-- Add a table constraint to ensure [39m[38;5;124m'[39m[38;5;124mqty[39m[38;5;124m'[39m[38;5;124m is always positive[39m[38;5;130;01m\n[39;00m[38;5;124mALTER TABLE sales_delta ADD CONSTRAINT qty_positive CHECK (qty > 0);[39m[38;5;130;01m\n[39;00m[38;5;130;01m\n[39;00m[38;5;124m-- Show d

In [0]:
%sql
-- Attach a named CHECK constraint so that qty must be greater than zero.
ALTER TABLE sales_delta
  ADD CONSTRAINT qty_positive CHECK (qty > 0);

-- Table-level metadata (format, location, file count, properties, ...);
-- the new constraint is listed among the table properties.
DESCRIBE DETAIL sales_delta;

format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,43313070-7996-4e48-84c7-4f21b250dfda,workspace.default.sales_delta,,,2025-10-27T03:59:12.857Z,2025-10-27T04:25:35.000Z,List(),List(),0,0,"Map(delta.enableDeletionVectors -> true, delta.enableRowTracking -> true, delta.constraints.qty_positive -> qty > 0, delta.rowTracking.materializedRowCommitVersionColumnName -> _row-commit-version-col-2d94d1fa-0a8c-4960-9037-c49ea55c2b12, delta.rowTracking.materializedRowIdColumnName -> _row-id-col-4875d927-75c6-4590-9820-d86b27e25dbd)",3,7,"List(appendOnly, checkConstraints, deletionVectors, domainMetadata, invariants, rowTracking)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False


In [0]:
# Spark SQL column helpers, under the conventional alias F.
from pyspark.sql import functions as F

# Load the current contents of the table and set channel = 'web' on every row.
df = (
    spark.table('demo.delta101.sales_delta')
    .withColumn('channel', F.lit('web'))
)

# Append the tagged rows back into the same Delta table.
# mergeSchema lets the write add columns that the table does not have yet.
# NOTE: the DataFrame is read from the very table it is appended to, so this
# writes a second copy of every existing row (the copies carry channel = 'web').
(
    df.write
    .format('delta')
    .mode('append')
    .option('mergeSchema', 'true')
    .saveAsTable('demo.delta101.sales_delta')
)

In [0]:
# Render the current rows of the table in the notebook's result grid.
sales_snapshot = spark.table('demo.delta101.sales_delta')
display(sales_snapshot)

id,ts,product,qty,price,channel
3,2025-10-27T04:20:10.817Z,Notebook,1,4.9,
1,2025-10-27T04:20:10.817Z,Pencil,10,1.2,
3,2025-10-27T04:20:10.817Z,Notebook,1,4.9,web
1,2025-10-27T04:20:10.817Z,Pencil,10,1.2,web
2,2025-10-27T04:22:02.955Z,Pen,3,2.5,web
2,2025-10-27T04:22:02.955Z,Pen,3,2.5,


In [0]:
%sql
-- File maintenance for sales_delta.

-- Compact small data files into larger ones to make reads cheaper.
OPTIMIZE sales_delta;

-- Compact again, this time co-locating rows by product and ts so that
-- filters on those columns can skip more files.
OPTIMIZE sales_delta
ZORDER BY (product, ts);

-- Delete data files that are no longer referenced by the table and are older
-- than the retention window: 168 hours (7 days).
VACUUM sales_delta RETAIN 168 HOURS;

path


In [0]:
%sql
-- Turn on the Change Data Feed for sales_delta so that row-level changes made
-- by later commits can be queried with table_changes().
ALTER TABLE sales_delta
SET TBLPROPERTIES (
  'delta.enableChangeDataFeed' = 'true'
);


In [0]:
%sql
-- List the commit log and note two version numbers for the CDC query later on:
-- the SET TBLPROPERTIES commit that enabled delta.enableChangeDataFeed, and
-- the latest (highest) version of the table.

DESCRIBE HISTORY sales_delta;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
17,2025-10-27T04:33:53.000Z,73874938602462,rajendravechalapu@gmail.com,UPDATE,"Map(predicate -> [""(product#18862 = Pencil)""])",,List(529636418100290),1027-035855-z3gfqxmp-v2n,16.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 159, numDeletionVectorsUpdated -> 0, scanTimeMs -> 159, numAddedFiles -> 0, numUpdatedRows -> 0, numAddedBytes -> 0, rewriteTimeMs -> 0)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
16,2025-10-27T04:33:42.000Z,73874938602462,rajendravechalapu@gmail.com,SET TBLPROPERTIES,"Map(properties -> {""delta.enableChangeDataFeed"":""true""})",,List(529636418100290),1027-035855-z3gfqxmp-v2n,15.0,WriteSerializable,True,Map(),,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
15,2025-10-27T04:32:38.000Z,73874938602462,rajendravechalapu@gmail.com,UPDATE,"Map(predicate -> [""(product#18634 = Pencil)""])",,List(529636418100290),1027-035855-z3gfqxmp-v2n,14.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 183, numDeletionVectorsUpdated -> 0, scanTimeMs -> 181, numAddedFiles -> 0, numUpdatedRows -> 0, numAddedBytes -> 0, rewriteTimeMs -> 0)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
14,2025-10-27T04:32:36.000Z,73874938602462,rajendravechalapu@gmail.com,SET TBLPROPERTIES,"Map(properties -> {""delta.enableChangeDataFeed"":""true""})",,List(529636418100290),1027-035855-z3gfqxmp-v2n,13.0,WriteSerializable,True,Map(),,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
13,2025-10-27T04:31:30.000Z,73874938602462,rajendravechalapu@gmail.com,VACUUM END,Map(status -> COMPLETED),,List(529636418100290),1027-035855-z3gfqxmp-v2n,12.0,SnapshotIsolation,True,"Map(numDeletedFiles -> 0, numVacuumedDirectories -> 1)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
12,2025-10-27T04:31:29.000Z,73874938602462,rajendravechalapu@gmail.com,VACUUM START,"Map(retentionCheckEnabled -> true, defaultRetentionMillis -> 604800000, specifiedRetentionMillis -> 604800000)",,List(529636418100290),1027-035855-z3gfqxmp-v2n,11.0,SnapshotIsolation,True,"Map(numFilesToDelete -> 0, sizeOfDataToDelete -> 0)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
11,2025-10-27T04:25:35.000Z,73874938602462,rajendravechalapu@gmail.com,ADD CONSTRAINT,"Map(name -> qty_positive, expr -> qty > 0)",,List(529636418100290),1027-035855-z3gfqxmp-v2n,10.0,WriteSerializable,False,Map(),,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
10,2025-10-27T04:24:20.000Z,73874938602462,rajendravechalapu@gmail.com,ADD COLUMNS,"Map(columns -> [{""column"":{""name"":""channel"",""type"":""string"",""nullable"":true,""metadata"":{}}}])",,List(529636418100290),1027-035855-z3gfqxmp-v2n,9.0,WriteSerializable,True,Map(),,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
9,2025-10-27T04:09:46.000Z,73874938602462,rajendravechalapu@gmail.com,RESTORE,"Map(version -> 0, timestamp -> null)",,List(546745205863369),1027-035855-z3gfqxmp-v2n,8.0,Serializable,False,"Map(numRestoredFiles -> 0, removedFilesSize -> 3636, numRemovedFiles -> 2, restoredFilesSize -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numOfFilesAfterRestore -> 0, tableSizeAfterRestore -> 0)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
8,2025-10-27T04:07:39.000Z,73874938602462,rajendravechalapu@gmail.com,MERGE,"Map(predicate -> [""(id#12861L = cast(id#12851 as bigint))""], clusterBy -> [], matchedPredicates -> [{""actionType"":""update""}], statsOnLoad -> true, notMatchedBySourcePredicates -> [], notMatchedPredicates -> [{""actionType"":""insert""}])",,List(546745205863369),1027-035855-z3gfqxmp-v2n,7.0,WriteSerializable,False,"Map(numTargetRowsCopied -> 0, numTargetRowsDeleted -> 0, numTargetFilesAdded -> 1, numTargetBytesAdded -> 1380, numTargetBytesRemoved -> 0, numTargetDeletionVectorsAdded -> 0, numTargetRowsMatchedUpdated -> 0, executionTimeMs -> 2237, materializeSourceTimeMs -> 3, numTargetRowsInserted -> 1, numTargetRowsMatchedDeleted -> 0, numTargetDeletionVectorsUpdated -> 0, scanTimeMs -> 945, numTargetRowsUpdated -> 0, numOutputRows -> 1, numTargetDeletionVectorsRemoved -> 0, numTargetRowsNotMatchedBySourceUpdated -> 0, numTargetChangeFilesAdded -> 0, numSourceRows -> 1, numTargetFilesRemoved -> 0, numTargetRowsNotMatchedBySourceDeleted -> 0, rewriteTimeMs -> 1182)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13


In [0]:
%sql

-- Produce a change for the Change Data Feed: add 1 to qty on every row whose
-- product is 'Pencil'. The result reports the number of affected rows.
UPDATE sales_delta
SET qty = qty + 1
WHERE product = 'Pencil';



num_affected_rows
0


In [0]:
%sql
-- Show the commit log again to find the version written by the preceding
-- UPDATE and the current latest version of the table.
DESCRIBE HISTORY sales_delta;

In [0]:
%sql
-- Read the Change Data Feed of sales_delta (row-level inserts, updates and
-- deletes, with _change_type / _commit_version / _commit_timestamp columns).
--
-- General form: table_changes('sales_delta', <cdf_enabled_version> [, <end_version>])
--
-- The start version must not be earlier than the commit that enabled
-- delta.enableChangeDataFeed, because no change data exists before it.
-- 14 is the SET TBLPROPERTIES commit in the history recorded in this notebook;
-- replace it with the matching version from your own DESCRIBE HISTORY output.
-- No end version is given, so all changes up to the latest commit are returned
-- and the query does not go stale when further commits are made.
SELECT *
FROM table_changes('sales_delta', 14);

id,ts,product,qty,price,channel,_change_type,_commit_version,_commit_timestamp


In [0]:
%sql
-- Table-level metadata: format, location, size, properties, protocol versions.
DESCRIBE DETAIL sales_delta;

-- The ten most recent commits of the table.
DESCRIBE HISTORY sales_delta
LIMIT 10;

-- Opt the table into optimized writes and automatic compaction of small files.
ALTER TABLE sales_delta
SET TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite' = 'true',
  'delta.autoOptimize.autoCompact' = 'true'
);


[0;36m  File [0;32m<command-5448948360445378>, line 1[0;36m[0m
[0;31m    DESCRIBE HISTORY sales_delta LIMIT 10;[0m
[0m             ^[0m
[0;31mSyntaxError[0m[0;31m:[0m invalid syntax
