# 🧠 Databricks Delta Tables 101 (Managed Tables Only)
End-to-end learning notebook for Delta Tables: ACID operations, Time Travel, CDC, and Streaming with checkpoints.

In [0]:
%sql
-- Workspace setup: make sure the demo catalog and the delta101 schema exist,
-- then select them so later cells can refer to tables by their bare name.
CREATE CATALOG IF NOT EXISTS demo;
CREATE SCHEMA IF NOT EXISTS demo.delta101;

USE CATALOG demo;
USE SCHEMA delta101;


In [0]:
%sql
-- (Re)create the managed Delta table used throughout this notebook.
-- CREATE OR REPLACE makes the cell safe to re-run: the table is replaced
-- with this five-column definition each time.
CREATE OR REPLACE TABLE sales_delta (
  id BIGINT,
  ts TIMESTAMP,
  product STRING,
  qty INT,
  price DECIMAL(10,2)
) USING DELTA;

-- Seed three rows. The column list is spelled out so each value is bound to a
-- named column rather than relying on the table's column order.
INSERT INTO sales_delta (id, ts, product, qty, price) VALUES
  (1, current_timestamp(), 'Pencil', 5, 1.20),
  (2, current_timestamp(), 'Pen', 2, 2.50),
  (3, current_timestamp(), 'Notebook', 1, 4.90);

-- Inspect the seeded rows in a stable order.
SELECT * FROM sales_delta ORDER BY id;

In [0]:
%sql
-- Row-level DML on the Delta table: one UPDATE, one DELETE, one MERGE.

-- Set the quantity of row id = 1 to 10.
UPDATE sales_delta
SET qty = 10
WHERE id = 1;

-- Remove every 'Pen' row.
DELETE FROM sales_delta
WHERE product = 'Pen';

-- Upsert a single 'Pen' row keyed on id: update the row if id = 2 exists,
-- otherwise insert it.
MERGE INTO sales_delta tgt
USING (
  SELECT
    2 AS id,
    current_timestamp() AS ts,
    'Pen' AS product,
    3 AS qty,
    2.50 AS price
) src
ON tgt.id = src.id
WHEN MATCHED THEN
  UPDATE SET *
WHEN NOT MATCHED THEN
  INSERT *;

-- Review the table after the three operations.
SELECT *
FROM sales_delta
ORDER BY id;

In [0]:
%sql
-- List the commits recorded for the table.
DESCRIBE HISTORY sales_delta;

-- Time travel: read the table as it was at version 0.
-- NOTE(review): on a fresh run version 0 should be the CREATE OR REPLACE TABLE
-- commit, i.e. the table before the first INSERT - confirm against the history.
SELECT *
FROM sales_delta VERSION AS OF 0;

In [0]:
%sql
-- Schema change: extend sales_delta with a STRING column named 'channel'.
ALTER TABLE sales_delta
ADD COLUMNS (channel STRING);



In [0]:
%sql
-- Data-quality rule: add a CHECK constraint named qty_positive that requires
-- qty > 0.
ALTER TABLE sales_delta
ADD CONSTRAINT qty_positive
CHECK (qty > 0);

-- Table-level metadata for sales_delta.
DESCRIBE DETAIL sales_delta;

In [0]:
# Import functions from pyspark.sql as F
from pyspark.sql import functions as F

# Fully qualified name of the Delta table that this cell both reads and appends to.
target_table = 'demo.delta101.sales_delta'

# Take the table's current rows and set 'channel' to the literal 'web' on each one.
# When the cells are run in order the column already exists (added by the earlier
# ALTER TABLE cell), so withColumn replaces its values rather than adding a column.
df = spark.table(target_table).withColumn('channel', F.lit('web'))

# Append the rows back into the same table with schema merging enabled.
# NOTE: because the DataFrame was read from this table, every existing row is
# written again as an additional row (same id, channel = 'web').
writer = (
    df.write
    .format('delta')               # Delta Lake format
    .mode('append')                # add rows, keep the existing ones
    .option('mergeSchema', 'true') # let the write add columns missing from the table schema
)
writer.saveAsTable(target_table)

In [0]:
# Render the current contents of the table in the notebook output.
current_rows = spark.table('demo.delta101.sales_delta')
display(current_rows)

In [0]:
%sql
-- Table maintenance.

-- Compact small data files into larger ones to improve read performance.
OPTIMIZE sales_delta;

-- Compact again, this time clustering the data on (product, ts) so that
-- queries filtering on those columns can skip more files.
OPTIMIZE sales_delta
ZORDER BY (product, ts);

-- Delete data files that the table no longer references, keeping anything
-- from the last 168 hours (7 days).
VACUUM sales_delta RETAIN 168 HOURS;

In [0]:
%sql
-- Turn on the Change Data Feed (CDF) table property so that row-level changes
-- to sales_delta are tracked.
ALTER TABLE sales_delta
SET TBLPROPERTIES (
  'delta.enableChangeDataFeed' = 'true'
);


In [0]:
%sql
-- Look up the table's latest version number here; it is the reference point
-- for the Change Data Feed query later in the notebook.
DESCRIBE HISTORY sales_delta;

In [0]:
%sql
-- Produce a change after CDF was enabled: add 1 to qty on every 'Pencil' row.
UPDATE sales_delta
SET qty = qty + 1
WHERE product = 'Pencil';



In [0]:
%sql
-- List the table's commits again, now including the UPDATE from the previous cell.
DESCRIBE HISTORY sales_delta;

In [0]:
# Read the Change Data Feed (CDF) of sales_delta: the row-level inserts, updates
# and deletes recorded since CDF was enabled, up to the table's latest version.
#
# This is a Python cell because table_changes() only accepts literal version
# numbers, and literals such as 14 and 17 are valid for one particular table
# history only. The start version is looked up from the table history instead.
from pyspark.sql import functions as F

cdf_table = 'demo.delta101.sales_delta'

# Latest SET TBLPROPERTIES commit whose recorded properties mention the CDF
# flag; taking the max also covers a table that was recreated and re-enabled.
cdf_enabled_version = (
    spark.sql(f'DESCRIBE HISTORY {cdf_table}')
    .where(F.col('operation') == 'SET TBLPROPERTIES')
    .where(F.col('operationParameters')['properties'].contains('delta.enableChangeDataFeed'))
    .agg(F.max('version'))
    .first()[0]
)

if cdf_enabled_version is None:
    raise ValueError(
        f"No commit enabling 'delta.enableChangeDataFeed' was found in the history of "
        f"{cdf_table}; run the cell that enables the Change Data Feed first."
    )

# No ending version is given, so the read runs through the latest commit.
changes = (
    spark.read
    .option('readChangeFeed', 'true')
    .option('startingVersion', cdf_enabled_version)
    .table(cdf_table)
)

display(changes)

In [0]:
%sql
-- Table-level metadata for sales_delta.
DESCRIBE DETAIL sales_delta;

-- The ten most recent entries of the table's history.
DESCRIBE HISTORY sales_delta LIMIT 10;

-- Set the optimized-write and auto-compaction table properties on sales_delta.
ALTER TABLE sales_delta
SET TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite' = 'true',
  'delta.autoOptimize.autoCompact' = 'true'
);


In [0]:
# Marker printed once the notebook has run through to its last cell.
completion_message = "complete"
print(completion_message)