### Creating the Schema

In [0]:
# Bootstrap the Unity Catalog objects used by the rest of the notebook, parent
# first: catalog -> schema -> volume. Every statement uses IF NOT EXISTS, so
# re-running the cell is harmless.
for ddl in (
    "create catalog if not exists bankcatalog;",
    "create schema if not exists bankcatalog.bankschema;",
    "create volume if not exists bankcatalog.bankschema.bankvolume;",
):
    spark.sql(ddl)

### Reading the data as CSV & writing it as Delta tables

In [0]:
# Load the two CSV extracts from the volume. The first row is treated as the
# header; no schema or inferSchema option is given, so column types are left to
# the CSV reader's defaults.
loans_df = spark.read.csv(
    "/Volumes/bankcatalog/bankschema/bankvolume/loans/loans.csv",
    header=True,
)
accounts_df = spark.read.csv(
    "/Volumes/bankcatalog/bankschema/bankvolume/accounts/accounts.csv",
    header=True,
)

# Persist each DataFrame in Delta format at a path inside the volume,
# replacing whatever a previous run wrote there.
loans_df.write.save(
    "/Volumes/bankcatalog/bankschema/bankvolume/targetloandir",
    format="delta",
    mode="overwrite",
)
accounts_df.write.save(
    "/Volumes/bankcatalog/bankschema/bankvolume/targetaccountsdir",
    format="delta",
    mode="overwrite",
)

In [0]:
# Register both DataFrames as Delta tables in bankcatalog.bankschema,
# overwriting the tables if they already exist.
loans_df.write.format("delta").mode("overwrite").saveAsTable("bankcatalog.bankschema.loandelta")
accounts_df.write.format("delta").mode("overwrite").saveAsTable("bankcatalog.bankschema.accountdelta")

### Optimizing the Delta tables and checking history

In [0]:
%sql
-- Run OPTIMIZE on the loan Delta table.
OPTIMIZE bankcatalog.bankschema.loandelta

In [0]:
%sql
-- List the commit history of the loan table (the OPTIMIZE run should appear as its own entry).
DESCRIBE HISTORY bankcatalog.bankschema.loandelta

In [0]:
%sql
-- Run OPTIMIZE on the account Delta table.
OPTIMIZE bankcatalog.bankschema.accountdelta

In [0]:
%sql
-- List the commit history of the account table.
DESCRIBE HISTORY bankcatalog.bankschema.accountdelta

In [0]:
# Print the first 10 loan rows without truncating long column values.
loans_df.show(n=10, truncate=False)

In [0]:
%sql
-- Start from a clean slate: remove any previous copy of the table, then declare
-- an empty Delta table. All columns are declared as STRING.
DROP TABLE IF EXISTS bankcatalog.bankschema.loantabledelta;

CREATE TABLE IF NOT EXISTS bankcatalog.bankschema.loantabledelta (
  Amount           STRING,
  Collateral       STRING,
  CustomerID       STRING,
  EndDate          STRING,
  InterestRate     STRING,
  LoanID           STRING,
  LoanType         STRING,
  PaymentFrequency STRING,
  StartDate        STRING,
  Status           STRING
)
USING DELTA;


In [0]:
%sql
-- Z-order the table on LoanID.
-- NOTE(review): when the cells run top to bottom the table has just been created
-- and holds no rows yet, so this presumably has nothing to rewrite - confirm intent.
OPTIMIZE bankcatalog.bankschema.loantabledelta
ZORDER BY (LoanID)

In [0]:
%sql
-- Commit history of loantabledelta before any data is written to it.
DESCRIBE HISTORY bankcatalog.bankschema.loantabledelta

In [0]:
# Load the loans DataFrame into the pre-created Delta table, replacing its contents.
loans_df.write.format("delta").mode("overwrite").saveAsTable("bankcatalog.bankschema.loantabledelta")

In [0]:
%sql
-- Peek at up to 10 rows of the freshly loaded table (no ORDER BY, so row choice is arbitrary).
SELECT *
FROM bankcatalog.bankschema.loantabledelta
LIMIT 10;

In [0]:
%sql
-- Commit history of loantabledelta after the overwrite from loans_df.
DESCRIBE HISTORY bankcatalog.bankschema.loantabledelta

### Another example of Z-order

In [0]:
%sql
-- Empty Delta table used for the second Z-order walkthrough.
-- OR REPLACE makes the cell re-runnable without a separate DROP.
CREATE OR REPLACE TABLE bankcatalog.bankschema.customer_txn (
    txn_id           INT,
    customer_id      INT,
    region           STRING,
    txn_amount       DOUBLE,
    txn_type         STRING,
    transaction_date DATE
)
USING DELTA;

In [0]:
%sql
-- Load nine transactions in three separate INSERT statements. The batches are
-- kept separate on purpose: each statement is its own commit, giving the later
-- OPTIMIZE ... ZORDER BY cell several commits to work on.
-- Columns are listed explicitly so the inserts do not depend on column order,
-- and dates are typed DATE literals rather than implicitly cast strings.

-- Batch 1
INSERT INTO bankcatalog.bankschema.customer_txn
  (txn_id, customer_id, region, txn_amount, txn_type, transaction_date)
VALUES
  (1, 1001, 'North', 250.00, 'Online',  DATE '2025-10-01'),
  (2, 1002, 'South', 400.00, 'Offline', DATE '2025-10-02'),
  (3, 1003, 'West',  600.00, 'Online',  DATE '2025-10-03');

-- Batch 2
INSERT INTO bankcatalog.bankschema.customer_txn
  (txn_id, customer_id, region, txn_amount, txn_type, transaction_date)
VALUES
  (4, 1001, 'North', 300.00, 'Offline', DATE '2025-10-01'),
  (5, 1004, 'East',  750.00, 'Online',  DATE '2025-10-02'),
  (6, 1005, 'South', 180.00, 'Online',  DATE '2025-10-03');

-- Batch 3
INSERT INTO bankcatalog.bankschema.customer_txn
  (txn_id, customer_id, region, txn_amount, txn_type, transaction_date)
VALUES
  (7, 1001, 'North', 270.00, 'Online',  DATE '2025-10-01'),
  (8, 1003, 'West',  500.00, 'Offline', DATE '2025-10-02'),
  (9, 1002, 'South', 900.00, 'Online',  DATE '2025-10-03');


In [0]:
%sql
-- Commit history of customer_txn before it is optimized.
DESCRIBE HISTORY bankcatalog.bankschema.customer_txn

In [0]:
%sql
-- Optimize the table and Z-order it on txn_id and transaction_date.
OPTIMIZE bankcatalog.bankschema.customer_txn
ZORDER BY (txn_id, transaction_date)

In [0]:
%sql
-- Commit history of customer_txn after the OPTIMIZE ... ZORDER BY run.
DESCRIBE HISTORY bankcatalog.bankschema.customer_txn

In [0]:
%sql
-- Sample up to three rows (no ORDER BY, so which rows come back is arbitrary).
SELECT *
FROM bankcatalog.bankschema.customer_txn
LIMIT 3;

In [0]:
%sql
-- Show table-level metadata for customer_txn.
describe detail bankcatalog.bankschema.customer_txn;

### Partition Pruning

In [0]:
# Write the accounts data as a Delta table partitioned by AccountType, so that
# filters on AccountType can be checked for partition pruning.
(
    accounts_df.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("AccountType")
    .saveAsTable("bankcatalog.bankschema.accountpartition")
)

In [0]:
%sql
-- Show the query plan for a filter on the partition column AccountType.
EXPLAIN
SELECT *
FROM bankcatalog.bankschema.accountpartition
WHERE AccountType = 'Business';

### Vacuuming

In [0]:
%sql
-- Vacuum the partitioned account table, retaining 183 hours of history.
-- The retention-check override is left commented out:
--SET spark.databricks.delta.retentionDurationCheck.enabled = false;
VACUUM bankcatalog.bankschema.accountpartition RETAIN 183 HOURS

### Liquid Clustering

In [0]:
%sql
-- Make bankcatalog.bankschema the current schema so later cells can use unqualified table names.
USE bankcatalog.bankschema

In [0]:
%sql
-- Recreate the liquid-clustering demo table from scratch.
-- IF EXISTS keeps the very first run (when the table does not exist yet) from failing.
DROP TABLE IF EXISTS sales_orders_liquid;

-- The CLUSTER BY clause enables liquid clustering automatically.
-- Clustering columns can have high or low cardinality, unlike partition columns,
-- which should be low-cardinality only.
-- NOTE(review): the original author's guidance is to list the clustering columns in
-- the order of the primary filter (whichever of customer_id or region is filtered
-- on first) - confirm against the liquid clustering documentation.
CREATE TABLE IF NOT EXISTS sales_orders_liquid
(
  order_id INT,
  customer_id INT,
  region STRING,
  product STRING,
  quantity INT,
  price DOUBLE,
  order_date DATE
)
USING DELTA
CLUSTER BY (customer_id, region);
     

In [0]:
%sql
-- Seed the liquid-clustered table in two separate commits.
-- Columns are listed explicitly so the inserts do not depend on column order,
-- and order dates are typed DATE literals rather than implicitly cast strings.

INSERT INTO sales_orders_liquid
  (order_id, customer_id, region, product, quantity, price, order_date)
VALUES
  (1, 101, 'North', 'Laptop',     2, 65000, DATE '2025-10-01'),
  (2, 102, 'South', 'Headphones', 5, 2500,  DATE '2025-10-01'),
  (3, 103, 'West',  'Desk Chair', 3, 4500,  DATE '2025-10-02');

INSERT INTO sales_orders_liquid
  (order_id, customer_id, region, product, quantity, price, order_date)
VALUES
  (4, 101, 'North', 'Keyboard', 1, 1200, DATE '2025-10-03'),
  (5, 104, 'East',  'Monitor',  2, 9500, DATE '2025-10-03'),
  (6, 105, 'South', 'Mouse',    4, 700,  DATE '2025-10-03');


In [0]:
%sql
-- Table-level metadata for sales_orders_liquid after the initial inserts.
DESCRIBE DETAIL sales_orders_liquid

In [0]:
%sql
-- Multiply the price of every 'North' order by 1.05.
update sales_orders_liquid
   set price = price * 1.05
 where region = 'North';

In [0]:
%sql
-- Table-level metadata for sales_orders_liquid after the UPDATE.
DESCRIBE DETAIL sales_orders_liquid

In [0]:
%sql
-- Commit history of sales_orders_liquid after the UPDATE.
DESCRIBE HISTORY sales_orders_liquid

In [0]:
%sql
-- Remove every order belonging to the 'North' region.
DELETE FROM sales_orders_liquid
WHERE region = 'North'

In [0]:
%sql
-- Commit history of sales_orders_liquid after the DELETE.
DESCRIBE HISTORY sales_orders_liquid

### Cost Efficient Environment Cloning (Shallow Clone)

In [0]:
%sql
-- Create a shallow clone of sales_orders_liquid.
-- SHALLOW must be spelled out: a bare CLONE creates a deep clone, which copies
-- the data files and defeats the cost-saving purpose of this section.
-- OR REPLACE keeps the cell re-runnable when the clone already exists.
CREATE OR REPLACE TABLE sales_orders_liquid_shallow
SHALLOW CLONE sales_orders_liquid;

-- Inspect the clone's contents right after creation.
SELECT * FROM sales_orders_liquid_shallow;

In [0]:
%sql
-- Insert two new 'North' orders into the source table only (not the clone).
-- Columns are listed explicitly and dates are typed DATE literals.
INSERT INTO sales_orders_liquid
  (order_id, customer_id, region, product, quantity, price, order_date)
VALUES
  (1,  101, 'North', 'Laptop', 2,  65000,  DATE '2025-10-01'),
  (33, 133, 'North', 'Kindle', 45, 250044, DATE '2025-10-02');

In [0]:
%sql
-- Compare the 'North' rows of the source table and of its clone in one result.
-- A SQL cell only displays the result of its last statement, so two separate
-- SELECTs would hide the first one; a labelled UNION ALL shows both.
-- Expected: the rows just inserted appear for sales_orders_liquid (data got
-- inserted in the main table) and not for sales_orders_liquid_shallow.
SELECT 'sales_orders_liquid' AS source_table, s.*
FROM sales_orders_liquid AS s
WHERE s.region = 'North'
UNION ALL
SELECT 'sales_orders_liquid_shallow' AS source_table, c.*
FROM sales_orders_liquid_shallow AS c
WHERE c.region = 'North'


In [0]:
%sql
-- Insert two new 'North' orders into the clone only (not the source table).
-- Columns are listed explicitly and dates are typed DATE literals.
INSERT INTO sales_orders_liquid_shallow
  (order_id, customer_id, region, product, quantity, price, order_date)
VALUES
  (66, 101, 'North', 'VW', 2,  65000,  DATE '2025-10-01'),
  (67, 133, 'North', 'VW', 45, 250044, DATE '2025-10-02');

In [0]:
%sql
-- Compare the 'North' rows of the source table and of its clone in one result.
-- A SQL cell only displays the result of its last statement, so two separate
-- SELECTs would hide the first one; a labelled UNION ALL shows both.
-- Expected: sales_orders_liquid still shows only its old data, while
-- sales_orders_liquid_shallow shows the new rows inserted into the clone.
SELECT 'sales_orders_liquid' AS source_table, s.*
FROM sales_orders_liquid AS s
WHERE s.region = 'North'
UNION ALL
SELECT 'sales_orders_liquid_shallow' AS source_table, c.*
FROM sales_orders_liquid_shallow AS c
WHERE c.region = 'North'
    


### Time Travel

In [0]:
%sql
-- Compare the distinct products in the current table with those visible at an
-- earlier point in time.
-- The timestamp is a single-quoted string literal, consistent with the other
-- literals in this notebook and safe when double quotes denote identifiers.
-- NOTE(review): the timestamp is hard-coded; it has to fall inside the table's
-- available history (see DESCRIBE HISTORY sales_orders_liquid) - adjust it per run.
SELECT DISTINCT 'non time travel' AS area, product
FROM sales_orders_liquid
UNION ALL
SELECT DISTINCT 'time travel' AS area, product
FROM sales_orders_liquid TIMESTAMP AS OF '2026-02-07T01:42:05.000+00:00'