In [0]:
# Notebook setup: make sure the catalog and schema exist, then drop the two
# demo tables if an earlier run left them behind.
setup_statements = [
    "CREATE CATALOG IF NOT EXISTS kaninipro",
    "CREATE SCHEMA IF NOT EXISTS kaninipro.dev",
    "drop table if exists kaninipro.dev.sample_table",
    "drop table if exists kaninipro.dev.people_cdf",
]
for statement in setup_statements:
    spark.sql(statement)

In [0]:
from pyspark.sql.functions import col
from delta.tables import DeltaTable

#time-travel and restore table demo

In [0]:
# Initial load: build a three-row DataFrame and save it as a Delta table.
columns = ["id", "name", "age"]
data = [
    (1, "Alice", 25),
    (2, "Bob", 30),
    (3, "charlie", 15),
]
df = spark.createDataFrame(data, columns)

(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("kaninipro.dev.sample_table")
)

# Change set: a single row for id 1 with a different age.
data = [(1, "Alice", 15)]
updates_df = spark.createDataFrame(data, columns)

# Upsert the change set into the table: rows whose id matches are updated,
# rows with an unknown id are inserted.
delta_table = DeltaTable.forName(spark, "kaninipro.dev.sample_table")
merge_builder = delta_table.alias("target").merge(
    updates_df.alias("source"),
    "target.id = source.id",
)
merge_builder = merge_builder.whenMatchedUpdateAll().whenNotMatchedInsertAll()
merge_builder.execute()


In [0]:
# Show the Delta commit history of sample_table.
history_df = spark.sql("describe history kaninipro.dev.sample_table")
display(history_df)

In [0]:
%sql
-- Current contents of sample_table.
SELECT *
FROM kaninipro.dev.sample_table

In [0]:
%sql
-- Time travel: contents of sample_table as of version 0.
SELECT *
FROM kaninipro.dev.sample_table VERSION AS OF 0

In [0]:
# Roll sample_table back to the state it had at version 0.
# Restoring by timestamp is also possible, e.g.:
#   spark.sql("restore table table_name TO TIMESTAMP AS OF '2026-01-20T14:30:00'")
spark.sql("restore table kaninipro.dev.sample_table version as of 0")

# Display the table contents after the restore.
restored_df = spark.table("kaninipro.dev.sample_table")
display(restored_df)

#change data feed

In [0]:
# Change Data Feed demo: create people_cdf with the change feed enabled, then
# apply two change batches with MERGE.
#
# The `indicator` column drives the merge: a source row with indicator 'D'
# deletes the matching target row; any other value updates the matching row
# or inserts a new one. `indicator` itself is not stored in the table.
columns = ["id", "name", "age", "indicator"]

# Initial load.
data = [
    (1, "Alice", 25, "I"),
    (2, "Bob", 30, "I"),
    (3, "charlie", 15, "I"),
]
df = spark.createDataFrame(data, columns)

# Filter on `indicator` before projecting it away, so the filter never
# references a column that the select has already dropped.
selected_df = df.filter(col("indicator") != "D").select("id", "name", "age")

(
    selected_df.write
    .format("delta")
    .mode("overwrite")
    .option("delta.enableChangeDataFeed", "true")
    .saveAsTable("kaninipro.dev.people_cdf")
)

# Change batches, applied in order; each one is a separate MERGE.
update_batches = [
    # update-1: delete id 1, update id 2, insert id 4
    [
        (1, "Alice", 25, "D"),
        (2, "Bob", 31, "U"),
        (4, "Vishnu", 71, "I"),
    ],
    # update-2: update id 2 again
    [(2, "Bob", 32, "U")],
]

for data in update_batches:
    updates_df = spark.createDataFrame(data, columns)
    delta_table = DeltaTable.forName(spark, "kaninipro.dev.people_cdf")
    (
        delta_table.alias("target")
        .merge(updates_df.alias("source"), "target.id = source.id")
        .whenMatchedUpdateAll(condition="source.indicator != 'D'")
        .whenMatchedDelete(condition="source.indicator = 'D'")
        .whenNotMatchedInsertAll(condition="source.indicator != 'D'")
        .execute()
    )

In [0]:
%sql
-- Commit history of people_cdf.
DESCRIBE HISTORY kaninipro.dev.people_cdf

In [0]:
%sql
-- Table contents after the second merge (update-2).
-- On a fresh run people_cdf has versions 0-2 at this point (initial write plus
-- two merges), so 2 is the latest version that can be queried here.
select * from kaninipro.dev.people_cdf version as of 2

In [0]:
%sql
-- Time travel: contents of people_cdf as of version 1.
SELECT *
FROM kaninipro.dev.people_cdf VERSION AS OF 1

In [0]:
%sql
-- Time travel: contents of people_cdf as of version 0.
SELECT *
FROM kaninipro.dev.people_cdf VERSION AS OF 0

In [0]:
# Read the change feed of people_cdf, starting from table version 0.
df_cdf = (
    spark.read
    .format("delta")
    .option("readChangeFeed", "true")
    .option("startingVersion", "0")
    .table("kaninipro.dev.people_cdf")
)

df_cdf.display()

#row level tracking

In [0]:
# Row tracking demo: rewrite people_cdf with both the change feed and row
# tracking enabled, then apply two change batches with MERGE.
#
# The `indicator` column drives the merge: a source row with indicator 'D'
# deletes the matching target row; any other value updates the matching row
# or inserts a new one. `indicator` itself is not stored in the table.
columns = ["id", "name", "age", "indicator"]

# Initial load.
data = [
    (1, "Alice", 25, "I"),
    (2, "Bob", 30, "I"),
    (3, "charlie", 15, "I"),
]
df = spark.createDataFrame(data, columns)

# Filter on `indicator` before projecting it away, so the filter never
# references a column that the select has already dropped.
selected_df = df.filter(col("indicator") != "D").select("id", "name", "age")

(
    selected_df.write
    .format("delta")
    .mode("overwrite")
    .option("delta.enableChangeDataFeed", "true")
    .option("delta.enableRowTracking", "true")
    .saveAsTable("kaninipro.dev.people_cdf")
)

# Change batches, applied in order; each one is a separate MERGE.
update_batches = [
    # update-1: delete id 1, update id 2, insert id 4
    [
        (1, "Alice", 25, "D"),
        (2, "Bob", 31, "U"),
        (4, "Vishnu", 71, "I"),
    ],
    # update-2: update id 2 again
    [(2, "Bob", 32, "U")],
]

for data in update_batches:
    updates_df = spark.createDataFrame(data, columns)
    delta_table = DeltaTable.forName(spark, "kaninipro.dev.people_cdf")
    (
        delta_table.alias("target")
        .merge(updates_df.alias("source"), "target.id = source.id")
        .whenMatchedUpdateAll(condition="source.indicator != 'D'")
        .whenMatchedDelete(condition="source.indicator = 'D'")
        .whenNotMatchedInsertAll(condition="source.indicator != 'D'")
        .execute()
    )

In [0]:
# Read the change feed of people_cdf again, starting from table version 0.
df_cdf = (
    spark.read
    .format("delta")
    .option("readChangeFeed", "true")
    .option("startingVersion", "0")
    .table("kaninipro.dev.people_cdf")
)

df_cdf.display()

In [0]:
# Plain read of people_cdf (no change feed), shown with the row-tracking
# fields from the `_metadata` column next to every data column.
df_cdf = spark.read.format("delta").table("kaninipro.dev.people_cdf")

row_tracking_view = df_cdf.selectExpr(
    "*",
    "_metadata.row_id",
    "_metadata.row_commit_version",
)
display(row_tracking_view)

In [0]:
# Change-feed read of people_cdf from version 0, keeping only the row-tracking
# fields of the `_metadata` column.
# NOTE(review): confirm that `_metadata.row_id` / `_metadata.row_commit_version`
# are exposed on change-feed reads; nothing in this notebook shows whether they are.
df_cdf = (
    spark.read
    .format("delta")
    .option("readChangeFeed", "true")
    .option("startingVersion", "0")
    .table("kaninipro.dev.people_cdf")
)

metadata_only = df_cdf.selectExpr(
    "_metadata.row_id",
    "_metadata.row_commit_version",
)
display(metadata_only)