In [0]:
from delta.tables import *

In [0]:
data = [
    ("P001", "Laptop", "Electronics", 999.99),
    ("P002", "Smartphone", "Electronics", 499.99),
    ("P003", "T-Shirt", "Clothing", 29.99)
]
cols = ["product_id", "name", "category", "price"]
df = spark.createDataFrame(data, cols)
df.write.format("delta").mode("overwrite").saveAsTable("default.products")


In [0]:
spark.sql("select * from default.products").show()

+----------+----------+-----------+------+--------+
|product_id|      name|   category| price|discount|
+----------+----------+-----------+------+--------+
|      P001|    Laptop|Electronics|999.99|    NULL|
|      P002|Smartphone|Electronics|499.99|    NULL|
|      P003|   T-Shirt|   Clothing| 29.99|    NULL|
+----------+----------+-----------+------+--------+



In [0]:
data_new = [
    ("P001", "Laptop", "Electronics", 1099.99, 50.0),
    ("P004", "Headphones", "Electronics", 149.99, 10.0),
    ("P005", "Jeans", "Clothing", 59.99, 5.0)
]
cols_new = ["product_id", "name", "category", "price", "discount"]

df_new = spark.createDataFrame(data_new, cols_new)



In [0]:
df_new.limit(0).write.format("delta").mode("append").option("mergeSchema", "true").saveAsTable("default.products")


In [0]:
spark.sql("select * from default.products_new").show()

+----------+----------+-----------+-------+--------+
|product_id|      name|   category|  price|discount|
+----------+----------+-----------+-------+--------+
|      P001|    Laptop|Electronics|1099.99|    50.0|
|      P004|Headphones|Electronics| 149.99|    10.0|
|      P005|     Jeans|   Clothing|  59.99|     5.0|
+----------+----------+-----------+-------+--------+



In [0]:
delta_table = DeltaTable.forName(spark,"workspace.default.products")


In [0]:
delta_table.alias("target").merge(df_new.alias("source"), "target.product_id = source.product_id").whenMatchedUpdate(set = {"price": "source.price", "discount": "source.discount"}).whenNotMatchedInsert(values = {"product_id": "source.product_id", "name": "source.name", "category": "source.category", "price": "source.price", "discount": "source.discount"}).execute()

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [0]:
spark.sql("DESCRIBE HISTORY default.products").show(truncate=False)


+-------+-------------------+--------------+-----------------------+---------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----+--------+------------------------+-----------+-----------------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [0]:
spark.read.format("delta").option("versionAsOf",1).table("default.products").show()

+----------+----------+-----------+-------+--------+
|product_id|      name|   category|  price|discount|
+----------+----------+-----------+-------+--------+
|      P001|    Laptop|Electronics|1099.99|    50.0|
|      P004|Headphones|Electronics| 149.99|    10.0|
|      P005|     Jeans|   Clothing|  59.99|     5.0|
|      P001|    Laptop|Electronics| 999.99|    NULL|
|      P002|Smartphone|Electronics| 499.99|    NULL|
|      P003|   T-Shirt|   Clothing|  29.99|    NULL|
+----------+----------+-----------+-------+--------+



In [0]:
spark.read.format("delta").option("versionAsOf",2).table("default.products").show()

+----------+----------+-----------+-------+--------+
|product_id|      name|   category|  price|discount|
+----------+----------+-----------+-------+--------+
|      P001|    Laptop|Electronics|1099.99|    50.0|
|      P001|    Laptop|Electronics|1099.99|    50.0|
|      P004|Headphones|Electronics| 149.99|    10.0|
|      P005|     Jeans|   Clothing|  59.99|     5.0|
|      P002|Smartphone|Electronics| 499.99|    NULL|
|      P003|   T-Shirt|   Clothing|  29.99|    NULL|
+----------+----------+-----------+-------+--------+



In [0]:
df2 = spark.read.table("default.products")
df2.orderBy("product_id").show()

+----------+----------+-----------+-------+--------+
|product_id|      name|   category|  price|discount|
+----------+----------+-----------+-------+--------+
|      P001|    Laptop|Electronics|1099.99|    50.0|
|      P002|Smartphone|Electronics| 499.99|    NULL|
|      P003|   T-Shirt|   Clothing|  29.99|    NULL|
|      P004|Headphones|Electronics| 149.99|    10.0|
|      P005|     Jeans|   Clothing|  59.99|     5.0|
+----------+----------+-----------+-------+--------+

