In [0]:
# Sample product records; the first line is the CSV header row.
csv_data = """id,name,category,price
1,Amit,Electronics,50000
2,Priya,Furniture,3000
3,Rahul,Stationery,200
4,Neha,Books,800
5,Karthik,Electronics,45000
"""

# Stage the CSV text on DBFS; the third argument (True) overwrites an existing file.
csv_path = "dbfs:/tmp/example.csv"
dbutils.fs.put(csv_path, csv_data, True)

# Load the staged file back, using the first line as column names.
# NOTE(review): no schema or inferSchema is supplied, so presumably every column
# (including id and price) is read as a string -- confirm that is intended.
df = spark.read.option("header", True).csv(csv_path)
df.show()

Wrote 139 bytes.
+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  2|  Priya|  Furniture| 3000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
+---+-------+-----------+-----+



In [0]:
# Persist the DataFrame in Delta format at a fixed path, using overwrite mode
# so re-running the cell replaces the table contents.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .save("/tmp/delta_example")
)

In [0]:
# Read the Delta table back from its path and display the rows.
delta_reader = spark.read.format("delta")
df_delta = delta_reader.load("/tmp/delta_example")
df_delta.show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  2|  Priya|  Furniture| 3000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
+---+-------+-----------+-----+



In [0]:
from delta.tables import DeltaTable

# Handle to the Delta table stored at this path; it is used for the in-place
# update below and stays available to later cells.
delta_table = DeltaTable.forPath(spark, "/tmp/delta_example")

# Set price to 4000 on the row(s) where id = 2. Both the condition and the
# values in `set` are SQL expression strings.
delta_table.update(
  condition = "id = 2",
  set = {"price": "4000"}
)

# Display the table through the DeltaTable handle itself instead of a DataFrame
# created in an earlier cell, so this cell shows the post-update state without
# depending on that earlier variable.
delta_table.toDF().show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
|  2|  Priya|  Furniture| 4000|
+---+-------+-----------+-----+



In [0]:
# Rows to upsert into the Delta table, keyed on id.
column_names = ["id", "name", "category", "price"]
new_data = [
    (2, "Priya", "Furniture", 4000),  # id already in the table -> update
    (6, "Sneha", "Kitchen", 1200),    # id not in the table -> insert
]
updates_df = spark.createDataFrame(new_data, column_names)

# Upsert: rows whose id matches an existing row overwrite all of its columns;
# rows with no match are inserted as new rows.
upsert = (
    delta_table.alias("t")
    .merge(updates_df.alias("u"), "t.id = u.id")
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
)
upsert.execute()

# Show the table contents after the merge.
delta_table.toDF().show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
|  2|  Priya|  Furniture| 4000|
|  6|  Sneha|    Kitchen| 1200|
+---+-------+-----------+-----+



In [0]:
# Time travel: load the table as of version 0 (its first commit) and display it.
version_reader = spark.read.format("delta").option("versionAsOf", 0)
previous_df = version_reader.load("/tmp/delta_example")
previous_df.show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  2|  Priya|  Furniture| 3000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
+---+-------+-----------+-----+



In [0]:
# Partitioning: write the DataFrame as a Delta table partitioned by category,
# then read it back from the same path to display the contents.
partitioned_path = "/tmp/product_partioned"
(
    df.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("category")
    .save(partitioned_path)
)
spark.read.format("delta").load(partitioned_path).show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  5|Karthik|Electronics|45000|
|  1|   Amit|Electronics|50000|
|  2|  Priya|  Furniture| 3000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
+---+-------+-----------+-----+

