In [0]:
# Sample product records, one CSV line per list entry. The space after each
# comma is part of the raw payload and is kept exactly as-is.
csv_data = "\n".join([
    "id,name,category,price",
    "1, Amit, Electronics, 50000",
    "2, Priya, Furniture, 3000",
    "3, Rahul, Stationery, 200",
    "4, Neha, Books, 800",
    "5, Karthik, Electronics, 45000",
])

# Save the inline CSV text to DBFS; the trailing True is the overwrite flag,
# so re-running this cell replaces any file already at that path.
dbutils.fs.put("dbfs:/file:/tmp/electronic_products.csv", csv_data, True)


Wrote 153 bytes.


True

In [0]:
# Load the products CSV written above into a DataFrame, using the first line
# as column names.
#
# The raw file has a space after every comma ("1, Amit, Electronics, 50000").
# Without trimming, every value after the first column is loaded with a
# leading space (" Amit", " Electronics"), which then leaks into string
# comparisons and into the `category` partition values used later on.
# The two whitespace options strip that padding at read time.
#
# No schema is supplied and inferSchema is not enabled, so every column is
# still read as a string, exactly as before.
df = (
    spark.read
    .option("header", True)
    .option("ignoreLeadingWhiteSpace", True)
    .option("ignoreTrailingWhiteSpace", True)
    .csv("dbfs:/file:/tmp/electronic_products.csv")
)
display(df)

id,name,category,price
1,Amit,Electronics,50000
2,Priya,Furniture,3000
3,Rahul,Stationery,200
4,Neha,Books,800
5,Karthik,Electronics,45000


In [0]:

# Persist the CSV-backed DataFrame in Delta format at a fixed path;
# "overwrite" mode replaces whatever data is already stored there.
delta_writer = df.write.format("delta").mode("overwrite")
delta_writer.save("/tmp/electronic_products.delta")


In [0]:
# Read the Delta table back from its path to confirm the write round-trips.
df_delta = (
    spark.read
    .format("delta")
    .load("/tmp/electronic_products.delta")
)
display(df_delta)

id,name,category,price
1,Amit,Electronics,50000
2,Priya,Furniture,3000
3,Rahul,Stationery,200
4,Neha,Books,800
5,Karthik,Electronics,45000


In [0]:
from delta.tables import DeltaTable

# Bind a DeltaTable handle to the table stored at this path; later cells
# reuse `delta_table` for the merge and for displaying the current state.
delta_table = DeltaTable.forPath(spark, "/tmp/electronic_products.delta")

# In-place update: rows matching the SQL predicate "id = 2" get a new price.
# The dict maps column name -> SQL expression string for the new value.
price_change = {"price": "3500"}
delta_table.update(condition="id = 2", set=price_change)

# Show the table contents after the update.
delta_table.toDF().display()

id,name,category,price
1,Amit,Electronics,50000
3,Rahul,Stationery,200
4,Neha,Books,800
5,Karthik,Electronics,45000
2,Priya,Furniture,3500


In [0]:
# Rows to upsert, in (id, name, category, price) order.
# NOTE(review): ids here are Python ints while the table was loaded from CSV
# without a schema, so the join below appears to rely on Spark's implicit
# cast when comparing target.id with source.id -- confirm before changing
# either side's type.
new_data = [
    (2, "Priya", "Furniture", "3500"),      # id 2 is already in the table -> update
    (6, "Sneha", "Electronics", "60000"),   # id 6 is not in the table -> insert
]
update_df = spark.createDataFrame(new_data, ["id", "name", "category", "price"])

# Upsert: join source rows to the table on id, overwrite every column of
# matched rows, and insert the rows that found no match.
merge_builder = delta_table.alias("target").merge(
    update_df.alias("source"),
    "target.id = source.id",
)
merge_builder = merge_builder.whenMatchedUpdateAll().whenNotMatchedInsertAll()
merge_builder.execute()

# Print the table contents after the merge.
delta_table.toDF().show()

+---+--------+------------+------+
| id|    name|    category| price|
+---+--------+------------+------+
|  1|    Amit| Electronics| 50000|
|  3|   Rahul|  Stationery|   200|
|  4|    Neha|       Books|   800|
|  5| Karthik| Electronics| 45000|
|  6|   Sneha| Electronics| 60000|
|  2|   Priya|   Furniture|  3500|
+---+--------+------------+------+



In [0]:
# Latest state of the table, via the existing DeltaTable handle.
delta_table.toDF().display()

# Time travel: load the same path as of table version 0 and display it
# so it can be compared with the latest state shown above.
previous_df = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load("/tmp/electronic_products.delta")
)
previous_df.display()

id,name,category,price
1,Amit,Electronics,50000
3,Rahul,Stationery,200
4,Neha,Books,800
5,Karthik,Electronics,45000
2,Priya,Furniture,3500
6,Sneha,Electronics,60000


id,name,category,price
1,Amit,Electronics,50000
2,Priya,Furniture,3000
3,Rahul,Stationery,200
4,Neha,Books,800
5,Karthik,Electronics,45000


In [0]:
# Rewrite the Delta table from the CSV-backed DataFrame, this time laid out
# in partitions keyed by `category`. overwriteSchema is switched on so the
# overwrite may replace the table's existing schema/layout rather than
# having to match it.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("category")
    .save("/tmp/electronic_products.delta")
)


In [0]:
# Read the rewritten (partitioned) table back from its path and display it.
(
    spark.read
    .format("delta")
    .load("/tmp/electronic_products.delta")
    .display()
)

id,name,category,price
1,Amit,Electronics,50000
5,Karthik,Electronics,45000
2,Priya,Furniture,3000
3,Rahul,Stationery,200
4,Neha,Books,800
