In [4]:
from delta_lake_spark_utils import sparksession_with_delta_lake

In [5]:
# Create the SparkSession that every later cell uses as `spark`.
# NOTE(review): the helper is defined in delta_lake_spark_utils (not shown here);
# presumably it wires in the Delta Lake extensions/catalog — confirm there.
spark = sparksession_with_delta_lake()

SparkSession created successfully with Delta Lake support!


In [6]:
# End-to-end Delta Lake walkthrough: write -> read -> update -> append -> time travel.
# Relies on `spark`, the SparkSession created in the previous cell.
from delta.tables import DeltaTable

# 2. Create some sample data
data = [("Alice", 1, "New York"),
        ("Bob", 2, "London"),
        ("Charlie", 3, "Paris")]
columns = ["Name", "ID", "City"]
df = spark.createDataFrame(data, columns)

# 3. Define a path for your Delta Lake table
delta_table_path = "./delta_tables/my_delta_table"

# 4. Write DataFrame to Delta Lake table
#    "overwrite" replaces the table contents, so re-running this cell is safe.
print(f"\nWriting data to Delta Lake table at: {delta_table_path}")
df.write.format("delta").mode("overwrite").save(delta_table_path)
print("Data written successfully!")

# 5. Read data from Delta Lake table
print(f"\nReading data from Delta Lake table at: {delta_table_path}")
delta_df = spark.read.format("delta").load(delta_table_path)
delta_df.show()

# 6. Perform an update (example: change Bob's city)
#    Delta Lake allows for ACID transactions directly on the table
print("\nUpdating Bob's city...")
delta_table = DeltaTable.forPath(spark, delta_table_path)
delta_table.update(
    # The predicate is given as a SQL string so this cell does not depend on
    # `col` having been imported somewhere else in the kernel.
    condition="Name = 'Bob'",
    # Values in `set` are SQL expressions, hence the inner quotes that make
    # 'Berlin' a string literal rather than a column reference.
    set={"City": "'Berlin'"},
)

print("Data after update:")
spark.read.format("delta").load(delta_table_path).show()

# 7. Add new data (example: append Eve)
print("\nAppending new data (Eve)...")
new_data = [("Eve", 4, "Rome")]
new_df = spark.createDataFrame(new_data, columns)
new_df.write.format("delta").mode("append").save(delta_table_path)

print("Data after append:")
spark.read.format("delta").load(delta_table_path).show()


# 8. Time Travel (read previous version)
#    You can specify a version or a timestamp
#    NOTE: an overwrite adds a new table version instead of resetting history,
#    so after re-runs version 0 is the first write ever made to this path.
print("\nTime traveling to version 0 (initial write):")
old_df = spark.read.format("delta").option("versionAsOf", 0).load(delta_table_path)
old_df.show()

# 9. Stop SparkSession
#    After this, `spark` is unusable until the session-creation cell is re-run.
spark.stop()
print("\nSparkSession stopped.")


Writing data to Delta Lake table at: ./delta_tables/my_delta_table


                                                                                

Data written successfully!

Reading data from Delta Lake table at: ./delta_tables/my_delta_table
+-------+---+--------+
|   Name| ID|    City|
+-------+---+--------+
|  Alice|  1|New York|
|Charlie|  3|   Paris|
|    Bob|  2|  London|
+-------+---+--------+


Updating Bob's city...
Data after update:
+-------+---+--------+
|   Name| ID|    City|
+-------+---+--------+
|  Alice|  1|New York|
|Charlie|  3|   Paris|
|    Bob|  2|  Berlin|
+-------+---+--------+


Appending new data (Eve)...
Data after append:
+-------+---+--------+
|   Name| ID|    City|
+-------+---+--------+
|  Alice|  1|New York|
|Charlie|  3|   Paris|
|    Bob|  2|  Berlin|
|    Eve|  4|    Rome|
+-------+---+--------+


Time traveling to version 0 (initial write):
+-------+---+--------+
|   Name| ID|    City|
+-------+---+--------+
|  Alice|  1|New York|
|Charlie|  3|   Paris|
|    Bob|  2|  London|
+-------+---+--------+


SparkSession stopped.
