In [1]:
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("IcebergTimeTravel") \
    .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") \
    .config("spark.sql.catalog.local.type", "hadoop") \
    .config("spark.sql.catalog.local.warehouse", "/tmp/iceberg_warehouse") \
    .config("spark.jars.packages", "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.4.2") \
    .getOrCreate()

In [2]:
# Inspect available snapshots
snapshots_df = spark.sql("SELECT * FROM local.db.sample_users.snapshots")
snapshots_df.show(truncate=False)

+-----------------------+-------------------+---------+---------+--------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|committed_at           |snapshot_id        |parent_id|operation|manifest_list                                                                                                       |summary                                                                                                                                                                                                                                                                                         |
+-----------------------+-------------------+---------+-

## Option 1: Using Snapshot ID

In [4]:
# Use a valid snapshot ID obtained from above
snapshot_id = "5568038035354584041"

df_old = spark.read \
    .option("snapshot-id", snapshot_id) \
    .format("iceberg") \
    .load("local.db.sample_users")

df_old.show(truncate=False)

+---+----+-----------+
|id |name|signup_date|
+---+----+-----------+
|2  |Alex|2024-02-01 |
|1  |Soni|2024-01-01 |
+---+----+-----------+



In [5]:
# Use a valid snapshot ID obtained from above
snapshot_id = "4271837051864160400"

df_new= spark.read \
    .option("snapshot-id", snapshot_id) \
    .format("iceberg") \
    .load("local.db.sample_users")

df_new.show(truncate=False)

+---+--------+-----------+
|id |name    |signup_date|
+---+--------+-----------+
|4  |Neo     |2024-04-01 |
|3  |Morpheus|2024-03-01 |
|2  |Alex    |2024-02-01 |
|1  |Trinity |2024-01-01 |
|5  |Smith   |2024-05-01 |
|6  |Anderson|2024-06-01 |
+---+--------+-----------+



In [6]:
current_df = spark.read.table("local.db.sample_users")
current_df.show(truncate=False)

+---+--------+-----------+
|id |name    |signup_date|
+---+--------+-----------+
|4  |Neo     |2024-04-01 |
|3  |Morpheus|2024-03-01 |
|2  |Alex    |2024-02-01 |
|1  |Trinity |2024-01-01 |
|5  |Smith   |2024-05-01 |
|6  |Anderson|2024-06-01 |
+---+--------+-----------+



## Option 2: Use the Timestamp (epoch millis)

In [9]:
snapshots_df = spark.sql("SELECT * FROM local.db.sample_users.snapshots")
snapshots_df.select("committed_at", "snapshot_id").show(truncate=False)


+-----------------------+-------------------+
|committed_at           |snapshot_id        |
+-----------------------+-------------------+
|2025-04-02 15:10:12.365|5568038035354584041|
|2025-04-02 17:41:52.616|4271837051864160400|
|2025-04-02 17:52:13.098|560746950586654696 |
|2025-04-02 18:40:03.527|2794978594534067236|
|2025-04-02 18:40:35.269|349006547023142716 |
+-----------------------+-------------------+



In [13]:
import datetime

# Convert to milliseconds since epoch
ts = datetime.datetime(2025, 6, 2, 15, 10, 12, 0)
epoch_ms = int(ts.timestamp() * 1000)
print(epoch_ms)  # Prints something like this 1743904212365

df_old = spark.read \
    .option("as-of-timestamp", epoch_ms) \
    .format("iceberg") \
    .load("local.db.sample_users")

df_old.show()



1748857212000
+---+--------+-----------+----------------+
| id|    name|signup_date|           email|
+---+--------+-----------+----------------+
|  2|    Alex| 2024-02-01|            NULL|
|  1| Trinity| 2024-01-01|            NULL|
|  3|    Nina| 2024-03-01|nina@example.com|
|  4|     Neo| 2024-04-01|            NULL|
|  3|Morpheus| 2024-03-01|            NULL|
|  6|Anderson| 2024-06-01|            NULL|
+---+--------+-----------+----------------+

