## Querying older snapshots

In [2]:
from pyiceberg.catalog.sql import SqlCatalog
import os

# Absolute path of the local warehouse directory, resolved against the
# current working directory at the time this cell runs.
warehouse_path = os.path.abspath("./iceberg_warehouse")

# SQLite creates the database file on demand but not its parent directory,
# so make sure the warehouse directory exists before the catalog opens it.
os.makedirs(warehouse_path, exist_ok=True)

# SQL-backed catalog: catalog metadata goes into a SQLite file inside the
# warehouse directory, and the same directory is configured as the warehouse.
catalog = SqlCatalog(
    "default",
    **{
        "uri": f"sqlite:///{warehouse_path}/pyiceberg_catalog.db",
        "warehouse": f"file://{warehouse_path}",
    },
)

In [34]:
# Create a new table
from pyiceberg.schema import Schema
from pyiceberg.types import NestedField, IntegerType, StringType
import pyarrow as pa

# Two optional columns: an integer `id` and a string `data`.
schema = Schema(
    NestedField(field_id=1, name="id", field_type=IntegerType(), required=False),
    NestedField(field_id=2, name="data", field_type=StringType(), required=False),
)

# Reuse the table when the catalog already knows it; otherwise create it
# from the schema above.
table = (
    catalog.load_table("default.second_table")
    if catalog.table_exists("default.second_table")
    else catalog.create_table("default.second_table", schema)
)

In [None]:

# Seed rows, built with explicit Arrow types: `id` is int32 to match the
# table's IntegerType column.
data = pa.Table.from_pydict({"id": pa.array([1, 2], type=pa.int32()), "data": ["first", "second"]})

# Append only while the table has no snapshot yet. The table may have been
# loaded from an existing warehouse, and re-running this cell would otherwise
# write the same two rows again.
if table.current_snapshot() is None:
    table.append(data)




AttributeError: 'Table' object has no attribute 'commit'

In [31]:
# Scan the table with no filter and no snapshot pinned, and show the result
# as a pandas DataFrame.
table.scan().to_pandas()

Unnamed: 0,id,data
0,3,third
1,1,first
2,2,second


In [19]:

# Get the current snapshot and keep it in `current_snapshot`, so this state
# of the table can still be referred to after more data is appended.
current_snapshot = table.current_snapshot()
print(f"Current snapshot ID: {current_snapshot.snapshot_id}")


Current snapshot ID: 4075123781797130261


In [21]:
# One extra row, with `id` typed as int32 like the table column.
new_data = pa.Table.from_pydict(
    {
        "id": pa.array([3], type=pa.int32()),
        "data": ["third"],
    }
)

# Write the extra row to the table.
table.append(new_data)



In [22]:
# Get the snapshot the table points at after the append and keep it in
# `new_snapshot`; its ID is printed for comparison with earlier snapshots.
new_snapshot = table.current_snapshot()
print(f"New snapshot ID: {new_snapshot.snapshot_id}")


New snapshot ID: 8127183146016301519


In [26]:
# List the table's snapshot log. This call does not time travel by itself;
# the snapshot IDs it reports are the values `table.scan(snapshot_id=...)`
# accepts.
table.history()

[SnapshotLogEntry(snapshot_id=4075123781797130261, timestamp_ms=1733583091714),
 SnapshotLogEntry(snapshot_id=8127183146016301519, timestamp_ms=1733583520963)]

In [30]:
# Time travel: read the table as of the snapshot captured before the extra
# row was appended. The saved snapshot object is used instead of a pasted ID
# because snapshot IDs are generated at commit time and differ in every
# warehouse.
table.scan(snapshot_id=current_snapshot.snapshot_id).to_pandas()

Unnamed: 0,id,data
0,1,first
1,2,second


In [29]:
# Read the table as of the snapshot captured after the extra row was
# appended. The saved snapshot object is used instead of a pasted ID because
# snapshot IDs are generated at commit time and differ in every warehouse.
table.scan(snapshot_id=new_snapshot.snapshot_id).to_pandas()

Unnamed: 0,id,data
0,3,third
1,1,first
2,2,second


In [36]:
from pyiceberg.expressions import GreaterThan

# Predicate: keep rows whose `id` is strictly greater than 1.
filter_expr = GreaterThan("id", 1)

# Build the scan with the predicate first, then materialise it as a pandas
# DataFrame for display.
filtered_scan = table.scan(row_filter=filter_expr)
filtered_scan.to_pandas()

Unnamed: 0,id,data
0,3,third
1,2,second
