In [3]:
import polars as pl
import lancedb
import time
import duckdb

  from .autonotebook import tqdm as notebook_tqdm


### Streaming Query Dataframe Output

Run this notebook in a loop to monitor the progress of the dataset. Analytics can also be run simultaneously while the dataset is getting updated.

In [18]:
# continuously query the latest block number head while the dataset updates.
while True:
    table_name = "blocks"
    db: lancedb.DBConnection = lancedb.connect(table_name)
    # Try to open and merge data into existing table.
    table: lancedb.table = db.open_table(table_name)

    # Get the latest block number
    print(
        table.to_polars()
        .select("block_number")
        .sort(by="block_number", descending=True)
        .collect()["block_number"][0]
    )

    # Get total number of rows
    print(table.to_polars().select(pl.len()).collect().item())

    time.sleep(180)

19769573
216257


KeyboardInterrupt: 

### DuckDB Query Example

In [6]:
table_name = "blocks"
db: lancedb.DBConnection = lancedb.connect(table_name)
# open the table as a lance dataset to make it accessible for duckdb
lance_dataset_table: lancedb.table = db.open_table(table_name).to_lance()

In [15]:
# duckdb output format
duckdb.sql("SELECT MAX(block_number) FROM lance_dataset_table")

┌───────────────────┐
│ max(block_number) │
│      uint64       │
├───────────────────┤
│          19769573 │
└───────────────────┘

In [16]:
# polars dataframe output format
duckdb.sql("SELECT MAX(block_number) FROM lance_dataset_table").pl()

max(block_number)
u64
19769573
