# PyIceberg Example

In [10]:
from pyiceberg.catalog import load_catalog
import pyarrow.parquet as pq
import pyarrow.dataset as ds
import polars as pl

In [2]:
# Create a temporary location for Iceberg
!mkdir /tmp/warehouse

mkdir: /tmp/warehouse: File exists


In [3]:
# set up the catalog
warehouse_path = "/tmp/warehouse"
catalog = load_catalog(
    "default",
    **{
        'type': 'sql',
        "uri": f"sqlite:///{warehouse_path}/pyiceberg_catalog.db",
        "warehouse": f"file://{warehouse_path}",
    },
)

In [5]:
df = pq.read_table("/Users/matthewpowers/data/tpch_sf1/nation.parquet")

In [6]:
# create a new iceberg table
catalog.create_namespace("default")

table = catalog.create_table(
    "default.nation",
    schema=df.schema,
)

In [7]:
# Append the dataframe to the table
table.append(df)

In [9]:
# check length of table
len(table.scan().to_arrow())

25

## Read Iceberg table into Polars via Arrow

In [12]:
# Load the table (namespace.table_name)
table = catalog.load_table("default.nation")

# Scan and load as a PyArrow table
arrow_table = table.scan().to_arrow()

# Convert to Polars DataFrame
df = pl.from_arrow(arrow_table)

print(df.head())

shape: (5, 4)
┌─────────────┬───────────┬─────────────┬─────────────────────────────────┐
│ n_nationkey ┆ n_name    ┆ n_regionkey ┆ n_comment                       │
│ ---         ┆ ---       ┆ ---         ┆ ---                             │
│ i64         ┆ str       ┆ i64         ┆ str                             │
╞═════════════╪═══════════╪═════════════╪═════════════════════════════════╡
│ 0           ┆ ALGERIA   ┆ 0           ┆  haggle. carefully final depos… │
│ 1           ┆ ARGENTINA ┆ 1           ┆ al foxes promise slyly accordi… │
│ 2           ┆ BRAZIL    ┆ 1           ┆ y alongside of the pending dep… │
│ 3           ┆ CANADA    ┆ 1           ┆ eas hang ironic, silent packag… │
│ 4           ┆ EGYPT     ┆ 4           ┆ y above the carefully unusual … │
└─────────────┴───────────┴─────────────┴─────────────────────────────────┘


## Read Iceberg table into Polars via native reader

In [17]:
table_path = "/tmp/warehouse/default/nation"  # path to the Iceberg table folder

lazy_df = pl.scan_iceberg(
    source=table_path,
    reader_override="native"
)

# Collect into a Polars DataFrame
df = lazy_df.collect()

print(df.head())

FileNotFoundError: [Errno 2] Failed to open local file '/tmp/warehouse/default/nation/metadata/version-hint.text'. Detail: [errno 2] No such file or directory

This error occurred with the following context stack:
	[1] 'python dataset scan'
	[2] 'sink'
