In [1]:
# Reset catalog state: drop the Iceberg table first, then its namespace, so the
# notebook can recreate both from scratch further down.
# On a clean catalog these commands only report NoSuchTableException /
# NoSuchNamespaceException, which is expected and harmless here.
!pyiceberg --uri http://127.0.0.1:19120/iceberg/main/ drop table iot_sensor.equipment_data
!pyiceberg --uri http://127.0.0.1:19120/iceberg/main/ drop namespace iot_sensor

NoSuchTableException: Table does not exist: iot_sensor.equipment_data
NoSuchNamespaceException: Namespace does not exist: iot_sensor


In [2]:
import os

from pyiceberg.catalog import load_catalog

# Connect to the Nessie catalog through its Iceberg REST endpoint.
# The object-store credentials are read from the environment when set, so real
# secrets never need to be committed with the notebook; the fallbacks are the
# local demo credentials this cell previously hardcoded.
catalog = load_catalog(
    "nessie",
    **{
        "uri": "http://127.0.0.1:19120/iceberg/main/",
        "py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO",
        "s3.access-key-id": os.environ.get("S3_ACCESS_KEY_ID", "admin"),
        "s3.secret-access-key": os.environ.get("S3_SECRET_ACCESS_KEY", "password"),
    }
)

# Verify the connection by listing the namespaces the catalog currently holds.
namespaces = catalog.list_namespaces()
print("Namespaces:", namespaces)


Namespaces: []


In [3]:
from pyarrow import csv

# Read the raw equipment sensor CSV with PyArrow's CSV reader.
# Note: despite its name, `df` holds whatever csv.read_csv returns (an Arrow
# object exposing .schema and .shape), not a pandas DataFrame.
filepath = "data/raw/equipment_data.csv"
df = csv.read_csv(filepath)

# Quick sanity check: inferred column types and (rows, columns).
print("table schema:\n", df.schema, "\n")
print("table shape:\n", df.shape)

table schema:
 Timestamp: timestamp[s]
Temperature (°C): double
Vibration (mm/s): double
Pressure (Pa): double
RPM: double
Maintenance Required: int64
Temp_Change: double
Vib_Change: double 

table shape:
 (43800, 8)


In [4]:
# Create the namespace first, then a table inside it whose schema is taken
# directly from the Arrow data read from the CSV.
catalog.create_namespace("iot_sensor")
table = catalog.create_table("iot_sensor.equipment_data", schema=df.schema)

In [5]:
# Re-fetch the table handle from the catalog and display it (schema,
# partitioning, sort order and current snapshot).
table = catalog.load_table("iot_sensor.equipment_data")
table

equipment_data(
  1: Timestamp: optional timestamp,
  2: Temperature (°C): optional double,
  3: Vibration (mm/s): optional double,
  4: Pressure (Pa): optional double,
  5: RPM: optional double,
  6: Maintenance Required: optional long,
  7: Temp_Change: optional double,
  8: Vib_Change: optional double
),
partition by: [],
sort order: [],
snapshot: null

In [6]:
# Append the rows read from the CSV to the Iceberg table.
# When running this notebook locally, add a `127.0.0.1 minio` entry to
# /etc/hosts so that the `minio` host name resolves.
table.append(df)

