# Verify Polaris Setup

In [77]:
import os
import traceback
from pathlib import Path

import pyarrow as pa
from pyiceberg.catalog.rest import RestCatalog
from pyiceberg.exceptions import NamespaceAlreadyExistsError, TableAlreadyExistsError
from pyiceberg.types import StringType

## Retrieve Principal Credentials

In [78]:

# Credentials file: ../work/principal.txt relative to the current working
# directory.
principal_creds = Path.cwd().parent / "work" / "principal.txt"
with open(principal_creds, "r") as file:
    # The first line holds "realm,client_id,client_secret". Strip every field
    # so the line terminator (or stray spaces around the commas) does not end
    # up inside client_secret, which is used verbatim as a credential.
    # Unpacking still raises ValueError if the line does not have 3 fields.
    realm, client_id, client_secret = (
        field.strip() for field in file.readline().split(",")
    )

## Define Variables

In [79]:
# Catalog endpoint and name.
# NOTE: the URI must include the REST prefix -- "/api/catalog" here; use the
# prefix from your own setup if it differs.
CATALOG_URI = "http://localhost:18181/api/catalog"
catalog_name = "polardb"

# Namespace and table used by the cells below.
namespace = "demo_db"
table_name = "fruits"

In [80]:
# Properties handed to the REST catalog client. The "header.*" entries are
# presumably forwarded as HTTP headers on each request -- confirm against the
# pyiceberg REST catalog configuration docs.
catalog_properties = {
    "uri": CATALOG_URI,
    # Client id and secret joined with ":".
    "credential": f"{client_id}:{client_secret}",
    "header.content-type": "application/vnd.api+json",
    "header.X-Iceberg-Access-Delegation": "vended-credentials",
    "header.Polaris-Realm": realm,
    # The warehouse is addressed by the catalog name.
    "warehouse": catalog_name,
    "scope": "PRINCIPAL_ROLE:ALL",
}

catalog = RestCatalog(name=catalog_name, **catalog_properties)



## Create Namespace

In [81]:
# Create the namespace; an already-existing namespace is reported, not fatal.
try:
    catalog.create_namespace(namespace)
except NamespaceAlreadyExistsError:
    print(f"Namespace '{namespace}' already exists")
except Exception as err:
    # Best effort: show any other failure and let the notebook continue.
    print(err)

## Create Table

In [82]:
# Arrow schema for the table: a required 64-bit integer id and an optional
# string name.
_id_field = pa.field("id", pa.int64(), nullable=False)
_name_field = pa.field("name", pa.string(), nullable=True)
_schema = pa.schema([_id_field, _name_field])

# Create the table; an already-existing table is reported, not fatal.
try:
    new_tbl = catalog.create_table(
        identifier=f"{namespace}.{table_name}", schema=_schema
    )
    print(new_tbl)
except TableAlreadyExistsError:
    print(f"Table '{table_name}' already exists")
except Exception as err:
    # Best effort: show any other failure and let the notebook continue.
    print(err)

fruits(
  1: id: required long,
  2: name: optional string
),
partition by: [],
sort order: [],
snapshot: null


## Load Table

In [83]:
# Load the table from the catalog and preview its rows as a pandas DataFrame.
try:
    table = catalog.load_table(f"{namespace}.{table_name}")
    # Full scan with no filter, materialised as a DataFrame.
    df = table.scan().to_pandas()
    print(df.head())
except Exception as err:
    # Best effort: show the failure and let the notebook continue.
    print(err)

Empty DataFrame
Columns: [id, name]
Index: []


## Insert Data

In [84]:
# Append three sample rows to the table, using the schema it was created with.
try:
    fruit_rows = [
        {"id": 1, "name": "mango"},
        {"id": 2, "name": "banana"},
        {"id": 3, "name": "orange"},
    ]
    data = pa.Table.from_pylist(fruit_rows, schema=_schema)
    table.append(data)
except Exception:
    # Print the full traceback so a failed append is easy to diagnose.
    print(traceback.format_exc())

## Query Data

In [90]:
# Re-scan the table so `df` reflects its current contents.
df = table.scan().to_pandas()
# Trailing bare expression: the notebook renders up to the first 10 rows.
df.head(10)

Unnamed: 0,id,name,season
0,1,mango,summer
1,2,banana,all
2,3,orange,winter


## Schema Evolution

In [86]:

# Add a string column "season" to the table schema.
# NOTE(review): the update is presumably committed when the `with` block
# exits -- confirm against the pyiceberg schema evolution docs.
with table.update_schema() as schema_update:
    schema_update.add_column(
        "season",
        StringType(),
        doc="Fruit Season",
    )



In [87]:
# Print the table's text representation to inspect its current state.
print(table)

fruits(
  1: id: required long,
  2: name: optional string,
  3: season: optional string (Fruit Season)
),
partition by: [],
sort order: [],
snapshot: Operation.APPEND: id=8949572113130933916, schema_id=0


In [89]:
# Arrow schema matching the evolved table: the original fields plus an
# optional string "season".
season_field = pa.field("season", pa.string(), nullable=True)
new_schema = _schema.append(season_field)

# New data with season column
seasonal_rows = [
    {"id": 1, "name": "mango", "season": "summer"},
    {"id": 2, "name": "banana", "season": "all"},
    {"id": 3, "name": "orange", "season": "winter"},
]
new_table = pa.Table.from_pylist(seasonal_rows, schema=new_schema)

# Overwrite the table contents with the season-aware rows.
table.overwrite(new_table)