In [10]:
from pyspark.sql import SparkSession
from pyspark.sql import types as T

# Build (or reuse) the Spark session with an Iceberg catalog registered as "local".
# NOTE(review): the UPDATE statement run later in this notebook may require
# `spark.sql.extensions` to include Iceberg's session extensions; it is not set
# here, so confirm it is configured elsewhere (e.g. spark-defaults.conf).
session_builder = SparkSession.builder.appName("IcebergExample")

# "local" is a Hadoop-type Iceberg catalog.
session_builder = session_builder.config(
    "spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog"
)
session_builder = session_builder.config("spark.sql.catalog.local.type", "hadoop")

# Warehouse directory for the catalog; the path is relative, so adjust as needed.
session_builder = session_builder.config(
    "spark.sql.catalog.local.warehouse", "work/warehouse"
)

spark = session_builder.getOrCreate()



In [11]:
# Example rows to load, as (id, name) tuples.
data = [(4, "Alice"), (5, "Bob"), (6, "Charlie")]

# Explicit schema for the example DataFrame; both columns are declared non-nullable.
id_field = T.StructField("id", T.IntegerType(), nullable=False)
name_field = T.StructField("name", T.StringType(), nullable=False)
schema = T.StructType([id_field, name_field])

# Build the example DataFrame from the rows and the schema above.
df = spark.createDataFrame(data, schema=schema)

In [12]:
# Namespace and table name of the Iceberg table; the query cells below
# interpolate both into their SQL.
namespace = "default"
table_name = "people"
target_table = f"local.{namespace}.{table_name}"

# Create the Iceberg table if it does not exist yet.
spark.sql(f"CREATE TABLE IF NOT EXISTS {target_table} (id INT, name STRING) USING iceberg")

# Append only the rows whose id is not already stored in the table.
# A plain append would insert the same ids again every time this cell is
# re-run (e.g. Restart & Run All), leaving duplicate rows behind.
existing_ids = spark.table(target_table).select("id")
new_rows = df.join(existing_ids, on="id", how="left_anti")
new_rows.writeTo(target_table).append()

In [14]:
# Inspect the Iceberg table: display every row, sorted by id.
all_rows_sql = f"""
SELECT * 
FROM local.{namespace}.{table_name}
order by id
"""
spark.sql(all_rows_sql).show()

+---+-------+
| id|   name|
+---+-------+
|  1|  Juana|
|  2|    Bob|
|  3|Charlie|
|  4|  Alice|
|  5|    Bob|
|  6|Charlie|
+---+-------+



In [15]:
# Row-level update on the Iceberg table: set the name of the row with id 1.
rename_sql = f"""
update local.{namespace}.{table_name}
set name = 'Marcia'
where id = 1
"""
# Left as the cell's last expression so the returned DataFrame is displayed.
spark.sql(rename_sql)

DataFrame[]

In [16]:
# Display the row with id 1 to check the result of the update.
row_one_sql = f"""
SELECT * 
FROM local.{namespace}.{table_name}
where id = 1
"""
spark.sql(row_one_sql).show()

+---+------+
| id|  name|
+---+------+
|  1|Marcia|
+---+------+

