In [1]:
from pyspark.sql import SparkSession

# Inicializar a sessão do Spark com configurações para Iceberg e MinIO
spark = SparkSession.builder \
    .appName("Iceberg Example") \
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") \
    .config("spark.sql.catalog.iceberg", "org.apache.iceberg.spark.SparkCatalog") \
    .config("spark.jars.packages", "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.5.0") \
    .config("spark.sql.catalog.iceberg.type", "hive") \
    .config("spark.sql.catalog.iceberg.uri", "thrift://hive-metastore:9083") \
    .config("spark.sql.catalog.iceberg.warehouse", "s3a://warehouse/externo/table") \
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000") \
    .config("spark.hadoop.fs.s3a.access.key", "minioadmin") \
    .config("spark.hadoop.fs.s3a.secret.key", "minioadmin") \
    .config("spark.hadoop.fs.s3a.path.style.access", "true") \
    .getOrCreate()

data = [
    (1, "Alice", 25),
    (2, "Bob", 30),
    (3, "Charlie", 35)
]
columns = ["id", "name", "age"]

df = spark.createDataFrame(data, columns)

# Mostrar o DataFrame
print("DataFrame criado:")
df.show()

# Salvar o DataFrame no formato Iceberg
df.write.format("iceberg").saveAsTable("iceberg.bronze.tb_pessoa")

print("DataFrame salvo no formato Iceberg no MinIO.")

DataFrame criado:
+---+-------+---+
| id|   name|age|
+---+-------+---+
|  1|  Alice| 25|
|  2|    Bob| 30|
|  3|Charlie| 35|
+---+-------+---+

DataFrame salvo no formato Iceberg no MinIO.


In [2]:
df = spark.sql("SELECT * FROM iceberg.bronze.tb_pessoa")
df.show()


+---+-------+---+
| id|   name|age|
+---+-------+---+
|  1|  Alice| 25|
|  2|    Bob| 30|
|  3|Charlie| 35|
+---+-------+---+



In [3]:
spark.sql("""
SHOW TABLES IN iceberg.bronze;
""").show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|   bronze|tb_pessoa|      false|
+---------+---------+-----------+



In [4]:
snap = spark.sql("""
SELECT *
FROM iceberg.bronze.tb_pessoa.snapshots
ORDER BY committed_at DESC;
""")

snap.toPandas()

Unnamed: 0,committed_at,snapshot_id,parent_id,operation,manifest_list,summary
0,2025-03-16 23:57:24.051,5740962067477006763,,append,s3a://warehouse/externo/table/tb_pessoa/metada...,"{'spark.app.id': 'local-1742169429655', 'chang..."


In [5]:
spark.sql("""
DELETE FROM iceberg.bronze.tb_pessoa
WHERE id = 2
""")


DataFrame[]

In [6]:
snap = spark.sql("""
SELECT *
FROM iceberg.bronze.tb_pessoa.snapshots
ORDER BY committed_at DESC;
""")

snap.toPandas()

Unnamed: 0,committed_at,snapshot_id,parent_id,operation,manifest_list,summary
0,2025-03-16 23:57:28.764,7419262281424501369,5.740962e+18,delete,s3a://warehouse/externo/table/tb_pessoa/metada...,"{'spark.app.id': 'local-1742169429655', 'remov..."
1,2025-03-16 23:57:24.051,5740962067477006763,,append,s3a://warehouse/externo/table/tb_pessoa/metada...,"{'spark.app.id': 'local-1742169429655', 'chang..."
