In [None]:
spark.sql("CREATE NAMESPACE IF NOT EXISTS nessie.demo_db")

spark.sql("""
CREATE TABLE IF NOT EXISTS nessie.demo_db.sales (
    order_id INT,
    customer STRING,
    amount DOUBLE
) USING iceberg
""")

print("‚úÖ Table 'nessie.demo_db.sales' created.")


In [None]:
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder.appName("Iceberg-Nessie-MinIO-Demo")
    # ===== Iceberg Catalog qua Nessie =====
    .config("spark.sql.catalog.nessie", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.nessie.catalog-impl", "org.apache.iceberg.nessie.NessieCatalog")
    .config("spark.sql.catalog.nessie.uri", "http://nessie:19120/api/v1")
    .config("spark.sql.catalog.nessie.ref", "main")
    .config("spark.sql.catalog.nessie.warehouse", "s3a://silver_fddƒëf/")
    # ===== C·∫•u h√¨nh MinIO (S3-compatible) =====
    .config("spark.sql.catalog.nessie.s3.endpoint", "http://minio:9000")
    .config("spark.sql.catalog.nessie.s3.access-key", "admin")
    .config("spark.sql.catalog.nessie.s3.secret-key", "admin123")
    .config("spark.sql.catalog.nessie.s3.path-style-access", "true")
    # ===== Spark + Hadoop S3 connector =====
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
    .config("spark.hadoop.fs.s3a.access.key", "admin")
    .config("spark.hadoop.fs.s3a.secret.key", "admin123")
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .getOrCreate()
)

print("‚úÖ SparkSession connected to Nessie + MinIO via Iceberg.")


In [None]:
# Seed rows for the sales table, one tuple per order: (order_id, customer, amount).
data = [(1, "Alice", 100.5), (2, "Bob", 230.75), (3, "Charlie", 315.2)]
df = spark.createDataFrame(data, schema=["order_id", "customer", "amount"])

# Append the rows to the existing Iceberg table.
sales_writer = df.writeTo("nessie.demo_db.sales")
sales_writer.append()

print("‚úÖ Data written successfully!")


In [None]:
# Display the current contents of the sales table.
sales_rows = spark.sql("SELECT * FROM nessie.demo_db.sales")
sales_rows.show()


In [None]:
import requests

# Query Nessie's REST config endpoint as a quick reachability check.
# The timeout keeps the cell from hanging indefinitely if the service is down.
resp = requests.get("http://nessie:19120/api/v1/config", timeout=10)
print(resp.status_code)
# Surface 4xx/5xx responses as an HTTP error instead of a confusing
# JSON-decoding failure on the next line.
resp.raise_for_status()
print(resp.json())


In [None]:
import os
from pynessie import NessieClient
from pynessie.conf import build_config

# 1. Declare the Nessie endpoint (the container is on the same network).
os.environ["NESSIE_ENDPOINT"] = "http://nessie:19120/api/v1"

# 2. Build the config object.
#    Presumably build_config() picks up NESSIE_* environment variables —
#    confirm against the pynessie documentation.
config = build_config()

# 3. Hand the config to NessieClient.
client = NessieClient(config)

# 4. Verify the connection: fetch the default branch *before* reporting
#    success, so a failed request never prints a misleading "connected" line.
default_branch = client.get_default_branch()
print("‚úÖ Connected to Nessie")
print("Current default branch:", default_branch)


In [None]:
# Second batch of orders, appended to the same table.
data2 = [(4, "David", 420.0), (5, "Eva", 180.5)]
extra_orders = spark.createDataFrame(data2, ["order_id", "customer", "amount"])
extra_orders.writeTo("nessie.demo_db.sales").append()

# Display the table after the append.
spark.sql("SELECT * FROM nessie.demo_db.sales").show()


In [None]:
# Take the first entry yielded by the log of 'main' and inspect it.
main_log = client.get_log("main")
entry = next(main_log)
print(entry)
# List the attributes available on a log entry.
print(dir(entry))


In [None]:
# Print every commit on 'main'.
# `log_entry.__hash__` is Python's bound hashing method, so formatting it
# prints a method repr rather than the commit id; the Nessie commit hash is
# stored on the commit metadata as `hash_`.
for log_entry in client.get_log("main"):
    commit_meta = log_entry.commit_meta
    print(f"Commit hash: {commit_meta.hash_} | Message: {commit_meta.message}")


In [None]:
# Create branch 'experiment', passing 'main' as the source reference.
experiment_branch = client.create_branch("experiment", "main")
print("‚úÖ Branch 'experiment' created from 'main'")


In [None]:
import os

# Session bound to branch 'experiment'.
#
# `SparkSession.builder...getOrCreate()` would return the already-running
# `spark` session, whose `nessie` catalog was initialised on 'main', so the
# append below would land on 'main'. `newSession()` provides a separate SQL
# configuration and catalog state on the same SparkContext; the catalog
# settings are applied to it before the catalog is first used there.
spark_branch = spark.newSession()

branch_catalog_conf = {
    "spark.sql.catalog.nessie": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.nessie.catalog-impl": "org.apache.iceberg.nessie.NessieCatalog",
    "spark.sql.catalog.nessie.uri": "http://nessie:19120/api/v1",
    "spark.sql.catalog.nessie.ref": "experiment",  # switch branch
    "spark.sql.catalog.nessie.warehouse": "s3a://warehouse/",
    "spark.sql.catalog.nessie.s3.endpoint": "http://minio:9000",
    # Credentials come from the environment when set; the fallbacks are the
    # demo defaults this cell previously hard-coded.
    "spark.sql.catalog.nessie.s3.access-key": os.environ.get("MINIO_ACCESS_KEY", "admin"),
    "spark.sql.catalog.nessie.s3.secret-key": os.environ.get("MINIO_SECRET_KEY", "admin123"),
    "spark.sql.catalog.nessie.s3.path-style-access": "true",
}
for conf_key, conf_value in branch_catalog_conf.items():
    spark_branch.conf.set(conf_key, conf_value)

# Append one more row; only branch 'experiment' should receive it.
data3 = [(6, "Frank", 999.9)]
spark_branch.createDataFrame(data3, ["order_id", "customer", "amount"])\
    .writeTo("nessie.demo_db.sales").append()

print("‚úÖ Wrote data to branch 'experiment'")


In [None]:
# Sales rows as seen through the original `spark` session, ordered by order_id.
main_rows = spark.sql("SELECT * FROM nessie.demo_db.sales ORDER BY order_id")
main_rows.show()


In [None]:
# Sales rows as seen through the `spark_branch` session, ordered by order_id.
branch_rows = spark_branch.sql("SELECT * FROM nessie.demo_db.sales ORDER BY order_id")
branch_rows.show()


In [None]:
# List the table's snapshots via the `snapshots` metadata table, without truncating columns.
snapshots_df = spark.sql("SELECT * FROM nessie.demo_db.sales.snapshots")
snapshots_df.show(truncate=False)

In [None]:
# Look up the oldest snapshot instead of hard-coding an ID: a literal copied
# from one run does not exist once the table is recreated elsewhere.
oldest_snapshot = spark.sql("""
SELECT snapshot_id
FROM nessie.demo_db.sales.snapshots
ORDER BY committed_at
LIMIT 1
""").first()

# `.first()` returns None when the metadata table is empty.
if oldest_snapshot is None:
    raise RuntimeError("Table nessie.demo_db.sales has no snapshots to time-travel to.")
snapshot_id = oldest_snapshot["snapshot_id"]

# Time-travel query: read the table as of the selected snapshot.
query = f"""
SELECT * FROM nessie.demo_db.sales VERSION AS OF {snapshot_id}
"""
old_df = spark.sql(query)
old_df.show()


In [None]:
# Re-read the first log entry of 'main' and dump both the entry and its attribute names.
entry = next(iter(client.get_log("main")))
for inspected in (entry, dir(entry)):
    print(inspected)
