In [1]:
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("Iceberg Spark SQL") \
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") \
    .config("spark.sql.catalog.default", "org.apache.iceberg.spark.SparkCatalog") \
    .config("spark.sql.catalog.default.catalog-impl", "org.apache.iceberg.rest.RESTCatalog") \
    .config("spark.sql.catalog.default.uri", "http://iceberg-rest:8181") \
    .config("spark.sql.catalog.default.io-impl", "org.apache.iceberg.aws.s3.S3FileIO") \
    .config("spark.sql.catalog.default.s3.endpoint", "http://minio:9000") \
    .config("spark.sql.catalog.default.s3.path-style-access", "true") \
    .config("spark.sql.catalog.default.s3.access-key-id", "admin") \
    .config("spark.sql.catalog.default.s3.secret-access-key", "password") \
    .config("spark.sql.catalog.default.warehouse", "s3://warehouse/") \
    .getOrCreate()

spark.sparkContext.setLogLevel("ERROR")

25/07/30 07:55:13 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [2]:
from pyiceberg.catalog import load_catalog

catalog = load_catalog("default")

print("✅ 카탈로그 타입:", catalog.__class__)
print("📂 카탈로그 이름:", catalog.name)
print("⚙️ 속성:", catalog.properties)

# 네임스페이스 확인
namespaces = catalog.list_namespaces()
print("📁 네임스페이스 목록:", namespaces)

# 각 네임스페이스의 테이블 목록 출력
for ns in namespaces:
    print(f"\n📋 [{ns}] 테이블 목록:")
    for table_name in catalog.list_tables(namespace=ns):
        print(" -", table_name)

✅ 카탈로그 타입: <class 'pyiceberg.catalog.rest.RestCatalog'>
📂 카탈로그 이름: default
⚙️ 속성: {'uri': 'http://iceberg-rest:8181', 's3.endpoint': 'http://minio:9000', 's3.access-key-id': 'admin', 's3.secret-access-key': 'password', 'type': 'rest', 'io.impl': 'org.apache.iceberg.aws.s3.S3FileIO'}
📁 네임스페이스 목록: [('user_events',)]

📋 [('user_events',)] 테이블 목록:
 - ('user_events', 'keydown_events')
 - ('user_events', 'mouse_events')


In [4]:
# Spark SQL로 테이블 목록 확인
print("📊 Spark SQL 테이블 목록:")
spark.sql("SHOW TABLES IN default.user_events").show()

# COUNT 쿼리 실행
spark.sql("SELECT COUNT(*) FROM default.user_events.mouse_events").show()

# 일부 데이터 조회
spark.sql("SELECT * FROM default.user_events.mouse_events LIMIT 5").show()

📊 Spark SQL 테이블 목록:
+-----------+--------------+-----------+
|  namespace|     tableName|isTemporary|
+-----------+--------------+-----------+
|user_events|keydown_events|      false|
|user_events|  mouse_events|      false|
+-----------+--------------+-----------+

+--------+
|count(1)|
+--------+
|      83|
+--------+

+------+-------+-------+--------+------+-------+-------+-------+-----+-----+-------+-------+-------------+--------------------+----------+---+--------------------+------------+-------+---------+---------+---------+--------------------+
|altKey|ctrlKey|metaKey|shiftKey|button|buttons|clientX|clientY|pageX|pageY|screenX|screenY|relatedTarget|           timestamp|event_type| id|            canvasId|     canvasX|canvasY|movementX|movementY|isTrusted|               shape|
+------+-------+-------+--------+------+-------+-------+-------+-----+-----+-------+-------+-------------+--------------------+----------+---+--------------------+------------+-------+---------+---------+-