In [4]:
import os
print("SPARK_CONF_DIR =", os.environ.get("SPARK_CONF_DIR"))
print("SPARK_HOME =", os.environ.get("SPARK_HOME"))

SPARK_CONF_DIR = /opt/spark/conf
SPARK_HOME = /opt/spark


In [5]:
conf_path = "/opt/spark/conf/spark-defaults.conf"
with open(conf_path, 'r') as f:
    lines = f.readlines()
print("First 10 lines of spark-defaults.conf:", lines[:10])



In [6]:
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("Polaris Debug")
    .getOrCreate()
)

In [7]:
# Test
print(spark.conf.get("spark.sql.catalog.polaris.uri"))

http://polaris:8181/api/catalog


In [8]:
spark.sql("CREATE NAMESPACE IF NOT EXISTS polaris.olist_raw")

[12:07:26] [Thread-3] INFO  org.apache.iceberg.rest.auth.AuthManagers - Loading AuthManager implementation: org.apache.iceberg.rest.auth.OAuth2Manager
[12:07:27] [Thread-3] INFO  org.apache.iceberg.CatalogUtil - Loading custom FileIO implementation: org.apache.iceberg.aws.s3.S3FileIO


DataFrame[]

In [9]:
spark.sql("SHOW NAMESPACES IN polaris").show()

+---------+
|namespace|
+---------+
|olist_raw|
+---------+



In [10]:
spark.sql("""
    CREATE TABLE IF NOT EXISTS polaris.olist_raw.test_connection (
        id INT,
        status STRING
    ) USING iceberg
""")

[12:07:33] [Thread-3] INFO  org.apache.iceberg.rest.RESTSessionCatalog - Table properties set at catalog level through catalog properties: {}
[12:07:33] [Thread-3] INFO  org.apache.iceberg.rest.RESTSessionCatalog - Table properties enforced at catalog level through catalog properties: {}
[12:07:34] [Thread-3] INFO  org.apache.iceberg.CatalogUtil - Loading custom FileIO implementation: org.apache.iceberg.aws.s3.S3FileIO


DataFrame[]

In [11]:
import socket
socket.gethostbyname("minio")  # should return the internal IP

'172.18.0.7'

In [12]:
# Insert data
spark.sql("""
INSERT INTO polaris.olist_raw.test_connection VALUES
(1, 'pending'),
(2, 'complete')
""")

# Read back
spark.sql("SELECT * FROM polaris.olist_raw.test_connection").show()

                                                                                

[12:07:40] [Thread-3] INFO  org.apache.iceberg.spark.source.SparkWrite - Committing append with 2 new data files to table polaris.olist_raw.test_connection
[12:07:41] [Thread-3] INFO  org.apache.iceberg.SnapshotProducer - Committed snapshot 4142068896744963022 (MergeAppend)
[12:07:41] [Thread-3] INFO  org.apache.iceberg.metrics.LoggingMetricsReporter - Received metrics report: CommitReport{tableName=polaris.olist_raw.test_connection, snapshotId=4142068896744963022, sequenceNumber=1, operation=append, commitMetrics=CommitMetricsResult{totalDuration=TimerResult{timeUnit=NANOSECONDS, totalDuration=PT0.715520675S, count=1}, attempts=CounterResult{unit=COUNT, value=1}, addedDataFiles=CounterResult{unit=COUNT, value=2}, removedDataFiles=null, totalDataFiles=CounterResult{unit=COUNT, value=2}, addedDeleteFiles=null, addedEqualityDeleteFiles=null, addedPositionalDeleteFiles=null, addedDVs=null, removedDeleteFiles=null, removedEqualityDeleteFiles=null, removedPositionalDeleteFiles=null, removed

                                                                                

In [13]:
# Check what Spark actually loaded
conf = spark.sparkContext.getConf().getAll()
for item in conf:
    if "s3.endpoint" in item[0]:
        print(f"✅ FOUND: {item}")

✅ FOUND: ('spark.sql.catalog.polaris.s3.endpoint', 'http://minio:9000/')


In [14]:
import boto3

s3 = boto3.client(
    's3',
    endpoint_url='http://minio:9000',
    aws_access_key_id='minioadmin',
    aws_secret_access_key='minioadmin',
    region_name='us-east-1'
)

print(s3.list_buckets())

{'ResponseMetadata': {'RequestId': '188785C66FE0311D', 'HostId': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8', 'HTTPStatusCode': 200, 'HTTPHeaders': {'accept-ranges': 'bytes', 'content-length': '373', 'content-type': 'application/xml', 'server': 'MinIO', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'vary': 'Origin, Accept-Encoding', 'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8', 'x-amz-request-id': '188785C66FE0311D', 'x-content-type-options': 'nosniff', 'x-ratelimit-limit': '1246', 'x-ratelimit-remaining': '1246', 'x-xss-protection': '1; mode=block', 'date': 'Sun, 04 Jan 2026 12:07:46 GMT'}, 'RetryAttempts': 0}, 'Buckets': [{'Name': 'olist-ecommerce', 'CreationDate': datetime.datetime(2026, 1, 4, 11, 54, 37, 889000, tzinfo=tzlocal())}], 'Owner': {'DisplayName': 'minio', 'ID': '02d6176db174dc93cb1b899f7c6078f08654445fe8cf1b6ce98d8855f66bdbf4'}}


In [15]:
spark.sql("""SHOW TABLES IN polaris.olist_raw;""").show()

+---------+---------------+-----------+
|namespace|      tableName|isTemporary|
+---------+---------------+-----------+
|olist_raw|test_connection|      false|
+---------+---------------+-----------+



In [16]:
spark.sql("""DROP TABLE polaris.olist_raw.test_connection;""")

DataFrame[]

In [17]:
spark.sql("""SHOW NAMESPACES IN polaris;""").show()

+---------+
|namespace|
+---------+
|olist_raw|
+---------+



In [18]:
spark.sql("""
DROP SCHEMA polaris.olist_raw CASCADE;
""")

DataFrame[]

In [19]:
spark.catalog.clearCache()  # clears all cached tables
spark.stop()  