In [None]:
import os
from pyspark.sql import SparkSession

In [None]:
# --- Notebook configuration -------------------------------------------------
# Change this URL to match your own Snowflake account.
SF_URL = "https://sfseeurope-ie-demo99.snowflakecomputing.com"
# REST catalog endpoint, derived from the account URL.
HORIZON_CATALOG_URI = f"{SF_URL}/polaris/api/catalog"

SF_USER = "SPARK_USER"
SF_DATABASE = "ICEBERG_TEST_DB"
SCHEMA_NAME = "DEMO"
SF_WAREHOUSE = "MYWH"

ICEBERG_TABLE_NAME = "USERINFORMATION"

# Access token (PAT) for SF_USER. It is read from the SPARK_USER_PAT
# environment variable so the secret is never saved inside the notebook;
# when the variable is unset this falls back to the empty string.
PAT_TOKEN = os.environ.get("SPARK_USER_PAT", "")
# Role used for the session: ICEBERG_DATA_ENGINEER or ICEBERG_DATA_ANALYST
HORIZON_SESSION_ROLE = "ICEBERG_DATA_ANALYST"
STORAGE_REGION = "eu-west-1"

ICEBERG_VERSION = "1.10.1"

# Snowflake Connector for Spark
DRIVER_VERSION = "3.24.0"  # (or above)
SNOWFLAKE_CONNECTOR_VERSION = "3.1.6"

# Machine-specific JDK location (Homebrew on macOS). Adjust for your machine.
JAVA_HOME = "/opt/homebrew/Cellar/openjdk@17/17.0.18/libexec/openjdk.jdk/Contents/Home/"
# Only export JAVA_HOME when the directory actually exists; otherwise keep
# whatever JAVA_HOME the environment already provides instead of pointing
# at a path that is not there.
if os.path.isdir(JAVA_HOME):
    os.environ["JAVA_HOME"] = JAVA_HOME


In [None]:
# Stop the session left over from a previous run of this cell so the new
# builder configuration below is applied to a fresh session.
try:
    spark.stop()
except NameError:
    # First run on a fresh kernel: `spark` does not exist yet.
    pass
except Exception as stop_error:
    # Best effort only: report the problem but still build a new session.
    print(f"Warning: could not stop the previous Spark session: {stop_error}")

spark = (
    SparkSession.builder
        .master("local[*]")
        # Port "0" is passed for the UI, driver and block manager ports.
        .config("spark.ui.port", "0")
        .config("spark.driver.bindAddress", "127.0.0.1")
        .config("spark.driver.host", "127.0.0.1")
        .config("spark.driver.port", "0")
        .config("spark.blockManager.port", "0")
        # Pull the needed JARs automatically.
        # JAR dependencies for Iceberg, the AWS bundle and the
        # Snowflake Connector for Spark.
        .config(
            "spark.jars.packages",
            f"org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:{ICEBERG_VERSION},"
            f"org.apache.iceberg:iceberg-aws-bundle:{ICEBERG_VERSION},"

            # For Azure storage, use the package below and comment out the
            # AWS bundle above (keep the trailing comma so the list stays valid):
            # f"org.apache.iceberg:iceberg-azure-bundle:{ICEBERG_VERSION},"
            # Snowflake JDBC driver and Snowflake Connector for Spark
            f"net.snowflake:snowflake-jdbc:{DRIVER_VERSION},"
            f"net.snowflake:spark-snowflake_2.13:{SNOWFLAKE_CONNECTOR_VERSION}"
        )
        # Iceberg SQL extensions
        .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
        .config("spark.sql.defaultCatalog", "horizoncatalog")
        .config("spark.sql.catalog.horizoncatalog", "org.apache.spark.sql.snowflake.catalog.SnowflakeFallbackCatalog")

        # Horizon REST catalog configuration
        .config("spark.sql.catalog.horizoncatalog.catalog-impl", "org.apache.iceberg.spark.SparkCatalog")
        .config("spark.sql.catalog.horizoncatalog.type", "rest")
        .config("spark.sql.catalog.horizoncatalog.uri", HORIZON_CATALOG_URI)
        .config("spark.sql.catalog.horizoncatalog.warehouse", SF_DATABASE)
        .config("spark.sql.catalog.horizoncatalog.scope", f"session:role:{HORIZON_SESSION_ROLE}")
        .config("spark.sql.catalog.horizoncatalog.client.region", STORAGE_REGION)
        .config("spark.sql.catalog.horizoncatalog.credential", PAT_TOKEN)
        .config("spark.sql.iceberg.vectorization.enabled", "false")
        .config("spark.sql.catalog.horizoncatalog.header.X-Iceberg-Access-Delegation", "vended-credentials")
        # NOTE(review): both `io-impl` and `file-io-impl` are set to the same
        # class, as in the original notebook - confirm whether both are needed.
        .config("spark.sql.catalog.horizoncatalog.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
        .config("spark.sql.catalog.horizoncatalog.file-io-impl", "org.apache.iceberg.aws.s3.S3FileIO")

        # Snowflake Connector for Spark settings (the PAT is passed as the password)
        .config("spark.snowflake.sfURL", SF_URL)
        .config("spark.snowflake.sfUser", SF_USER)
        .config("spark.snowflake.sfPassword", PAT_TOKEN)
        .config("spark.snowflake.sfDatabase", SF_DATABASE)
        .config("spark.snowflake.sfSchema", SCHEMA_NAME)  # Optional
        .config("spark.snowflake.sfRole", HORIZON_SESSION_ROLE)
        .config("spark.snowflake.sfWarehouse", SF_WAREHOUSE)
        .getOrCreate()
)

# Only show ERROR-level Spark log messages in the notebook output.
spark.sparkContext.setLogLevel("ERROR")

In [None]:
# Display the version of the running Spark session as the cell output.
spark.version

In [None]:
# List the namespaces of the current catalog without truncating the values.
namespaces_df = spark.sql("SHOW NAMESPACES")
namespaces_df.show(truncate=False)

In [None]:
# Query the configured Iceberg table. The schema and table names come from the
# configuration cell (SCHEMA_NAME, ICEBERG_TABLE_NAME) instead of being
# repeated as literals here, so changing the configuration changes the query.
spark.sql(f"SELECT * FROM {SCHEMA_NAME}.{ICEBERG_TABLE_NAME}").show()