In [3]:
from pyspark.sql import SparkSession
from pyiceberg.catalog import load_catalog
from dotenv import load_dotenv
import os
# Pull in settings via python-dotenv before reading the environment.
load_dotenv()

# Connection settings come from the environment so that no credentials are
# hard-coded in the notebook.
db_host = os.getenv('DB_HOST')
db_port = os.getenv('DB_PORT')
db_name = os.getenv('DB_NAME')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
jdbc_jar = os.getenv('JDBC_JAR_PATH')

# Fail fast with a readable message: an unset variable would otherwise be
# formatted into the connection URLs as the literal text "None" and only
# surface later as an obscure connection error.
# JDBC_JAR_PATH is intentionally not enforced here.
_missing_db_vars = [
    var_name
    for var_name, var_value in (
        ('DB_HOST', db_host),
        ('DB_PORT', db_port),
        ('DB_NAME', db_name),
        ('DB_USER', db_user),
        ('DB_PASSWORD', db_password),
    )
    if var_value is None
]
if _missing_db_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_db_vars)
    )

# Make sure the local directory used as the Iceberg warehouse exists.
os.makedirs("/tmp/iceberg-warehouse", exist_ok=True)


# Create (or reuse) the Spark session, registering the jar named by
# JDBC_JAR_PATH under spark.jars so the JDBC driver class can be loaded.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL basic example")
    .config("spark.jars", jdbc_jar)
    .getOrCreate()
)



# JDBC connection settings consumed by the Spark read below.
spark_db_conf = dict(
    url=f"jdbc:postgresql://{db_host}:{db_port}/{db_name}",
    user=db_user,
    pw=db_password,
)

# Echo only the URL; the user name and password are kept out of the output.
print(spark_db_conf["url"])

# Connectivity check: run "SELECT version()" against PostgreSQL through
# Spark's JDBC data source and keep the result as a DataFrame.
pgsql_details = (
    spark.read.format("jdbc")
    .options(
        url=spark_db_conf["url"],
        query="SELECT version()",
        user=spark_db_conf["user"],
        password=spark_db_conf["pw"],
        driver="org.postgresql.Driver",
    )
    .load()
)
    

# Credentials are embedded in a SQLAlchemy-style URL, so they must be
# percent-encoded: a raw '@', '/', ':', '#' or '%' in the user name or
# password would otherwise be parsed as URL structure and break the connection.
from urllib.parse import quote_plus

# SQL-backed Iceberg catalog stored in the same PostgreSQL database, with
# table data written under the local warehouse directory.
catalog = load_catalog(
    "postgres",
    **{
        "type": "sql",
        "uri": (
            f"postgresql+psycopg2://{quote_plus(db_user)}:{quote_plus(db_password)}"
            f"@{db_host}:{db_port}/{db_name}"
        ),
        "warehouse": "/tmp/iceberg-warehouse",
    },
)

# Print the result of the version query issued through Spark.
pgsql_details.show()

jdbc:postgresql://172.22.0.9:5432/spark_db
+--------------------+
|             version|
+--------------------+
|PostgreSQL 15.14 ...|
+--------------------+

