In [None]:
# Product ETL: same structure as the customer ETL, applied to product data
import logging
import os

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, current_timestamp, lit

# Spark session for the product ETL. The PostgreSQL JDBC driver is requested via
# spark.jars.packages so the JDBC reads and writes in this script can load it.
spark = (
    SparkSession.builder
    .appName("ETL Product")
    .config("spark.jars.packages", "org.postgresql:postgresql:42.6.0")
    .getOrCreate()
)

# Connection settings can be overridden through environment variables so real
# credentials never have to be written into this file. When a variable is unset
# the original placeholder value is used, so existing setups behave as before.
jdbc_url = os.environ.get(
    "RETAIL_DW_JDBC_URL", "jdbc:postgresql://localhost:5432/retail_dw"
)
conn_props = {
    "user": os.environ.get("RETAIL_DW_USER", "your_user"),
    "password": os.environ.get("RETAIL_DW_PASSWORD", "your_password"),
    "driver": "org.postgresql.Driver",
}

# Extract: distinct (StockCode, Description) pairs from the raw retail table.
# NOTE: distinct() is over the pair, so a StockCode that appears with several
# descriptions yields several rows.
raw_products = (
    spark.read.jdbc(url=jdbc_url, table='wh_source.online_retail_raw', properties=conn_props)
    .select(col("StockCode"), col("Description"))
    .distinct()
)

# Transform: rename the key column and stamp the ETL audit columns.
# This DataFrame is consumed by several Spark actions further down (row checks
# and JDBC writes). Without caching, every one of them re-reads the source table
# over JDBC, re-runs the distinct and re-evaluates current_timestamp().
product_stage = (
    raw_products.withColumnRenamed("StockCode", "stock_code")
    .withColumn("etl_insert_ts", current_timestamp())
    .withColumn("etl_update_ts", current_timestamp())
    .cache()
)

# Load wh_stage.stg_product: append only products whose stock_code is not in
# the staging table yet; if the table cannot be read, write the full set.
try:
    stage_products = spark.read.jdbc(jdbc_url, "wh_stage.stg_product", properties=conn_props)
except Exception:
    # Any read failure is treated as "table not available" and leads to the
    # overwrite branch below. Log it with the traceback so connection or
    # permission problems are visible instead of being silently swallowed.
    logging.getLogger(__name__).warning(
        "Could not read wh_stage.stg_product; writing the full product set in overwrite mode",
        exc_info=True,
    )
    stage_products = None

# Test against None explicitly instead of relying on DataFrame truthiness.
if stage_products is not None:
    new_products = product_stage.join(
        stage_products.select("stock_code"), "stock_code", "left_anti"
    )
    # head(1) answers "is there anything to append?" without counting every row.
    if new_products.head(1):
        new_products.write.jdbc(jdbc_url, "wh_stage.stg_product", mode="append", properties=conn_props)
else:
    product_stage.write.jdbc(jdbc_url, "wh_stage.stg_product", mode="overwrite", properties=conn_props)

# Load core.dim_product: insert a row flagged as current for every product whose
# stock_code is not in the dimension yet; if the table cannot be read, write
# the full set.
try:
    dim_products = spark.read.jdbc(jdbc_url, "core.dim_product", properties=conn_props)
except Exception:
    # Any read failure is treated as "table not available" and leads to the
    # overwrite mode below. Log it with the traceback so connection or
    # permission problems are visible instead of being silently swallowed.
    logging.getLogger(__name__).warning(
        "Could not read core.dim_product; writing the full product set in overwrite mode",
        exc_info=True,
    )
    dim_products = None

# Test against None explicitly instead of relying on DataFrame truthiness.
if dim_products is not None:
    # Only the key is needed to find products missing from the dimension.
    dim_candidates = product_stage.join(
        dim_products.select("stock_code"), "stock_code", "left_anti"
    )
    dim_write_mode = "append"
else:
    dim_candidates = product_stage
    dim_write_mode = "overwrite"

# Validity columns are identical for both write modes, so build them once:
# start_date = load time, end_date = NULL, current_flag = True.
new_dim_products = (
    dim_candidates.withColumn("start_date", current_timestamp())
    .withColumn("end_date", lit(None).cast("timestamp"))
    .withColumn("current_flag", lit(True))
)

# The overwrite path always writes (as before); the append path is skipped when
# there is nothing new. head(1) checks that without counting every row.
if dim_write_mode == "overwrite" or new_dim_products.head(1):
    new_dim_products.write.jdbc(jdbc_url, "core.dim_product", mode=dim_write_mode, properties=conn_props)

# Stop the Spark session once the staging and dimension loads have run.
spark.stop()
