In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Notebook parameters as (widget name, default value) pairs.
#
# NOTE: this cell previously started with dbutils.widgets.removeAll(). The
# Databricks documentation states that widgets must not be created in the same
# cell that removes them, so the call was dropped from this cell. If stale
# widgets need to be cleared, run removeAll() from a separate cell first.
widget_defaults = (
    ("catalogName", "catalog_supermarket"),
    ("bronzeSchema", "bronze"),
    ("silverSchema", "silver"),
)

# Declare one text widget per parameter.
for widget_name, default_value in widget_defaults:
    dbutils.widgets.text(widget_name, default_value)

# Echo the values the widgets currently hold so each run records its parameters.
for widget_name, _ in widget_defaults:
    print(f"{widget_name}:", dbutils.widgets.get(widget_name))


In [0]:
# Read the three notebook parameters in one pass; the order of the widget
# names matches the order of the variables being unpacked.
catalog, bronze_schema, silver_schema = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("catalogName", "bronzeSchema", "silverSchema")
)


In [0]:
# Fully qualified "<catalog>.<schema>" prefix shared by the three source tables.
bronze_prefix = f"{catalog}.{bronze_schema}"

# Source tables read from the bronze schema.
df_products = spark.table(f"{bronze_prefix}.products")
df_aisles = spark.table(f"{bronze_prefix}.aisles")
df_departments = spark.table(f"{bronze_prefix}.departments")

# Preview the first five rows of each source, in the same order they were loaded.
for bronze_df in (df_products, df_aisles, df_departments):
    display(bronze_df.limit(5))


In [0]:
# Join conditions, written against the aliases assigned below:
# "p" = products, "a" = aisles, "d" = departments.
same_aisle = col("p.aisle_id") == col("a.aisle_id")
same_department = col("p.department_id") == col("d.department_id")

# Left joins: every product row is kept even when no aisle/department matches.
products_with_aisles = df_products.alias("p").join(
    df_aisles.alias("a"), on=same_aisle, how="left"
)
products_with_lookups = products_with_aisles.join(
    df_departments.alias("d"), on=same_department, how="left"
)

# Keep the product keys and attach the aisle/department labels under
# explicit *_name column names.
df_product_hierarchy = products_with_lookups.select(
    col("p.product_id"),
    col("p.product_name"),
    col("p.aisle_id"),
    col("a.aisle").alias("aisle_name"),
    col("p.department_id"),
    col("d.department").alias("department_name"),
)

# Inspect a sample of the result and its schema.
display(df_product_hierarchy.limit(20))
df_product_hierarchy.printSchema()


In [0]:
# Destination table in the silver schema.
target_table = f"{catalog}.{silver_schema}.product_hierarchy"

# Write the joined DataFrame to the destination table in overwrite mode.
df_product_hierarchy.write.saveAsTable(target_table, mode="overwrite")


In [0]:
# Fully qualified name of the table to report on and preview.
written_table = f"{catalog}.{silver_schema}.product_hierarchy"

# Confirmation message (Spanish: "Table created/updated").
print(f"Tabla creada/actualizada: {written_table}")

# Read the table back and show its first 20 rows.
written_preview = spark.table(written_table).limit(20)
display(written_preview)
