### This notebook runs the dimension load for all dimensions sourced from Sybase.

%run SLV_NB_Functions

# Dim Products Section

# Load the latest parquet extract of each product-related source table.
# NOTE(review): get_latest_file is not defined in this notebook; presumably it
# is provided by the SLV_NB_Functions notebook run above -- confirm.
farticulos_df = spark.read.parquet(get_latest_file('farticulos'))  # products
fgrupos_df = spark.read.parquet(get_latest_file('fgrupos'))  # groups
flineas_df = spark.read.parquet(get_latest_file('flineas'))  # lines
tipo_articulos_df = spark.read.parquet(get_latest_file('tipo_articulos'))  # product types
estados_articulos = spark.read.parquet(get_latest_file('estado_articulos'))  # product statuses

# Register each DataFrame as a session temp view so it can be queried from SQL.
# createOrReplaceTempView is used instead of createTempView because the latter
# raises when a view with that name already exists, which made this cell fail
# whenever it was executed a second time in the same Spark session.
farticulos_df.createOrReplaceTempView('temp_products')
fgrupos_df.createOrReplaceTempView('temp_groups')
flineas_df.createOrReplaceTempView('temp_product_lines')
tipo_articulos_df.createOrReplaceTempView('temp_product_types')
estados_articulos.createOrReplaceTempView('temp_product_status')

In [1]:
# Root of the OneLake "Tables" folder that holds the source Delta tables.
# NOTE(review): the two GUIDs in this URI (presumably workspace and lakehouse
# ids) are hard-coded, so the notebook is tied to a single environment.
_ONELAKE_TABLES_ROOT = (
    "abfss://b2c899fb-e571-4496-aebf-c7a23083635a@onelake.dfs.fabric.microsoft.com"
    "/a00cf91a-f92e-498a-9f14-ba10221fb05a/Tables"
)

# One path per source table, built from the shared root.
farticulos_path = f"{_ONELAKE_TABLES_ROOT}/farticulos"
fgrupos_path = f"{_ONELAKE_TABLES_ROOT}/fgrupos"
flineas_path = f"{_ONELAKE_TABLES_ROOT}/flineas"
estadoarticulos_path = f"{_ONELAKE_TABLES_ROOT}/estado_articulos"
tipo_articulos_path = f"{_ONELAKE_TABLES_ROOT}/tipo_articulos"


StatementMeta(, 1d1b7dcb-0f28-41b3-9543-b0087fd542e0, 3, Finished, Available, Finished)

In [2]:
def _read_delta(table_path):
    """Return a DataFrame over the Delta table stored at ``table_path``."""
    return spark.read.format("delta").load(table_path)


# One DataFrame per product-related source table, read from its Delta path.
farticulos_sc = _read_delta(farticulos_path)
fgrupos_sc = _read_delta(fgrupos_path)
flineas_sc = _read_delta(flineas_path)
estadoarticulos_sc = _read_delta(estadoarticulos_path)
tipo_articulos_sc = _read_delta(tipo_articulos_path)

StatementMeta(, 1d1b7dcb-0f28-41b3-9543-b0087fd542e0, 4, Finished, Available, Finished)

In [3]:
# Expose the Delta-backed DataFrames to Spark SQL under the view names that the
# MERGE statement below reads from.
# createOrReplaceTempView is used instead of createTempView: the same five view
# names are registered earlier in this notebook, and createTempView raises when
# the name already exists, so this cell failed on a top-to-bottom run and on any
# re-run within the same Spark session.
farticulos_sc.createOrReplaceTempView('temp_products')
fgrupos_sc.createOrReplaceTempView('temp_groups')
flineas_sc.createOrReplaceTempView('temp_product_lines')
tipo_articulos_sc.createOrReplaceTempView('temp_product_types')
estadoarticulos_sc.createOrReplaceTempView('temp_product_status')


StatementMeta(, 1d1b7dcb-0f28-41b3-9543-b0087fd542e0, 5, Finished, Available, Finished)

In [10]:
%%sql
-- Upsert the product dimension DEV_SLV_LH.dimproducts from the temp views
-- temp_products, temp_product_lines, temp_groups, temp_product_types and
-- temp_product_status.
--
-- Stages:
--   raw_source      join the source views and rename columns
--   source_typed    convert text columns to DECIMAL / TIMESTAMP
--   source_ranked   number the rows of each product, newest update first
--   filtered_source keep exactly one row per src_product_id
--   MERGE           update matching products, insert new ones
WITH raw_source AS (
  SELECT
    p.ICLAVE            AS src_product_id,
    p.ICLAVEMADRE       AS is_parent,
    p.ICODIGOBARRAS     AS barcode,
    p.ICB               AS barcode_2,
    p.IDESC             AS product_name,
    p.IUNIDAD           AS unit_clean_str,
    p.iunicant          AS unit_str,
    p.UDDATE            AS update_date_product_str,
    p.USERID            AS user_id_product,
    p.IV_CLAVE          AS iva_tax_id,
    p.IE_CLAVE          AS ieps_tax_id,
    p.desc_1            AS iva_tax_rate_str,
    p.desc_2            AS ieps_tax_rate_str,
    p.iclave_padre      AS parent_src_id,
    t.type_desc         AS product_type,
    p.isustn            AS substance,
    p.imarca            AS product_brand,
    l.LCLAVE            AS src_line_id,
    l.LDESC             AS line_name,
    l.lactivo           AS line_status,
    g.GDESC             AS group_name,
    g.GCLAVE            AS src_group_id,
    g.gactivo           AS group_status,
    s.descripcion       AS desc,
    p.IACTIVO           AS product_status
  FROM temp_products p
  LEFT JOIN temp_product_lines  l ON p.LCLAVE = l.LCLAVE AND p.GCLAVE = l.GCLAVE
  -- Groups are looked up with the product's own GCLAVE rather than through the
  -- line row. When the line matches, l.GCLAVE equals p.GCLAVE, so the result is
  -- unchanged; when the product has no matching line, the group columns are
  -- still populated instead of being lost together with the line.
  LEFT JOIN temp_groups         g ON p.GCLAVE = g.GCLAVE
  LEFT JOIN temp_product_types  t ON p.itype = t.itype
  LEFT JOIN temp_product_status s ON s.id_estado = p.iactivo
),
source_typed AS (
  SELECT
    src_product_id,
    is_parent,
    barcode,
    barcode_2,
    product_name,
    -- DECIMAL fields: commas are replaced by dots before the cast; TRY_CAST
    -- returns NULL when the text still cannot be converted.
    TRY_CAST(regexp_replace(unit_clean_str,    ',', '.') AS DECIMAL(38,18)) AS unit_clean,
    TRY_CAST(regexp_replace(unit_str,          ',', '.') AS DECIMAL(38,18)) AS unit,
    -- TIMESTAMP field: format 'dd/MM/yyyy H:mm:ss'. Parsed once here so the
    -- dedupe ordering below and the merged column use the same value.
    -- to_timestamp already returns a TIMESTAMP, so no extra cast is applied.
    to_timestamp(update_date_product_str, 'dd/MM/yyyy H:mm:ss')            AS update_date_product,
    user_id_product,
    iva_tax_id,
    ieps_tax_id,
    TRY_CAST(regexp_replace(iva_tax_rate_str,  ',', '.') AS DECIMAL(38,18)) AS iva_tax_rate,
    TRY_CAST(regexp_replace(ieps_tax_rate_str, ',', '.') AS DECIMAL(38,18)) AS ieps_tax_rate,
    parent_src_id,
    product_type,
    substance,
    product_brand,
    src_line_id,
    line_name,
    line_status,
    group_name,
    src_group_id,
    group_status,
    desc,
    product_status
  FROM raw_source
),
source_ranked AS (
  SELECT
    *,
    -- Dedupe: rn = 1 marks the most recently updated row of each product.
    -- NOTE(review): rows that tie on update_date_product (including rows whose
    -- timestamp did not parse) are ordered arbitrarily -- add a tie-breaker if
    -- the source can contain such duplicates.
    ROW_NUMBER() OVER (
      PARTITION BY src_product_id
      ORDER BY update_date_product DESC
    ) AS rn
  FROM source_typed
),
filtered_source AS (
  SELECT
    src_product_id,
    is_parent,
    barcode,
    barcode_2,
    product_name,
    unit_clean,
    unit,
    update_date_product,
    user_id_product,
    iva_tax_id,
    ieps_tax_id,
    iva_tax_rate,
    ieps_tax_rate,
    parent_src_id,
    product_type,
    substance,
    product_brand,
    src_line_id,
    line_name,
    line_status,
    group_name,
    src_group_id,
    group_status,
    desc,
    product_status
  FROM source_ranked
  WHERE rn = 1
)

MERGE INTO DEV_SLV_LH.dimproducts AS target
USING filtered_source AS source
  -- NOTE(review): a NULL src_product_id never satisfies this equality, so such
  -- a row would be inserted again on every run -- confirm the key is never NULL.
  ON target.src_product_id = source.src_product_id

-- Existing product: overwrite every non-key attribute with the source value.
WHEN MATCHED THEN
  UPDATE SET
    target.is_parent           = source.is_parent,
    target.barcode             = source.barcode,
    target.barcode_2           = source.barcode_2,
    target.product_name        = source.product_name,
    target.unit_clean          = source.unit_clean,
    target.unit                = source.unit,
    target.update_date_product = source.update_date_product,
    target.user_id_product     = source.user_id_product,
    target.iva_tax_id          = source.iva_tax_id,
    target.ieps_tax_id         = source.ieps_tax_id,
    target.iva_tax_rate        = source.iva_tax_rate,
    target.ieps_tax_rate       = source.ieps_tax_rate,
    target.parent_src_id       = source.parent_src_id,
    target.product_type        = source.product_type,
    target.substance           = source.substance,
    target.product_brand       = source.product_brand,
    target.src_line_id         = source.src_line_id,
    target.line_name           = source.line_name,
    target.line_status         = source.line_status,
    target.group_name          = source.group_name,
    target.src_group_id        = source.src_group_id,
    target.group_status        = source.group_status,
    target.desc                = source.desc,
    target.product_status      = source.product_status

-- New product: insert the full row.
WHEN NOT MATCHED THEN
  INSERT (
    src_product_id, is_parent, barcode, barcode_2, product_name, unit_clean,
    unit, update_date_product, user_id_product, iva_tax_id, ieps_tax_id,
    iva_tax_rate, ieps_tax_rate, parent_src_id, product_type, substance,
    product_brand, src_line_id, line_name, line_status, group_name,
    src_group_id, group_status, desc, product_status
  )
  VALUES (
    source.src_product_id, source.is_parent, source.barcode, source.barcode_2,
    source.product_name, source.unit_clean, source.unit, source.update_date_product,
    source.user_id_product, source.iva_tax_id, source.ieps_tax_id,
    source.iva_tax_rate, source.ieps_tax_rate, source.parent_src_id,
    source.product_type, source.substance, source.product_brand,
    source.src_line_id, source.line_name, source.line_status,
    source.group_name, source.src_group_id, source.group_status,
    source.desc, source.product_status
  );


StatementMeta(, 1d1b7dcb-0f28-41b3-9543-b0087fd542e0, 12, Finished, Available, Finished)

<Spark SQL result set with 1 rows and 4 fields>

In [11]:
# Show up to 1000 rows of the dimension table written by the MERGE above.
preview_sql = "SELECT * FROM DEV_SLV_LH.dimproducts LIMIT 1000"
df = spark.sql(preview_sql)
display(df)

StatementMeta(, 1d1b7dcb-0f28-41b3-9543-b0087fd542e0, 13, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, db30ddfd-105f-409f-9c30-527a0bb5986e)