## Bronze Layer

#### Criei um volume delta já que no free tier o acesso a mnt/ é negado

#### Verificando os dados de teste disponíveis

In [0]:
%py
# Base locations of the Bronze and Silver layers, both inside a volume.
# Former Bronze location, kept for reference: "/mnt/delta/bronze"
BRONZE_PATH = "/Volumes/workspace/default/delta/bronze/retail"
SILVER_PATH = "/Volumes/workspace/default/delta/silver/retail"


# Create the Silver directory if it does not exist yet.
dbutils.fs.mkdirs(SILVER_PATH)


In [0]:
# List what is currently stored under the Bronze location and render it.
bronze_entries = dbutils.fs.ls(BRONZE_PATH)
display(bronze_entries)


### Dataset retail silver


In [0]:
# Bronze datasets to expose as temporary views. Each one is read from the
# sub-directory of BRONZE_PATH that carries the dataset's name.
bronze_datasets = [
    "customers",
    "company_employees",
    "active_promotions",
    "loyalty_segment",
    "products",
    "promotions",
    "purchase_orders",
    "sales_orders",
    "sales_stream",
    "suppliers",
]

# Temp view name ("tmp_<dataset>") -> Delta location of that dataset.
silver_map = {
    f"tmp_{dataset}": f"{BRONZE_PATH}/{dataset}"
    for dataset in bronze_datasets
}

# Register every dataset as a temp view so the SQL cells can query it by name.
for view_name, path in silver_map.items():
    print(f"Loading {view_name} from {path}")
    bronze_df = spark.read.format("delta").load(path)
    bronze_df.createOrReplaceTempView(view_name)

### Lendo as tabelas

In [0]:
%sql
-- Preview of the sales orders. Author's notes on the data: each sale order holds
--   * a list of the items clicked during the purchase,
--   * a list of the products bought in the purchase,
--   * a list of the promotions applied (or not) to products in the purchase.
SELECT *
FROM tmp_sales_orders
LIMIT 15;

In [0]:
%sql
-- Flatten clicked_items: one output row per entry of the array.
-- Each entry is read by position: [0] is exposed as product_id and [1] as click_count.
SELECT
    so.order_number,
    so.customer_id,
    click_pair[0] AS product_id,
    click_pair[1] AS click_count
FROM tmp_sales_orders so
LATERAL VIEW explode(so.clicked_items) AS click_pair


In [0]:
%sql
-- Flatten ordered_products: one output row per product entry of each order,
-- with the entry's fields projected as plain columns.
SELECT
    so.order_number,
    so.customer_name,
    so.order_datetime,
    item.id    AS product_id,
    item.name  AS product_name,
    item.price AS product_price,
    item.qty   AS quantity,
    item.curr  AS currency,
    item.unit  AS unit
FROM tmp_sales_orders so
LATERAL VIEW explode(so.ordered_products) AS item;

In [0]:
%sql
-- Flatten promo_info: one output row per promotion entry attached to an order.
-- Orders whose promo_info has no entries are filtered out.
SELECT
    so.order_number,
    promo_entry.promo_id,
    promo_entry.promo_item AS product_id,
    promo_entry.promo_qty  AS promo_quantity,
    promo_entry.promo_disc AS discount_rate
FROM tmp_sales_orders so
LATERAL VIEW explode(so.promo_info) AS promo_entry
WHERE size(so.promo_info) > 0;

In [0]:
%sql
-- Show the column names and data types of the sales orders temp view.
DESCRIBE TABLE tmp_sales_orders;


In [0]:
%sql
-- One row per ordered product, enriched with the promotion matched to that
-- product in the same order (if any) and the click count recorded for it.
--
-- Output notes:
--   * promo_id, promo_quantity, discount_rate, value_with_dicount and
--     dicount_value are NULL when no promotion matches (LEFT JOIN).
--   * The aliases value_with_dicount / dicount_value keep their original
--     spelling so that anything consuming these column names keeps working.
--   * NOTE(review): assumes at most one promo_info entry and one clicked_items
--     entry per (order_number, product_id); otherwise the joins multiply
--     rows -- confirm against the data.
WITH products AS (
    -- One row per entry of ordered_products.
    SELECT
        order_number,
        customer_id,   -- required by the final SELECT (p.customer_id)
        customer_name,
        order_datetime,
        product.id    AS product_id,
        product.name  AS product_name,
        product.price AS product_price,
        product.qty   AS quantity,
        product.curr  AS currency,
        product.unit  AS unit
    FROM tmp_sales_orders
    LATERAL VIEW explode(ordered_products) AS product
),

promotions AS (
    -- One row per entry of promo_info.
    SELECT
        order_number,
        promo.promo_id,
        promo.promo_item AS product_id,
        promo.promo_qty  AS promo_quantity,
        promo.promo_disc AS discount_rate
    FROM tmp_sales_orders
    LATERAL VIEW explode(promo_info) AS promo
    WHERE size(promo_info) > 0
),

clicks AS (
    -- One row per entry of clicked_items; entries are read by position.
    SELECT
        order_number,
        customer_id,
        clicked_item[0] AS product_id,
        clicked_item[1] AS click_count
    FROM tmp_sales_orders
    LATERAL VIEW explode(clicked_items) AS clicked_item
)

SELECT
    p.order_number,
    p.customer_name,
    p.customer_id,
    p.order_datetime,
    ci.click_count,
    p.product_id,
    p.product_name,
    p.product_price,
    p.quantity,
    p.currency,
    p.unit,
    pr.promo_id,
    pr.promo_quantity,
    pr.discount_rate,
    -- Gross value of the line, before any promotion.
    p.product_price * p.quantity AS total_estimated_price,
    -- Value of the promoted units after the discount.
    ROUND(p.product_price * pr.promo_quantity * (1 - pr.discount_rate), 2) AS value_with_dicount,
    -- Monetary discount granted on the promoted units.
    ROUND(p.product_price * pr.promo_quantity * pr.discount_rate, 2) AS dicount_value,
    -- Net value of the line: gross value minus the monetary discount. The
    -- discount term must include product_price, matching dicount_value above.
    CASE
        WHEN pr.promo_id IS NULL THEN ROUND(p.product_price * p.quantity, 2)
        ELSE ROUND(
            (p.product_price * p.quantity)
            - (p.product_price * pr.promo_quantity * pr.discount_rate),
            2
        )
    END AS sale_value
FROM products p
LEFT JOIN promotions pr
    ON p.order_number = pr.order_number
   AND p.product_id = pr.product_id
LEFT JOIN clicks ci
    ON p.order_number = ci.order_number
   AND p.product_id = ci.product_id
