# _Inicio Silver_

### Aquí vale aclarar que usaremos un *initialization buffer*
Un tiempo de espera al inicio para asegurar que los recursos estén disponibles (montajes, paths), ya que en el mismo job se alojan Bronze y Silver.

In [0]:
import time

# Initialization buffer: pause for 3 minutes (180 seconds) before Silver starts.
wait_seconds = 180
print(f"Esperando {wait_seconds} segundos antes de iniciar el procesamiento Silver...")

elapsed = 0
while elapsed < wait_seconds:
    # Report progress once every 30 seconds (at 0, 30, 60, ...).
    if elapsed % 30 == 0:
        print(f"Esperando... {elapsed}/{wait_seconds} segundos")
    time.sleep(1)
    elapsed += 1

print("Espera completada. Iniciando módulo Silver...")

com.databricks.backend.common.rpc.CommandCancelledException
	at com.databricks.spark.chauffeur.ExecContextState.cancel(ExecContextState.scala:434)
	at com.databricks.spark.chauffeur.ExecutionContextManagerV1.cancelExecution(ExecutionContextManagerV1.scala:473)
	at com.databricks.spark.chauffeur.ChauffeurState.$anonfun$process$1(ChauffeurState.scala:750)
	at com.databricks.logging.UsageLogging.$anonfun$recordOperation$1(UsageLogging.scala:510)
	at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:616)
	at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:643)
	at com.databricks.logging.AttributionContextTracing.$anonfun$withAttributionContext$1(AttributionContextTracing.scala:49)
	at com.databricks.logging.AttributionContext$.$anonfun$withValue$1(AttributionContext.scala:293)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
	at com.databricks.logging.AttributionContext$.withValue(Attr

In [0]:
# Storage locations used by the Silver stage.
# Plain string literals: none of these paths interpolate anything, so no f-prefix.

# Mounted locations: Bronze is loaded as the streaming source and the Silver
# outputs are written underneath silver_mount_path.
silver_mount_path = "/mnt/silver/sales"
bronze_mount_path = "/mnt/bronze/sales"

# ABFSS checkpoint directories.
# NOTE(review): neither checkpoint path is referenced by the cells visible in this
# notebook (the stream writers build their checkpoint locations from
# silver_mount_path) — confirm whether they are still needed.
checkpoint_path_silver = "abfss://silver@mistorageprincipal.dfs.core.windows.net/checkpoints"
bronze_checkpoint_path = "abfss://bronze@mistorageprincipal.dfs.core.windows.net/checkpoints"

### Lectura de archivos planos csv para enriquecimiento posterior

In [0]:
# Directory that holds the reference CSV files used to enrich the sales stream.
RAW_CSV_DIR = "/mnt/rawmarket/csv-raw"


def _read_raw_csv(file_name):
    """Load one reference CSV from RAW_CSV_DIR as a batch DataFrame.

    Every file is read with the same options: the first line is the header,
    column types are inferred, fields are comma-delimited and the text is
    decoded as ISO-8859-1.

    Args:
        file_name: File name inside RAW_CSV_DIR, e.g. "products.csv".

    Returns:
        The DataFrame produced by spark.read for that file.
    """
    return (
        spark.read
        .format("csv")
        .option("header", True)
        .option("inferSchema", True)
        .option("delimiter", ",")
        .option("encoding", "ISO-8859-1")
        .load(f"{RAW_CSV_DIR}/{file_name}")
    )


# One DataFrame per reference file; a single reader keeps the options in sync.
df_productos = _read_raw_csv("products.csv")
df_stock = _read_raw_csv("stock.csv")
df_stores = _read_raw_csv("stores.csv")
df_channels = _read_raw_csv("channels.csv")
df_customers = _read_raw_csv("customers.csv")

## Transformaciones y limpiezas

#### 1 - Creación de alias de cada df: evita conflictos con nombres de columnas repetidas.

#### 2 - Creación de joins entre los dfs y broadcasting para optimizar el rendimiento.

#### 3 -  Limpieza y filtros básicos

#### 4 - Generación de columnas de tiempo (event_timestamp, event_date)

#### 5 - Normalización de nombres de sucursales (expresión regular)

#### 6 - Creación de Hash único por transacción

#### 7 -  Categorización del ticket

#### 8 - Escritura en formato Delta de 2 dfs (original y enriquecido)

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

# 1. Open the Bronze Delta location as a streaming source.
bronze_stream_df = spark.readStream.format("delta").load(bronze_mount_path)

# 2. Give every Bronze column a "b_" prefix so the joins further down never
#    have to deal with ambiguous column names.
bronze_columns = (
    "transaction_id",
    "timestamp",
    "product_id",
    "store",
    "customer_id",
    "amount",
    "channel",
    "ingestion_time",
)

bronze_alias = bronze_stream_df.alias("b").select(
    *[col(name).alias(f"b_{name}") for name in bronze_columns]
)

# Alias and rename the batch (CSV) DataFrames; each one gets its own column
# prefix. Adjust the source column names if the real files differ.
def _alias_and_rename(df, df_alias, renames):
    """Alias df and project it to the keys of renames, renamed to the mapped values.

    The output columns follow the insertion order of the renames mapping.
    """
    return df.alias(df_alias).select(
        *[col(source).alias(target) for source, target in renames.items()]
    )


df_productos_alias = _alias_and_rename(df_productos, "p", {
    "product_id": "p_product_id",
    "name": "p_product_name",
    "category": "p_category",
    "price": "p_price",
})

df_stock_alias = _alias_and_rename(df_stock, "st", {
    "product_id": "st_product_id",
    "available_stock": "st_available_stock",
})

df_stores_alias = _alias_and_rename(df_stores, "s", {
    "store": "s_store_info",
    "store_name": "s_store_name",
    "location": "s_location",
    "region": "s_region",
})

df_channels_alias = _alias_and_rename(df_channels, "ch", {
    "channel": "ch_channel_info",
    "channel_name": "ch_channel_name",
    "is_digital": "ch_is_digital",
})

df_customers_alias = _alias_and_rename(df_customers, "cu", {
    "customer_id": "cu_customer_info_id",
    "full_name": "cu_full_name",
    "age": "cu_age",
    "gender": "cu_gender",
    "loyalty_tier": "cu_loyalty_tier",
})

# 3. Left-join every lookup table onto the Bronze stream. Each entry is
#    (lookup DataFrame, stream-side key, lookup-side key); the lookup side is
#    wrapped in broadcast() and the join condition is an explicit equality.
enrichment_joins = [
    (df_productos_alias, "b_product_id", "p_product_id"),
    (df_stock_alias, "b_product_id", "st_product_id"),
    (df_stores_alias, "b_store", "s_store_info"),
    (df_channels_alias, "b_channel", "ch_channel_info"),
    (df_customers_alias, "b_customer_id", "cu_customer_info_id"),
]

joined_df = bronze_alias
for lookup_df, stream_key, lookup_key in enrichment_joins:
    joined_df = joined_df.join(
        broadcast(lookup_df),
        col(stream_key) == col(lookup_key),
        "left",
    )

# 4. Clean, normalise and project the enriched stream.
#
# Steps, in order: de-duplicate on the transaction id, drop rows without an id
# or with a missing / non-positive amount, derive event_timestamp and
# event_date, normalise channel / store / customer_id, compute a SHA-256
# transaction hash, bucket the amount into a ticket category, and select the
# final column set (core columns first, lookup columns after).
#
# The store regex and the transaction hash are computed from the *trimmed*
# "store" and "customer_id" columns, exactly as silver_simple_df does, so both
# Silver outputs produce the same store name and hash for the same transaction
# even when the raw values carry surrounding whitespace.
#
# NOTE(review): dropDuplicates runs on a stream with no watermark, so the
# de-duplication state is kept for every key ever seen — confirm this is
# acceptable for the expected volume.
silver_enriched_df = (
    joined_df
    .dropDuplicates(["b_transaction_id"])
    .filter(col("b_transaction_id").isNotNull())
    .filter(col("b_amount").isNotNull() & (col("b_amount") > 0))
    .withColumn("event_timestamp", to_timestamp(col("b_timestamp")))
    .withColumn("event_date", to_date(col("event_timestamp")))
    .drop("b_timestamp")
    .withColumn("channel", upper(trim(col("b_channel"))))
    .withColumn("store", trim(col("b_store")))
    # Match against the trimmed value: the pattern is anchored with "$", so
    # trailing whitespace in the raw column would otherwise defeat it.
    .withColumn("store_letter", regexp_extract(lower(col("store")), r"sucursal\s*([a-z])$", 1))
    .withColumn(
        "store",
        # A captured branch letter yields the canonical "Sucursal X" form;
        # anything else is only title-cased.
        when(col("store_letter") != "", concat(lit("Sucursal "), upper(col("store_letter"))))
        .otherwise(initcap(col("store")))
    )
    .drop("store_letter")
    .withColumn("customer_id", trim(col("b_customer_id")))
    # Hash the trimmed customer_id (not the raw b_customer_id) so the value
    # agrees with the hash computed in silver_simple_df.
    .withColumn(
        "transaction_hash",
        sha2(
            concat_ws(
                "|",
                col("b_transaction_id"),
                col("customer_id"),
                col("b_product_id"),
                col("event_timestamp"),
            ),
            256,
        ),
    )
    .withColumn(
        "ticket_category",
        # Below 50 -> BAJO, 50..200 inclusive -> MEDIO, everything else -> ALTO.
        when(col("b_amount") < 50, "BAJO")
        .when(col("b_amount").between(50, 200), "MEDIO")
        .otherwise("ALTO")
    )
    .select(
        col("b_transaction_id").alias("transaction_id"),
        col("event_date"),
        col("store"),
        col("channel"),
        col("b_product_id").alias("product_id"),
        col("customer_id"),
        col("b_amount").alias("amount"),
        col("ticket_category"),
        col("transaction_hash"),
        col("event_timestamp"),
        col("b_ingestion_time").alias("ingestion_time"),
        # Enrichment columns coming from the lookup tables
        col("p_product_name").alias("product_name"),
        col("st_available_stock").alias("stock_quantity"),
        col("s_store_name").alias("store_name"),
        col("s_location").alias("store_location"),
        col("s_region").alias("store_region"),
        col("ch_channel_name").alias("channel_name"),
        col("ch_is_digital").alias("channel_is_digital"),
        col("cu_full_name").alias("customer_name"),
        col("cu_age").alias("customer_age"),
        col("cu_gender").alias("customer_gender"),
        col("cu_loyalty_tier").alias("customer_loyalty_tier")
    )
)

# 6. Plain Silver stream (no lookup joins), kept as a reference output.

# Row validity: a transaction id is present and the amount is present and positive.
simple_row_is_valid = (
    col("transaction_id").isNotNull()
    & col("amount").isNotNull()
    & (col("amount") > 0)
)

# Store normalisation: trim, then turn "sucursal <letter>" into "Sucursal <LETTER>";
# any other value is only title-cased.
simple_store_trimmed = trim(col("store"))
simple_store_letter = regexp_extract(lower(simple_store_trimmed), r"sucursal\s*([a-z])$", 1)
simple_store_normalized = (
    when(simple_store_letter != "", concat(lit("Sucursal "), upper(simple_store_letter)))
    .otherwise(initcap(simple_store_trimmed))
)

# Amount buckets: below 50 -> BAJO, 50..200 inclusive -> MEDIO, otherwise ALTO.
simple_ticket_category = (
    when(col("amount") < 50, "BAJO")
    .when(col("amount").between(50, 200), "MEDIO")
    .otherwise("ALTO")
)

silver_simple_df = (
    bronze_stream_df
    .dropDuplicates(["transaction_id"])
    .where(simple_row_is_valid)
    .withColumn("event_timestamp", to_timestamp(col("timestamp")))
    .withColumn("event_date", to_date(col("event_timestamp")))
    .drop("timestamp")
    .withColumn("channel", upper(trim(col("channel"))))
    .withColumn("store", simple_store_normalized)
    .withColumn("customer_id", trim(col("customer_id")))
    # The hash is built after customer_id has been trimmed and
    # event_timestamp has been derived, so it sees those values.
    .withColumn(
        "transaction_hash",
        sha2(
            concat_ws(
                "|",
                col("transaction_id"),
                col("customer_id"),
                col("product_id"),
                col("event_timestamp"),
            ),
            256,
        ),
    )
    .withColumn("ticket_category", simple_ticket_category)
    .select(
        "transaction_id",
        "event_date",
        "store",
        "channel",
        "product_id",
        "customer_id",
        "amount",
        "ticket_category",
        "transaction_hash",
        "event_timestamp",
        "ingestion_time",
    )
)

def _start_silver_append(stream_df, table_dir, checkpoint_dir):
    """Start a Delta append stream below silver_mount_path and return its query.

    Args:
        stream_df: Streaming DataFrame to write.
        table_dir: Sub-directory of silver_mount_path used as the Delta "path".
        checkpoint_dir: Sub-directory of silver_mount_path used as the checkpoint location.
    """
    return (
        stream_df.writeStream
        .format("delta")
        .option("checkpointLocation", f"{silver_mount_path}/{checkpoint_dir}")
        .option("path", f"{silver_mount_path}/{table_dir}")
        .outputMode("append")
        .start()
    )


# Streaming write of the plain Silver output
query_simple = _start_silver_append(silver_simple_df, "simple", "_checkpoint_simple")

# Streaming write of the enriched Silver output
query_enriched = _start_silver_append(silver_enriched_df, "enriched", "_checkpoint_enriched")

# Preview the enriched stream inside the notebook.
# NOTE(review): silver_enriched_df is derived from spark.readStream, so this
# presumably starts one more live query in addition to query_enriched — confirm
# it is wanted when the notebook runs as a scheduled job.
display(silver_enriched_df)


transaction_id,event_date,store,channel,product_id,customer_id,amount,ticket_category,transaction_hash,event_timestamp,ingestion_time,product_name,stock_quantity,store_name,store_location,store_region,channel_name,channel_is_digital,customer_name,customer_age,customer_gender,customer_loyalty_tier
22e56892-8844-4677-8157-05673205f297,2025-07-20,Sucursal B,ONLINE,P002,CUST005,386.32,ALTO,ddcffd1f37df6de6a027a151e4d04c8c7ccb5b68c18b1344f531380dd9307b50,2025-07-20T00:34:09.097107Z,2025-07-20T00:34:09.597Z,Pan integral,80,Tienda Norte,Tigre,Región Norte,Ecommerce,Sí,Laura Acosta,22,F,Silver
e0f6733c-e6da-4231-b3f0-810755d394ce,2025-07-18,Sucursal C,SELFCHECKOUT,P001,CUST003,35.53,BAJO,abd40766fdb05b3a1c62c4489b622b88fd6499bc3071bcf94dfe9a475b8bdb8b,2025-07-18T18:56:14.635382Z,2025-07-18T19:06:25.395Z,Leche entera,60,Tienda Sur,La Plata,Región Sur,Caja Automática,Sí,Ana Ruiz,27,F,Bronze
be11a5c2-cac3-41a8-bf84-d17d63c7f071,2025-07-20,Sucursal B,ONLINE,P005,CUST002,180.75,MEDIO,8a9dd610fed7fd19e2c70332c6e76cd1169d7b8bce44d0252d4063cf67a449b4,2025-07-20T00:26:36.549324Z,2025-07-20T00:26:36.964Z,Yerba mate,30,Tienda Norte,Tigre,Región Norte,Ecommerce,Sí,Carlos Pérez,41,M,Silver
2214e6bc-454d-4139-b53a-9505b1cec184,2025-07-20,Sucursal A,POS,P001,CUST005,426.04,ALTO,e6d28ce2a229f74fa903224998895b81305ef4a74a25a08511f67cb7b69ebb13,2025-07-20T20:26:32.244801Z,2025-07-20T20:26:34.014Z,Leche entera,60,Tienda Central,CABA,Región AMBA,Punto de Venta Físico,No,Laura Acosta,22,F,Silver
9deb40fd-161b-4765-a228-e3f4046f6a48,2025-07-16,Sucursal A,ONLINE,P005,CUST001,118.43,MEDIO,142cd116a4539fb16947d1122a46b84048d13c911a291358111faa54e6bdaf45,2025-07-16T20:44:11.643866Z,2025-07-16T20:44:13.771Z,Yerba mate,30,Tienda Central,CABA,Región AMBA,Ecommerce,Sí,María Gómez,34,F,Gold
e64d0707-9808-4f06-a126-9c85b3bdeecc,2025-07-20,Sucursal B,POS,P001,CUST004,208.44,ALTO,ce14f3d0d73f4b9ba82efb32070d67b2f3364e3189747852e706171ce73dfb8c,2025-07-20T00:32:08.421643Z,2025-07-20T00:32:08.926Z,Leche entera,60,Tienda Norte,Tigre,Región Norte,Punto de Venta Físico,No,Juan Torres,38,M,Gold
9b07503b-0f5b-46df-8e1e-f85e0ffe26a9,2025-07-20,Sucursal A,SELFCHECKOUT,P003,CUST005,68.81,MEDIO,6bbbbb40e55e2c71437077695a637debf592ec6c5bd9ad88d410a1157bef6265,2025-07-20T20:27:43.943079Z,2025-07-20T20:27:44.382Z,Queso cremoso,50,Tienda Central,CABA,Región AMBA,Caja Automática,Sí,Laura Acosta,22,F,Silver
7401cadf-0bd5-4a85-a515-0f7a37917c33,2025-07-20,Sucursal C,ONLINE,P003,CUST004,432.93,ALTO,99d39e63e657108d5c793921debbac62b67ad5338dfd641ac2f45b7f7ec470b2,2025-07-20T00:30:07.725976Z,2025-07-20T00:30:08.61Z,Queso cremoso,50,Tienda Sur,La Plata,Región Sur,Ecommerce,Sí,Juan Torres,38,M,Gold
3a90b0d1-75e6-4366-9aa2-4f361fd546f3,2025-07-20,Sucursal A,POS,P003,CUST003,200.24,ALTO,570ea0bc415f2a8bc505841efe83e055dbdaf008a98c391cdcf9a439a823c90e,2025-07-20T00:24:25.812048Z,2025-07-20T00:24:26.49Z,Queso cremoso,50,Tienda Central,CABA,Región AMBA,Punto de Venta Físico,No,Ana Ruiz,27,F,Bronze
76b19a2e-9d7b-487b-b186-f2e7de83d5e6,2025-07-15,Sucursal C,ONLINE,P002,CUST003,341.39,ALTO,aaef1c7a2f6e2896acb7aff27864bc4464f54f388299404b9c8c0ce5b7607c15,2025-07-15T19:20:32.838269Z,2025-07-15T19:28:27.966Z,Pan integral,80,Tienda Sur,La Plata,Región Sur,Ecommerce,Sí,Ana Ruiz,27,F,Bronze


In [0]:
# Read back the enriched Silver Delta table, most recently ingested rows first.
# The location is derived from silver_mount_path instead of being hard-coded a
# second time, so it stays equal to the "path" the enriched stream writes to.
silver_mount_path_enriched = silver_mount_path + "/enriched"
spark.sql(f"SELECT * FROM delta.`{silver_mount_path_enriched}` ORDER BY ingestion_time DESC").display()

transaction_id,event_date,store,channel,product_id,customer_id,amount,ticket_category,transaction_hash,event_timestamp,ingestion_time,product_name,stock_quantity,store_name,store_location,store_region,channel_name,channel_is_digital,customer_name,customer_age,customer_gender,customer_loyalty_tier
be5eb819-e587-4c57-a896-0305c60b11f0,2025-07-20,Sucursal B,ONLINE,P003,CUST003,462.1,ALTO,da3425bf4d1286735d04ad7b2125930e58853c7cc30a2d31b018221e50f3f688,2025-07-20T20:30:44.98703Z,2025-07-20T20:30:45.53Z,Queso cremoso,50,Tienda Norte,Tigre,Región Norte,Ecommerce,Sí,Ana Ruiz,27,F,Bronze
5ff8ac4c-a8ca-4bba-b8a9-9dfc621025d6,2025-07-20,Sucursal B,ONLINE,P004,CUST005,27.06,BAJO,54bacc43d255d6ecb9799f67472889d13d2b84fd00563fd11548c6dbf3d60821,2025-07-20T20:30:34.926896Z,2025-07-20T20:30:35.255Z,Coca Cola 1.5L,40,Tienda Norte,Tigre,Región Norte,Ecommerce,Sí,Laura Acosta,22,F,Silver
1e58f84d-cf01-46b0-b18e-642312a3f1f8,2025-07-20,Sucursal C,POS,P002,CUST002,401.19,ALTO,b0bb2546bc2cd5eb89643af829031a847a8b99ef55c0983024e335298c3a44aa,2025-07-20T20:30:24.866053Z,2025-07-20T20:30:25.018Z,Pan integral,80,Tienda Sur,La Plata,Región Sur,Punto de Venta Físico,No,Carlos Pérez,41,M,Silver
ac211ee6-9621-4a22-aa84-7fcfaf18c2af,2025-07-20,Sucursal A,POS,P005,CUST001,423.09,ALTO,147c344156d6a4844ccb283d87bdb3ed6cb68e5638e48899671494215f824f9d,2025-07-20T20:30:14.809077Z,2025-07-20T20:30:15.354Z,Yerba mate,30,Tienda Central,CABA,Región AMBA,Punto de Venta Físico,No,María Gómez,34,F,Gold
ffa457e1-86ac-4f12-82d8-dee73cb51c59,2025-07-20,Sucursal A,POS,P002,CUST002,477.67,ALTO,cfbb3e0bddc4934b0d4e06681a9f5378b977f7e7c92ae1b25f9419ce7c8c2204,2025-07-20T20:30:04.750987Z,2025-07-20T20:30:05.037Z,Pan integral,80,Tienda Central,CABA,Región AMBA,Punto de Venta Físico,No,Carlos Pérez,41,M,Silver
8cd6a662-faa8-43e3-a8bc-7832db02204d,2025-07-20,Sucursal B,SELFCHECKOUT,P004,CUST005,377.0,ALTO,0d2cc5809f9782c5e149b8a08a07502203e9f1e20f724ff9fb1613a271553b1c,2025-07-20T20:29:54.693356Z,2025-07-20T20:29:54.819Z,Coca Cola 1.5L,40,Tienda Norte,Tigre,Región Norte,Caja Automática,Sí,Laura Acosta,22,F,Silver
3bcb4794-01ff-4e20-99dd-1e9fd25c76ee,2025-07-20,Sucursal C,SELFCHECKOUT,P003,CUST005,313.35,ALTO,3de04db3c1cc3c2a6f46db3d403a2786d33cbf31902b308ce934c2f819a90fc0,2025-07-20T20:29:44.63603Z,2025-07-20T20:29:44.947Z,Queso cremoso,50,Tienda Sur,La Plata,Región Sur,Caja Automática,Sí,Laura Acosta,22,F,Silver
5dfcb298-8838-480f-bf05-d45ae8c3d65b,2025-07-20,Sucursal A,ONLINE,P001,CUST004,20.37,BAJO,92ecd6b36140ad57a0e73c51107b2f8729327abb36f11178dac093173c79a43f,2025-07-20T20:29:34.579047Z,2025-07-20T20:29:35.005Z,Leche entera,60,Tienda Central,CABA,Región AMBA,Ecommerce,Sí,Juan Torres,38,M,Gold
13d66405-add5-4a41-b82f-8ba0acdfe7b8,2025-07-20,Sucursal C,SELFCHECKOUT,P004,CUST002,195.25,MEDIO,1bf2cc79c483cf16af199fbee0a4c39140ec4d7b717a8a61590f1f47de2a14cc,2025-07-20T20:29:24.521416Z,2025-07-20T20:29:24.881Z,Coca Cola 1.5L,40,Tienda Sur,La Plata,Región Sur,Caja Automática,Sí,Carlos Pérez,41,M,Silver
24fdba35-5d4e-427b-826f-370cdcbcbb41,2025-07-20,Sucursal A,ONLINE,P003,CUST002,106.06,MEDIO,9674ad605765218cfc1fd1616e0e8318303f81b552c069b51b7cb3d4ba640de3,2025-07-20T20:29:14.463121Z,2025-07-20T20:29:15.03Z,Queso cremoso,50,Tienda Central,CABA,Región AMBA,Ecommerce,Sí,Carlos Pérez,41,M,Silver


In [0]:
# display(silver_simple_df)

%md
# _Fin Silver_