# _Inicio Bronce_

### Configuración inicial de scopes, keys, conns, paths

## IMPORTANTE: 
### Encriptar la cadena de conexión de Event Hubs: el conector de Spark para Event Hubs no acepta directamente la cadena de conexión en texto plano, por seguridad y manejo interno.

Se necesita:
- Instalar la librería com.microsoft.azure:azure-eventhubs-spark_2.12:2.3.21
- Cómputo con Scala 2.12 (a la fecha, 7/2025, Scala 2.13 no soporta `encrypt`)

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
import json

# Secret scope / key names used for credential lookups.
eventhub_secret_scope, eventhub_secret_key = "eventhub_scope", "eventhub_conn"
storage_secret_scope, storage_secret_key = "storage-scope", "storage-conn"

# Plain-text Event Hubs connection string read from the secret scope.
eventhub_connection_string = dbutils.secrets.get(
    scope=eventhub_secret_scope,
    key=eventhub_secret_key,
)
# Client secret used as the OAuth client secret in the mount configuration.
client_secret = dbutils.secrets.get(
    scope="client-scope",
    key="client-secret",
)

# Starting position for the Event Hubs reader; it is serialized with
# json.dumps where the reader options are assembled.
starting_position = dict(
    offset="0",
    seqNo=0,
    enqueuedTime=None,
    isInclusive=True,
)

# Reader options for the Event Hubs source.
# Reuses the connection string already fetched into
# `eventhub_connection_string` instead of hitting the secret scope again.
# NOTE(review): this dict carries the plain-text connection string; per the
# notebook header the connector needs it encrypted, and a later cell rebuilds
# `eventhub_config` with the encrypted value before the stream is started.
eventhub_config = {
    "eventhubs.connectionString": eventhub_connection_string,
    "eventhubs.consumerGroup": "market-consumer-group",
    "eventhubs.startingPosition": json.dumps(starting_position),
}
# OAuth client-credentials settings passed as `extra_configs` when mounting
# the storage containers. The application (client) id and the directory
# (tenant) id are named here instead of being buried in the dict / URL.
oauth_client_id = "42add4c7-97b5-46af-b412-3d22af3f601b"
oauth_tenant_id = "f8665296-99ec-4c56-8ec6-6385ed1c7cc9"

configs = {
  "fs.azure.account.auth.type": "OAuth",
  "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
  "fs.azure.account.oauth2.client.id": oauth_client_id,
  "fs.azure.account.oauth2.client.secret": client_secret,
  "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{oauth_tenant_id}/oauth2/token",
}

# Mount the rawmarket, bronze, silver and gold containers under
# /mnt/<container> using the OAuth settings in `configs`.
#
# With force_remount = True (the default, matching the previous behaviour) a
# container that is already mounted is unmounted and mounted again on every
# run. Set it to False once the mounts exist to leave them untouched, rather
# than commenting code in and out.
force_remount = True

# Listed once up front: every container has its own mount point, so
# unmounting one does not change the answer for the others.
mounted_points = {mount.mountPoint for mount in dbutils.fs.mounts()}

for container in ("rawmarket", "bronze", "silver", "gold"):
    mount_point = f"/mnt/{container}"
    if mount_point in mounted_points:
        if not force_remount:
            continue
        dbutils.fs.unmount(mount_point)
    dbutils.fs.mount(
        source=f"abfss://{container}@mistorageprincipal.dfs.core.windows.net/",
        mount_point=mount_point,
        extra_configs=configs,
    )

# Storage (ADLS) connection: register the account key on the Spark session
# for direct abfss:// access.
storage_account_name = "mistorageprincipal"
# Look the key up through the scope/key names declared at the top of the cell
# instead of repeating the literals.
storage_key = dbutils.secrets.get(scope=storage_secret_scope, key=storage_secret_key)
spark.conf.set(f"fs.azure.account.key.{storage_account_name}.dfs.core.windows.net", storage_key)

# Location of the bronze "sales" data, addressed through the /mnt/bronze mount.
bronze_mount_path = "/mnt/bronze/sales"
# Checkpoint location addressed directly via abfss://.
# NOTE(review): not referenced by the streaming write in the visible part of
# this notebook, which uses bronze_mount_path + "/_checkpoint" instead.
bronze_checkpoint_path = f"abfss://bronze@{storage_account_name}.dfs.core.windows.net/checkpoints"

In [0]:
# List the contents of the bronze sales path through the mount.
dbutils.fs.ls(bronze_mount_path)

### Definición de esquema

In [0]:
# Schema applied to the JSON payload of each event. Every field is nullable;
# "timestamp" is declared as a string and "amount" as a double.
event_schema = (
    StructType()
    .add("transaction_id", StringType(), True)
    .add("timestamp", StringType(), True)
    .add("product_id", StringType(), True)
    .add("store", StringType(), True)
    .add("customer_id", StringType(), True)
    .add("amount", DoubleType(), True)
    .add("channel", StringType(), True)
)


Prueba para ver si el stream está activo

In [0]:
# Read the plain-text connection string from the secret scope.
eventhub_connection_string = dbutils.secrets.get(
    scope=eventhub_secret_scope,
    key=eventhub_secret_key,
)

# Encrypt it through the connector's JVM-side helper, reached via the Py4J
# gateway of the SparkContext.
eventhubs_utils = sc._jvm.org.apache.spark.eventhubs.EventHubsUtils
encrypted_conn_str = eventhubs_utils.encrypt(eventhub_connection_string)

# Reader options carrying the encrypted connection string.
eventhub_config = dict(
    [
        ("eventhubs.connectionString", encrypted_conn_str),
        ("eventhubs.consumerGroup", "market-consumer-group"),
        ("eventhubs.startingPosition", json.dumps(starting_position)),
    ]
)

In [0]:
# Register the storage account key on the Spark session (same setting as in
# the configuration cell).
spark.conf.set(
    "fs.azure.account.key.mistorageprincipal.dfs.core.windows.net",
    dbutils.secrets.get(scope="storage-scope", key="storage-conn"),
)

# Raw stream read from Event Hubs with the options in `eventhub_config`.
raw_stream_df = (
    spark.readStream
    .format("eventhubs")
    .options(**eventhub_config)
    .load()
)

# Decode the message body as JSON, flatten it to the `event_schema` columns
# and add a parsed event timestamp plus the ingestion time.
parsed_df = (
    raw_stream_df
    .selectExpr("cast(body as string) as json_data")
    .select(from_json(col("json_data"), event_schema).alias("data"))
    .select("data.*")
    .withColumn("event_timestamp", to_timestamp("timestamp"))
    .withColumn("ingestion_time", current_timestamp())
)

# Append the parsed events to the bronze Delta location. The StreamingQuery
# handle is kept so the ingest can be inspected or stopped later
# (bronze_query.status, bronze_query.stop()).
bronze_query = (
    parsed_df.writeStream
    .format("delta")
    .option("checkpointLocation", bronze_mount_path + "/_checkpoint")
    .option("path", bronze_mount_path)
    .outputMode("append")
    .start()
)

# The previous display(parsed_df) started a second streaming query from the
# same source, i.e. a second set of Event Hubs receivers on the same consumer
# group competing with the Delta sink query. Report the state of the sink
# query instead; the ingested rows can be inspected from the Delta location.
print(bronze_query.status)


In [0]:
# Show the ingested transactions from the bronze Delta location, most recent
# ingestion first.
latest_rows_sql = f"""SELECT transaction_id,event_timestamp, ingestion_time
FROM delta.`{bronze_mount_path}` ORDER BY ingestion_time DESC"""
latest_rows_df = spark.sql(latest_rows_sql)
latest_rows_df.display()

In [0]:
# catalog_table = spark.read.format("delta").load(bronze_mount_path)
# display(catalog_table) 

## _Fin bronce_