# Bronze (Raw Kafka → **Internal** Delta Tables) — Databricks


## Kết nối Confluent Cloud
Lấy `bootstrap_servers` trong **Cluster settings → Bootstrap servers** của Confluent Cloud.

In [0]:
# Confluent Cloud connection settings, each read from the same Databricks secret scope.
secret_scope = "databricks_scope"
bootstrap_servers, kafka_api_key, kafka_api_secret = (
    dbutils.secrets.get(scope=secret_scope, key=secret_name)
    for secret_name in ("bootstrap_servers", "confluent_api_key", "confluent_api_secret")
)

# Name of the Kafka topic handled by this notebook.
topic_items = "book-metadata"

# Checkpoint paths: one sub-directory per topic under the shared base path.
ckpt_base = "/Volumes/bigdata-and-bi/bronze/amazon"
ckpt_items = "/".join((ckpt_base, topic_items))


## Stream Kafka (items) → **internal table** `bronze.items_raw`


In [0]:
from pyspark.sql.functions import current_timestamp, to_date

# Kafka source options: authenticate over SASL_SSL/PLAIN with the API key and
# secret, and subscribe to the items topic.
kafka_opts_items = {
    "kafka.bootstrap.servers": bootstrap_servers,
    "kafka.security.protocol": "SASL_SSL",
    "kafka.sasl.mechanism": "PLAIN",
    "kafka.sasl.jaas.config": f'kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule required username="{kafka_api_key}" password="{kafka_api_secret}";',
    "subscribe": topic_items,
    # startingOffsets is only consulted when the checkpoint is empty (first run);
    # later runs resume from the checkpointed offsets. Combined with the
    # availableNow trigger below, "latest" would make the first run ingest nothing
    # and permanently skip every record already in the topic, so start from the
    # earliest retained offset instead.
    "startingOffsets": "earliest",
}

# Streaming read of the raw Kafka records, tagged with the ingestion timestamp
# and the ingestion date derived from it.
items_raw_df = (
    spark.readStream
    .format("kafka")
    .options(**kafka_opts_items)
    .load()
    .withColumn("ingest_ts", current_timestamp())
    .withColumn("ingest_date", to_date(current_timestamp()))
)

# Append the records to the bronze Delta table. The availableNow trigger
# processes the data available when the query starts and then stops the query.
query_items = (
    items_raw_df
    .writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", ckpt_items)
    .trigger(availableNow=True)
    .toTable("`bigdata-and-bi`.bronze.items_raw")
)

# Block until the bounded run finishes so that a stream failure raises in this
# cell instead of being lost in the background.
query_items.awaitTermination()
