# Bronze (Raw Kafka → **Internal** Delta Tables) — Databricks


## Kết nối Confluent Cloud
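Lấy `bootstrap_servers` trong **Cluster settings → Bootstrap servers** của Confluent Cloud, rồi lưu giá trị này cùng với API key/secret vào secret scope `databricks_scope` để cell bên dưới đọc qua `dbutils.secrets.get`.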

In [0]:
# Confluent Cloud connection settings are read from a Databricks secret scope
# instead of being hard-coded in the notebook.
_SECRET_SCOPE = "databricks_scope"

bootstrap_servers = dbutils.secrets.get(scope=_SECRET_SCOPE, key="bootstrap_servers")
kafka_api_key = dbutils.secrets.get(scope=_SECRET_SCOPE, key="confluent_api_key")
kafka_api_secret = dbutils.secrets.get(scope=_SECRET_SCOPE, key="confluent_api_secret")

# Kafka topic that carries the review events.
topic_reviews = "book-reviews"

# Streaming checkpoints: one sub-folder per topic under a common base path.
ckpt_base = "/Volumes/bigdata-and-bi/bronze/amazon"
ckpt_reviews = "/".join([ckpt_base, topic_reviews])


## Stream Kafka (reviews) → **internal table** `bronze.reviews_raw`


In [0]:
from pyspark.sql.functions import current_timestamp, to_date

# JAAS login string for SASL/PLAIN: the Confluent API key and secret are
# passed as the username and password.
_jaas_config = (
    "kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule required "
    f'username="{kafka_api_key}" '
    f'password="{kafka_api_secret}";'
)

# Options handed to the Kafka stream reader for the reviews topic.
# NOTE(review): with the `availableNow` trigger used by the writer below, a
# first run (no checkpoint yet) that starts at "latest" will presumably pick up
# no pre-existing messages — confirm this is intended, otherwise use "earliest".
kafka_opts_reviews = {
    "kafka.bootstrap.servers": bootstrap_servers,
    "kafka.security.protocol": "SASL_SSL",
    "kafka.sasl.mechanism": "PLAIN",
    "kafka.sasl.jaas.config": _jaas_config,
    "subscribe": topic_reviews,
    "startingOffsets": "latest",
}

# Open the reviews topic as a streaming DataFrame using the Kafka options.
_kafka_reader = spark.readStream.format("kafka").options(**kafka_opts_reviews)
_kafka_stream_df = _kafka_reader.load()

# Stamp every record with the time and the calendar date it was ingested.
_ingested_at = current_timestamp()
reviews_raw_df = (
    _kafka_stream_df
    .withColumn("ingest_ts", _ingested_at)
    .withColumn("ingest_date", to_date(_ingested_at))
)

# Write the raw stream to the internal (managed) Delta table.
# `availableNow=True` makes this a bounded run: the query processes the data
# that is available when it starts and then stops on its own.
query_reviews = (
    reviews_raw_df
    .writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", ckpt_reviews)
    .trigger(availableNow=True)
    .toTable("`bigdata-and-bi`.bronze.reviews_raw")
)

# `toTable` only starts the query and returns right away. Block until the run
# has finished so that later cells read a fully loaded table and any streaming
# failure is raised in this cell instead of being lost in the background.
query_reviews.awaitTermination()


Các cột của bảng `bronze.reviews_raw`: key,value,topic,partition,offset,timestamp,timestampType,ingest_ts,ingest_date
