In [0]:
from pyspark.sql import SparkSession
# Reuse the active Spark session if one exists, otherwise build one,
# registered under this pipeline's application name.
spark = (
    SparkSession.builder
    .appName("ADLSGen2_Autoloader_Pipeline")
    .getOrCreate()
)



In [0]:
# -----------------------------
# 1. Load storage credentials from the Databricks secret scope
# -----------------------------
# Prerequisite: a secret scope named "kv_scope" (linked to Key Vault) already
# exists and exposes both keys read below.
storage_account_name = dbutils.secrets.get(
    scope="kv_scope",
    key="storageAccountName",
)
storage_account_key = dbutils.secrets.get(
    scope="kv_scope",
    key="storageAccountKey",
)



In [0]:
# -----------------------------
# 2. Register the storage account key with Spark for ADLS Gen2
# -----------------------------
# The setting is keyed by the account's dfs endpoint host name, so it applies
# only to this one storage account.
spark.conf.set(
    f"fs.azure.account.key.{storage_account_name}.dfs.core.windows.net",
    storage_account_key,
)



In [0]:
# -----------------------------
# 3. Source location in ADLS Gen2
# -----------------------------
# abfss URI: container "raw" on the configured account, folder "lifescience-data".
source_path = (
    f"abfss://raw@{storage_account_name}.dfs.core.windows.net"
    "/lifescience-data"
)



In [0]:
# -----------------------------
# 4. Autoloader ingestion (Bronze layer)
# -----------------------------
# Incrementally reads files arriving under `source_path` as a streaming DataFrame.
#
# No explicit schema is supplied, so Auto Loader has to infer one. Schema
# inference requires `cloudFiles.schemaLocation` (a durable directory where the
# inferred schema and its later changes are tracked); without it the stream
# cannot be started. The schema is stored under the same directory the Bronze
# write uses as its checkpoint location.
#
# NOTE(review): no `header` option is set, so the CSV reader default applies and
# columns get positional names — confirm the source files really have no header row.
bronze_df = (
    spark.readStream.format("cloudFiles")
    .option("cloudFiles.format", "csv")   # adjust to parquet/json if needed
    .option("cloudFiles.schemaLocation", "/mnt/checkpoints/lifescience/bronze")
    .option("cloudFiles.inferColumnTypes", "true")  # infer typed columns instead of all-strings
    .load(source_path)
)



In [0]:
# -----------------------------
# 5. Stream the Bronze DataFrame into a Delta table
# -----------------------------
# Append-only write; streaming progress is tracked in the checkpoint directory.
# The call is left as the cell's final expression, so the started query handle
# is what the cell outputs.
(
    bronze_df.writeStream
    .format("delta")
    .option("checkpointLocation", "/mnt/checkpoints/lifescience/bronze")
    .outputMode("append")
    .start("/mnt/datalake/lifescience/bronze")
)
