**Service Principal Authentication**

In [0]:
# --------------------------------
# Bronze Layer - Medallion Architecture (Service Principal Auth)
# --------------------------------

# Service Principal credentials come from a Databricks secret scope so they
# never appear in the notebook source or its output.
secret_scope = "secretscope_datacapus6"
client_id = dbutils.secrets.get(scope=secret_scope, key="client-id")
client_secret = dbutils.secrets.get(scope=secret_scope, key="client-secret")
tenant_id = dbutils.secrets.get(scope=secret_scope, key="tenant-id")

# Azure Data Lake Storage account and container that hold the Bronze layer
storage_account = "storageaccus6"
container_name = "bronze"
account_host = f"{storage_account}.dfs.core.windows.net"

# Configure Spark for Service Principal + OAuth (client credentials flow).
# Every key is suffixed with the storage account host so the credentials apply
# to this account only, instead of overriding authentication for every ABFS
# account touched in the same Spark session.
oauth_settings = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": client_id,
    "fs.azure.account.oauth2.client.secret": client_secret,
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token",
}
for conf_key, conf_value in oauth_settings.items():
    spark.conf.set(f"{conf_key}.{account_host}", conf_value)

# Base path for Bronze layer (root of the container, with trailing slash so
# file and table names can be appended directly)
abfss_path = f"abfss://{container_name}@{account_host}/"


In [0]:
# List what currently sits at the root of the Bronze container; this also
# confirms that the storage authentication configured above works.
bronze_root_listing = dbutils.fs.ls(abfss_path)
display(bronze_root_listing)

**Sample Reads for Validation**

In [0]:


def _preview_raw_file(file_name, label, rows=5):
    """Read one raw Bronze file as plain text lines and show its first rows.

    Args:
        file_name: File name relative to ``abfss_path``.
        label: Caption printed above the preview.
        rows: Number of rows to display.

    Returns:
        The full (unlimited) DataFrame produced by ``spark.read.text``.
    """
    sample_df = spark.read.text(f"{abfss_path}{file_name}")
    print(label)
    display(sample_df.limit(rows))
    return sample_df


# Each source is read with the text reader, including the .json file.
df_json_sample = _preview_raw_file("house-price-parquet.json", "JSON Sample: ")
df_orders_sample = _preview_raw_file("sales_products.txt", "Sales Orders Sample: ")
df_world_sample = _preview_raw_file("population.txt", "World Population Sample: ")

**Ingest raw data into Bronze layer as Delta tables**

In [0]:
def _land_as_delta(file_name, table_dir, merge_schema=False):
    """Read a raw text file and overwrite a Delta table with its contents.

    Args:
        file_name: Source file name relative to ``abfss_path``.
        table_dir: Target Delta table directory relative to ``abfss_path``.
        merge_schema: When True, the write carries ``mergeSchema=true``.

    Returns:
        The DataFrame that was read from ``file_name``.
    """
    raw_df = spark.read.text(f"{abfss_path}{file_name}")
    delta_writer = raw_df.write.format("delta").mode("overwrite")
    if merge_schema:
        delta_writer = delta_writer.option("mergeSchema", "true")
    delta_writer.save(f"{abfss_path}{table_dir}")
    return raw_df


# Only the house price table is written with mergeSchema enabled.
df_house_bronze = _land_as_delta("house-price-parquet.json", "house_price", merge_schema=True)
df_orders_bronze = _land_as_delta("sales_products.txt", "sales_products")
df_world_bronze = _land_as_delta("population.txt", "population")

**Demonstrate Delta Lake Time Travel (Versioning)**

In [0]:
# Query the Delta transaction history of the sales_products table, addressing
# the table by its storage path rather than by a catalog name.
sales_products_table = f"{abfss_path}sales_products"
history_query = f"DESCRIBE HISTORY delta.`{sales_products_table}`"
history_df = spark.sql(history_query)
display(history_df)

In [0]:
# Time travel: load earlier snapshots of the Bronze tables, one selected by
# version number and one selected by timestamp.
house_price_table = f"{abfss_path}house_price"
sales_products_table = f"{abfss_path}sales_products"

# NOTE(review): fixed timestamp; presumably it must fall inside the history
# still retained for sales_products, otherwise the load fails. Confirm
# against the DESCRIBE HISTORY output before re-running.
orders_as_of = "2025-11-04T11:34:22"

# A fresh reader is built for each load so the snapshot option of one read
# cannot carry over to the other.
df_house_v0 = (
    spark.read.format("delta")
    .option("versionAsOf", 0)
    .load(house_price_table)
)

df_orders_old = (
    spark.read.format("delta")
    .option("timestampAsOf", orders_as_of)
    .load(sales_products_table)
)

print("House Price Data Version 0: ")
display(df_house_v0.limit(5))

print(f"Sales Orders Data as of {orders_as_of} ")
display(df_orders_old.limit(5))