# Ingest to Bronze Layer
   
   Reads the `customers`, `orders`, and `products` tables from Azure SQL Database over JDBC and appends each one to a Delta table under `/mnt/bronze/<table_name>` in the bronze layer, adding a `time_ingested` timestamp column.
   
   **Note:** Replace placeholder values with actual credentials (use Databricks secrets in production).

In [0]:
# Connection settings for the Azure SQL source. Every "<...>" value below is a
# placeholder: substitute your own values (ideally read from Databricks
# secrets rather than pasted inline) before running the notebook.
jdbc_hostname = "<your_sql_server_name>.database.windows.net"
jdbc_port = 1433
jdbc_database = "<your_database_name>"

# Azure AD service principal identifiers used for authentication.
aad_client_id = "<your_aad_client_id>"
aad_client_secret = "<your_aad_client_secret>"
aad_tenant_id = "<your_aad_tenant_id>"

# JDBC URL: the server address followed by ";"-terminated "key=value" options.
jdbc_url = "".join(
    [
        f"jdbc:sqlserver://{jdbc_hostname}:{jdbc_port};",
        f"database={jdbc_database};",
        "encrypt=true;",
        "hostNameInCertificate=*.database.windows.net;",
        "authentication=ActiveDirectoryServicePrincipal;",
        f"tenantId={aad_tenant_id};",
    ]
)

# Properties handed to the JDBC reader: the service principal's client id and
# secret are supplied as user/password.
connection_properties = dict(
    user=aad_client_id,
    password=aad_client_secret,
    authentication="ActiveDirectoryServicePrincipal",
    driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
)

In [0]:
# Read dbo.customers from Azure SQL over JDBC into a DataFrame.
customers_df = spark.read.jdbc(
    jdbc_url,
    "dbo.customers",
    properties=connection_properties,
)

In [0]:
# Read dbo.orders from Azure SQL over JDBC into a DataFrame.
orders_df = spark.read.jdbc(
    jdbc_url,
    "dbo.orders",
    properties=connection_properties,
)

In [0]:
# Read dbo.products from Azure SQL over JDBC into a DataFrame.
products_df = spark.read.jdbc(
    jdbc_url,
    "dbo.products",
    properties=connection_properties,
)

In [0]:
from pyspark.sql.functions import current_timestamp

# Bronze target name -> source DataFrame to be written under that name.
table_names = dict(
    customers=customers_df,
    orders=orders_df,
    products=products_df,
)

# Stamp each table with its ingestion time, then append it to its own Delta
# location in the bronze layer.
for table_name, df in table_names.items():
    df = df.withColumn("time_ingested", current_timestamp())
    (
        df.write
        .format("delta")
        .mode("append")
        .save(f"/mnt/bronze/{table_name}")
    )