In [0]:
from datetime import datetime
from pyspark.sql import Row
 
# Ingest one raw JSON file into a Bronze Delta table and append a run record
# to the pipeline audit log, whether or not the ingestion succeeded.

# Step 1: Capture start time
start_time = datetime.now()

# Step 2: Initialize status
status = "Success"
error_message = ""
# Bind file_name before the try block: the audit step in `finally` reads it,
# and if fetching the widget value itself fails the name would otherwise be
# unbound, raising a NameError that hides the real error and skips the audit
# row. A string placeholder (rather than None) keeps the column type inferable
# when the single-row audit DataFrame is created.
file_name = "<unknown>"

# Step 3: Wrap ingestion logic
try:
    file_name = dbutils.widgets.get("p_file_name")
    # Extract base name without extension (text before the first '.')
    base_name = file_name.split('.')[0]
    # Create table name with prefix
    table_name = f"bronze_{base_name}"
    # Define Delta path
    delta_path = f"abfss://sedpcontainer@sedpstorageaccount.dfs.core.windows.net/Bronze/{file_name}_delta"
    # Read JSON and write to Delta.
    # The CSV-only reader options ("header", "inferSchema") were dropped: they
    # are not JSON reader options, and the JSON source infers its schema itself.
    df = spark.read.format("json") \
        .option("multiline", True) \
        .load(f'abfss://sedpcontainer@sedpstorageaccount.dfs.core.windows.net/raw/{file_name}')
    df.write.mode("overwrite") \
        .option("mergeSchema", "true") \
        .format("delta") \
        .save(delta_path)
    # Register Delta table
    spark.sql(f"""
        CREATE TABLE IF NOT EXISTS {table_name}
        USING DELTA
        LOCATION '{delta_path}'""")
    # Query the table
    spark.sql(f"SELECT * FROM {table_name}").show()
except Exception as e:
    # Record the failure for the audit row, then re-raise so the notebook run
    # (and any orchestrating pipeline) is still marked as failed.
    status = "Failed"
    error_message = str(e)
    print(f"Ingestion failed: {error_message}")
    raise
finally:
    # Step 4: Capture end time
    end_time = datetime.now()

    # Step 5: Create audit row
    # NOTE(review): error_message is captured above but not persisted here;
    # consider adding it as an audit column if the shared audit table's
    # consumers can accept the schema change — confirm before doing so.
    audit_row = [Row(
        SourceType="JSON",
        FileName=file_name,
        Status=status,
        StartTime=start_time,
        EndTime=end_time
    )]
    audit_df = spark.createDataFrame(audit_row)
    # Step 6: Write audit log to Delta
    audit_log_path = "abfss://sedpcontainer@sedpstorageaccount.dfs.core.windows.net/audit_logs/Pipeline_Run_Audit_Delta"
    audit_df.write \
        .format("delta") \
        .mode("append") \
        .option("mergeSchema", "true") \
        .save(audit_log_path)

    # Step 7: Register audit table (no-op once the table already exists)
    spark.sql(f"""
        CREATE TABLE IF NOT EXISTS Pipeline_Run_Audit_Delta
        USING DELTA
        LOCATION '{audit_log_path}'
    """)
    spark.sql("SELECT * FROM Pipeline_Run_Audit_Delta").show()
 
 
 

+--------+---------+------------+------------+-------+------+
|asset_id| category|commissioned|manufacturer|  model|region|
+--------+---------+------------+------------+-------+------+
|       1|  Turbine|  2018-11-19|          GE| TX-525|    EU|
|       2|  Battery|  2022-11-05|          GE| PN-180| LATAM|
|       3|Generator|  2018-06-05|     Siemens| GE-497|    NA|
|       4|  Turbine|  2021-10-15|     LG Chem|LGX-667|    EU|
|       5|  Turbine|  2022-10-09|   SolarEdge| PN-261|    NA|
|       6|Generator|  2020-03-18|     LG Chem| PN-716|    EU|
|       7|  Battery|  2020-04-09|     LG Chem|LGX-593| LATAM|
|       8|  Turbine|  2020-06-09|     LG Chem| SE-195|    UK|
|       9|  Turbine|  2018-12-21|     LG Chem| PN-563|    UK|
|      10|  Turbine|  2019-08-20|     Siemens|LGX-762|    NA|
|      11|  Turbine|  2021-03-02|     Siemens|LGX-156|    UK|
|      12|Generator|  2019-05-29|   Panasonic|LGX-755|    EU|
|      13|Generator|  2018-06-18|          GE| SE-311|    UK|
|      1