In [0]:
# Import necessary functions for metadata and auditing columns
from pyspark.sql.functions import current_timestamp, input_file_name, lit

In [0]:
# Notebook parameter that selects the target environment (defaults to "dev").
dbutils.widgets.text("env", "dev")

# Strip stray whitespace and fail fast on an empty value: the value is
# interpolated into the catalog name, so a blank widget would otherwise only
# surface later as a confusing "catalog not found" style error.
env = dbutils.widgets.get("env").strip()
if not env:
    raise ValueError("Widget 'env' must not be empty (for example 'dev').")

In [0]:
# Environment-specific catalog name: "supply_" followed by the value of `env`.
catalog = f"supply_{env}"

In [0]:
# Path of the raw supply chain CSV inside the selected catalog
# (presumably a Unity Catalog volume laid out as catalog/schema/volume — confirm).
source_path = f"/Volumes/{catalog}/bronze/raw_data/supply_chain_data.csv"


In [0]:
# Read the raw supply chain CSV (first row is the header, column types are
# inferred from the data) and preview the first ten rows.
df = (
    spark.read
    .format("csv")
    .options(header=True, inferSchema=True)
    .load(source_path)
)
display(df.limit(10))

In [0]:
# Delta tables reject column names containing any of the characters
# ' ,;{}()\n\t=' (unless column mapping is enabled), not only spaces, so every
# such character is replaced with an underscore.
invalid_chars = " ,;{}()\n\t="
to_underscore = str.maketrans(invalid_chars, "_" * len(invalid_chars))
sanitized_names = [name.translate(to_underscore) for name in df.columns]

# Rename all columns in a single projection instead of one withColumnRenamed
# call per column; skip the rename entirely when every name is already valid.
# `df` is rebound as before, so both `df` and `df_raw` expose the clean names.
if sanitized_names != df.columns:
    df = df.toDF(*sanitized_names)
df_raw = df

In [0]:
# Print the column names and types of df_raw to check the renamed columns
# and the types inferred by the CSV reader.
df_raw.printSchema()

In [0]:
from pyspark.sql.functions import current_timestamp, lit, col

# Lineage and auditing columns appended to every row of the bronze DataFrame.
audit_columns = {
    # Timestamp taken when the ingestion query runs
    "_ingest_ts": current_timestamp(),
    # Path of the file the row was read from, taken from the _metadata column
    "_source_file": col("_metadata.file_path"),
    # Environment the notebook was run for
    "_env": lit(env),
}
df_bronze = df_raw.withColumns(audit_columns)

In [0]:
# Persist the bronze DataFrame as a Delta table. The write uses append mode,
# so each run adds its rows to whatever the table already contains.
target_table = f"{catalog}.bronze.makeup_supply_chain_raw"
(
    df_bronze.write
    .format("delta")
    .mode("append")
    .saveAsTable(target_table)
)