# Dynamic Bronze Ingestion

In [0]:
# Load the file-lookup metadata table that drives which sources get ingested.
filelookup_df = spark.table("inventory_project.metadata.filelookup")

In [0]:
import os
from pyspark.sql.functions import *

In [0]:
def ingest_to_bronze(table_name, raw_path, bronze_path, fmt, delimiter=None, schema_json=None):
    """Read a raw dataset and append it to the Bronze location as Parquet.

    Args:
        table_name: Label used only in the progress messages.
        raw_path: Location passed to the Spark reader's ``load``.
        bronze_path: Destination passed to the Spark writer's ``save``.
        fmt: Spark data source format name (for example ``"csv"``).
        delimiter: Optional field separator; applied only when ``fmt`` is
            ``"csv"``.
        schema_json: Optional JSON string describing a Spark ``StructType``.
            When given, it is applied to the reader and CSV schema
            inference is skipped.
    """
    print(f"Processing: {table_name}")

    # Load schema if provided
    schema = None
    if schema_json:
        from pyspark.sql.types import StructType
        import json
        schema = StructType.fromJson(json.loads(schema_json))

    # Read raw data
    reader = spark.read.format(fmt)
    if schema is not None:
        # An explicit schema takes precedence over inference.
        reader = reader.schema(schema)
    if fmt == "csv":
        reader = reader.option("header", True)
        if schema is None:
            reader = reader.option("inferSchema", True)
        if delimiter is not None:
            reader = reader.option("delimiter", delimiter)

    df = reader.load(raw_path)

    # Add ingestion metadata. "source_file" comes from the reader's
    # "_metadata.file_path" column, so the source must expose it
    # (presumably a file-based source; verify for other formats).
    df = (df
          .withColumn("ingestion_timestamp", current_timestamp())
          .withColumn("source_file", col("_metadata.file_path")))

    # Write to Bronze (currently Parquet, appended to any existing data)
    (df.write
       .mode("append")
       .format("parquet")
       .save(bronze_path))

    # Report the destination actually written to; the previous message
    # referenced an undefined name and raised NameError after the write.
    print(f"✅ {table_name} ingested successfully into {bronze_path}")

In [0]:
# Keep only the lookup entries flagged as active and collect them to the driver.
is_active = filelookup_df["active"] == True
active_files = filelookup_df.where(is_active).collect()

# Ingest each active entry; arguments follow ingest_to_bronze's parameter
# order: table_name, raw_path, bronze_path, fmt.
for row in active_files:
    ingest_to_bronze(
        row["table_name"],
        row["source_path"],
        row["target_table"],
        row["source_format"],
    )