In [0]:
from pyspark.sql.functions import current_timestamp

# Configuration
# Directory holding the raw CSV files; each entry in `files` is a base name
# that gets ".csv" appended to it when the source path is built.
volume_path = "/Volumes/workspace/instacart/raw/"

files = [
    "aisles",
    "departments",
    "orders",
    "products",
    "order_products__prior",
    "order_products__train",
]

# Set context
# Select the catalog first, then the schema, so that unqualified table names
# used afterwards resolve against them.
for context_statement in ("USE CATALOG workspace", "USE SCHEMA instacart"):
    spark.sql(context_statement)

# Ingestion Loop
# Each CSV is loaded into its own Delta table named bronze_<file>. Files are
# processed independently: a failure on one file is reported and the loop
# moves on, so the remaining files are still attempted.
failed_files = []

for file in files:
    try:
        source_path = f"{volume_path}{file}.csv"

        print(f"Reading {file} from {source_path}...")

        # The header row supplies column names; column types are inferred
        # from the data (this makes Spark take an extra pass over the file).
        df = (
            spark.read.format("csv")
            .option("header", "true")
            .option("inferSchema", "true")
            .load(source_path)
        )

        # Add metadata
        df_bronze = df.withColumn("ingestion_timestamp", current_timestamp())

        table_name = f"bronze_{file}"

        # Full refresh of the table. The schema is re-inferred on every run,
        # so it may differ from the schema of an existing table; without
        # overwriteSchema Delta rejects such an overwrite with a schema
        # mismatch and the table is left stale.
        (
            df_bronze.write.format("delta")
            .mode("overwrite")
            .option("overwriteSchema", "true")
            .saveAsTable(table_name)
        )

        print(f"Success: Created table workspace.instacart.{table_name}")

    except Exception as e:
        # Deliberately best-effort: report, remember the failure, continue.
        print(f"Error processing {file}: {e}")
        failed_files.append(file)

# Surface failures in one place so they are not lost among progress messages.
if failed_files:
    print(
        f"Ingestion finished with {len(failed_files)} failed file(s): "
        f"{', '.join(failed_files)}"
    )

# Validation
# List the tables in workspace.instacart whose names match the 'bronze*'
# pattern and render the result in the notebook.
print("\n--- Verifying Tables ---")
bronze_tables = spark.sql("SHOW TABLES IN workspace.instacart LIKE 'bronze*'")
display(bronze_tables)

Reading aisles from /Volumes/workspace/instacart/raw/aisles.csv...
Success: Created table workspace.instacart.bronze_aisles
Reading departments from /Volumes/workspace/instacart/raw/departments.csv...
Success: Created table workspace.instacart.bronze_departments
Reading orders from /Volumes/workspace/instacart/raw/orders.csv...
Success: Created table workspace.instacart.bronze_orders
Reading products from /Volumes/workspace/instacart/raw/products.csv...
Success: Created table workspace.instacart.bronze_products
Reading order_products__prior from /Volumes/workspace/instacart/raw/order_products__prior.csv...
Success: Created table workspace.instacart.bronze_order_products__prior
Reading order_products__train from /Volumes/workspace/instacart/raw/order_products__train.csv...
Success: Created table workspace.instacart.bronze_order_products__train

--- Verifying Tables ---


database,tableName,isTemporary
instacart,bronze_aisles,False
instacart,bronze_departments,False
instacart,bronze_order_products__prior,False
instacart,bronze_order_products__train,False
instacart,bronze_orders,False
instacart,bronze_products,False
