In [0]:
%sql
-- Create the `bronze` schema inside the `workspace` catalog.
-- IF NOT EXISTS keeps this cell safe to re-run: it does nothing when the schema is already present.
CREATE SCHEMA IF NOT EXISTS workspace.bronze;



In [0]:
%sql
-- Make workspace.bronze the current schema so unqualified object names resolve against it.
USE workspace.bronze;


In [0]:
from pyspark.sql.functions import current_timestamp, col

# Bronze ingestion of the population-vs-price sample dataset.
# Reads the raw CSV, renames the headers that contain spaces, stamps ingestion
# metadata, and replaces the contents of the bronze Delta table on every run.
SOURCE_PATH = "/databricks-datasets/samples/population-vs-price/data_geo.csv"
BRONZE_TABLE = "workspace.bronze.population_raw"

# 1. Read raw CSV (first row is the header; column types are inferred)
raw_df = (
    spark.read
         .option("header", "true")
         .option("inferSchema", "true")
         .csv(SOURCE_PATH)
)

# 2. Rename columns to UC-compliant names
#    NOTE: withColumnRenamed is a no-op when the source column is absent,
#    so a changed CSV header would pass through here without an error.
renamed_df = (
    raw_df
    .withColumnRenamed("2014 rank", "rank_2014")
    .withColumnRenamed("State Code", "state_code")
    .withColumnRenamed("2014 Population estimate", "population_estimate_2014")
    .withColumnRenamed("2015 median sales price", "median_sales_price_2015")
)

# 3. Add ingestion metadata
#    ingestion_timestamp: evaluated once per query execution.
#    source_file: path taken from the file source's hidden `_metadata` column.
bronze_df = (
    renamed_df
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_file", col("_metadata.file_path"))
)

# 4. Write to Bronze table (UC-compliant); "overwrite" replaces existing rows
bronze_df.write \
    .format("delta") \
    .mode("overwrite") \
    .saveAsTable(BRONZE_TABLE)

# 5. Preview what was actually persisted.
#    Displaying bronze_df here would re-run the lazy plan: the CSV would be read
#    a second time and current_timestamp() re-evaluated, so the preview would
#    show a different ingestion_timestamp than the one stored in the table.
display(spark.table(BRONZE_TABLE))


In [0]:
# DEV version - ready for promotion
