In [0]:
from pyspark.sql import functions as F

# Source (bronze) and target (silver) table names.
# NOTE(review): these look like three-part catalog.schema.table names — confirm.
bronze_table = "dev_flowers.bronze.iris"
silver_table = "dev_flowers.silver.iris"

# Load the bronze table as a DataFrame.
df_bronze = spark.table(bronze_table)

# Sanity check: report how many rows were read and show a small sample.
bronze_row_count = df_bronze.count()
print("Bronze row count:", bronze_row_count)

bronze_preview = df_bronze.limit(5)
display(bronze_preview)

# Build the silver DataFrame from bronze:
#   1. cast the four measurement columns to double,
#   2. drop rows where any measurement is null,
#   3. lower-case the species name,
#   4. stamp each row with the processing time.
measurement_cols = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

# Ensure numeric types. withColumn on an existing column name replaces it in
# place, so the column order of df_bronze is preserved.
df_silver = df_bronze
for measurement_col in measurement_cols:
    df_silver = df_silver.withColumn(
        measurement_col, F.col(measurement_col).cast("double")
    )

df_silver = (
    df_silver
    # Drop rows missing key values. This must run AFTER the casts: with
    # non-ANSI cast semantics a value that cannot be parsed as a double
    # becomes NULL, and filtering before the cast would let those rows
    # through into silver.
    # NOTE(review): if spark.sql.ansi.enabled is true, an unparseable value
    # raises during the cast instead of becoming NULL — confirm the setting.
    .dropna(subset=measurement_cols)

    # Normalize species names
    .withColumn("species", F.lower(F.col("species")))

    # Add ingestion metadata
    .withColumn("processed_at", F.current_timestamp())
)

# Report the transformed row count and show a small sample before writing.
silver_row_count = df_silver.count()
print("Silver row count:", silver_row_count)

silver_preview = df_silver.limit(5)
display(silver_preview)

# ---- Write Silver Table (full refresh) ----
# Configure a Delta writer that replaces the existing table contents;
# overwriteSchema lets the stored schema be replaced as part of the overwrite.
silver_writer = df_silver.write.format("delta")
silver_writer = silver_writer.mode("overwrite")
silver_writer = silver_writer.option("overwriteSchema", "true")
silver_writer.saveAsTable(silver_table)

print(f"Silver table written: {silver_table}")