In [None]:
%run ../config/load_config

In [None]:
%run ../common/transformations

In [None]:
%run ../common/data_quality

In [None]:
from pyspark.sql.functions import *

# Silver-layer target: the table name and the storage path that the
# get_storage_path helper returns for it.
target_table = "boroughs_sv"
silver_table_path = get_storage_path("silver", target_table)

# DDL that registers the silver table at that path. IF NOT EXISTS turns the
# statement into a no-op once the table is already registered.
# `catalog` and `schema_silver` are not defined in this cell -- presumably they
# come from the load_config notebook run above; verify there.
# NOTE(review): there is no USING clause, so the table format is whatever the
# runtime defaults to, while the write at the end of this cell uses Delta
# explicitly -- confirm the default format is Delta on the target cluster.
create_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog}.{schema_silver}.{target_table}(
    borough_code STRING,
    borough_name STRING,
    hectares DOUBLE,
    shape_area DOUBLE,
    shape_length DOUBLE,
    geometry_geojson STRING
)  
LOCATION '{silver_table_path}'
"""
spark.sql(create_table_ddl)

# Read the bronze boroughs table and project it onto the silver column layout.
source_table = get_table_name(schema_bronze, "boroughs_bz")

# (field nested under the bronze `properties` column, silver column name),
# listed in output column order.
property_columns = [
    col(f"properties.{field}").alias(column_name)
    for field, column_name in (
        ("CODE", "borough_code"),
        ("BOROUGH", "borough_name"),
        ("HECTARES", "hectares"),
        ("Shape__Area", "shape_area"),
        ("Shape__Length", "shape_length"),
    )
]

# Wrap the bronze `geometry` column in a struct whose `type` is 'Polygon' and
# serialise that struct to a JSON string.
# NOTE(review): this assumes `geometry` holds the bare coordinates rather than
# a complete GeoJSON geometry object -- confirm against the bronze schema.
geometry_geojson = to_json(
    expr("named_struct('type','Polygon','coordinates', geometry)")
).alias("geometry_geojson")

df_transformed = spark.read.table(source_table).select(
    *property_columns, geometry_geojson
)

# Clean data: pass the frame through the shared trim_strings helper
# (presumably strips whitespace from string columns -- see
# common/transformations), discard rows whose borough_code is null, then keep
# one row per borough_code.
# NOTE(review): nothing here orders the rows before de-duplication, so which
# duplicate survives is not controlled by this cell -- confirm that is fine.
borough_key = "borough_code"
df_cleaned = trim_strings(df_transformed).na.drop(subset=[borough_key])
df_deduped = df_cleaned.drop_duplicates([borough_key])

# Hand the result to the shared quality-flag and transformation-metadata
# helpers; the columns they add are defined in the notebooks run above.
df_quality = add_quality_flag(df_deduped, not_null_columns=[borough_key])
df_silver = add_transformation_metadata(df_quality)

# Write to silver table: overwrite the table with the contents of df_silver
# in Delta format, with the overwriteSchema option enabled so the frame's
# schema replaces the table's existing one.
# saveAsTable returns None, so the result is deliberately not bound to a name
# (the former `query` variable was always None).
(
    df_silver.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{catalog}.{schema_silver}.{target_table}")
)