In [None]:
%run ../config/load_config

In [None]:
%run ../common/transformations

In [None]:
%run ../common/data_quality

In [None]:
from pyspark.sql.functions import *

target_table = "stops_sv"
# Storage location for the silver table, resolved by the get_storage_path
# helper (defined outside this cell).
silver_table_path = get_storage_path("silver", target_table)

# Ensure the silver table exists before loading.
# The column list mirrors, in order, the columns produced by the select in the
# transformation step of this notebook -- including hub_naptan_code, so a
# freshly created table agrees with the data that is loaded into it.
# NOTE: the load writes with overwriteSchema=true, so any extra columns the
# quality/metadata helpers append will still replace this schema on write.
spark.sql(
    f"""
CREATE TABLE IF NOT EXISTS {catalog}.{schema_silver}.{target_table}(
    naptan_id STRING,
    indicator STRING,
    ics_code BIGINT,
    stop_type STRING,
    hub_naptan_code STRING,
    common_name STRING,
    longitude DOUBLE,
    latitude DOUBLE
)
LOCATION '{silver_table_path}'
"""
)

# Transform bronze stop points into the flat silver layout.
source_table = get_table_name(schema_bronze, "stops_bz")

# One output row per element of the stopPoints array. explode_outer keeps a
# row (with a null `stop`) for source rows whose array is null or empty.
df_stops = spark.read.table(source_table).select(
    explode_outer(col("stopPoints")).alias("stop")
)

df_transformed = df_stops.select(
    col("stop.naptanId").alias("naptan_id"),
    # For comma-separated values only the first element is kept.
    split(col("stop.indicator"), ",")[0].alias("indicator"),
    col("stop.icsCode").cast("bigint").alias("ics_code"),
    split(col("stop.stopType"), ",")[0].alias("stop_type"),
    col("stop.hubNaptanCode").alias("hub_naptan_code"),
    split(col("stop.commonName"), ",")[0].alias("common_name"),
    # Cast explicitly so the coordinates always match the DOUBLE columns
    # declared for the silver table, whatever type bronze carries
    # (same approach as ics_code above).
    col("stop.lon").cast("double").alias("longitude"),
    col("stop.lat").cast("double").alias("latitude"),
)

# Clean, de-duplicate and annotate the transformed rows.
# naptan_id is both the de-duplication key and the column that the quality
# check requires to be non-null.
key_columns = ["naptan_id"]

# trim_strings, add_quality_flag and add_transformation_metadata are helpers
# brought in by the %run cells at the top of the notebook.
df_cleaned = trim_strings(df_transformed)

# Keep a single row per naptan_id.
# NOTE(review): which of several duplicate rows survives is not controlled
# here -- confirm that is acceptable for this table.
df_deduped = df_cleaned.dropDuplicates(key_columns)

df_quality = add_quality_flag(df_deduped, not_null_columns=key_columns)
df_silver = add_transformation_metadata(df_quality)


# Write to silver table.
# Full refresh: the existing contents are replaced, and overwriteSchema lets
# the DataFrame's schema replace the table's current schema.
# saveAsTable returns None, so its result is deliberately not bound to a name.
silver_table_name = f"{catalog}.{schema_silver}.{target_table}"
(
    df_silver.write.mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(silver_table_name)
)