In [0]:
import datetime as dt
from pyspark.sql import functions as F

# Snapshot date of the GTFS static feed; keep it equal to the date the
# Bronze ingest was run with, since it is the last segment of both paths.
TODAY = "2025-05-21"

# Per-date root folders of the Bronze (input) and Silver (output) layers
BRONZE_PATH = "dbfs:/bronze/gtfs_static/" + TODAY
SILVER_PATH = "dbfs:/silver/gtfs_static/" + TODAY


In [0]:
# Build the Silver "stops" frame from the Bronze Delta table:
#   * stop_lat / stop_lon are cast to double,
#   * both are additionally packed into a `location` struct,
#   * ingestion_ts records when this transformation ran.
df_stops = (
    spark.read.load(f"{BRONZE_PATH}/stops", format="delta")
    .withColumn("stop_lat", F.col("stop_lat").cast("double"))
    .withColumn("stop_lon", F.col("stop_lon").cast("double"))
    # The struct is built after the casts so its fields carry the double values
    .withColumn("location", F.struct("stop_lat", "stop_lon"))
    .withColumn("ingestion_ts", F.current_timestamp())
)

# Persist to the Silver layer, replacing whatever is stored for this date
df_stops.write.save(f"{SILVER_PATH}/stops", format="delta", mode="overwrite")


In [0]:
# Sanity check: read the Silver stops table back and print its first 5 rows
spark.read.load(f"{SILVER_PATH}/stops", format="delta").show(n=5)


In [0]:
# Print the schema stored in Silver (column types and the location struct)
spark.read.load(f"{SILVER_PATH}/stops", format="delta").printSchema()


In [0]:
# Handle on the Silver stops table, reused by the inspection cells below
df_silver = spark.read.load(f"{SILVER_PATH}/stops", format="delta")

# Preview each stop id next to its untruncated location struct
df_silver.select(["stop_id", "location"]).show(n=5, truncate=False)


In [0]:
# Pull the two fields out of the location struct; they are displayed
# as plain stop_lat / stop_lon columns beside stop_id.
location_fields = ["stop_id", "location.stop_lat", "location.stop_lon"]
df_silver.select(location_fields).show(n=5)
