In [0]:
# Databricks notebook: Limpieza de tabla Bronze y guardado en capa Silver

from pyspark.sql.functions import col, explode
from pyspark.sql.types import StructType, StructField, StringType, LongType, DoubleType, BooleanType

# 1. Load the Bronze table. `spark` is not defined in this file; presumably it
#    is the session injected by the Databricks notebook runtime.
df_bronze = spark.table("default.bronze_opensky")

# 2. Flatten the "states" array: one output row per array element, carrying
#    the "time" value of the source row alongside it.
df_exploded = df_bronze.select(
    col("time"),
    explode(col("states")).alias("state"),
)

# 3. Column names for the positional entries of each state vector, following
#    the OpenSky documentation. The list position of a name is the index of
#    the element it labels inside "state".
fields = [
    "icao24", "callsign", "origin_country", "time_position", "last_contact", "longitude",
    "latitude", "baro_altitude", "on_ground", "velocity", "true_track", "vertical_rate",
    "sensors", "geo_altitude", "squawk", "spi", "position_source"
]

# 4. Project every positional element of "state" into its own named column.
#    Each element is cast to string here; the typed casts happen in step 5.
df_silver = df_exploded.select(
    col("time").alias("snapshot_time"),
    *[col("state")[idx].cast("string").alias(name) for idx, name in enumerate(fields)]
)

# 5. Type conversion (only some columns, as an example).
#    Each entry is (column name, target Spark type); a target of None keeps the
#    column exactly as it is. The resulting DataFrame contains only the columns
#    listed here, in this order, so anything omitted (e.g. "sensors") is dropped.
column_casts = [
    ("snapshot_time", "long"),
    ("icao24", None),
    ("callsign", None),
    ("origin_country", None),
    ("time_position", "long"),
    ("last_contact", "long"),
    ("longitude", "double"),
    ("latitude", "double"),
    ("baro_altitude", "double"),
    ("on_ground", "boolean"),
    ("velocity", "double"),
    ("true_track", "double"),
    ("vertical_rate", "double"),
    ("geo_altitude", "double"),
    ("squawk", None),
    ("spi", "boolean"),
    ("position_source", "int"),
]

df_silver = df_silver.select(
    *[
        col(name) if target is None else col(name).cast(target)
        for name, target in column_casts
    ]
)

# 6. Persist the result as a Delta table (Silver layer). The "overwrite" mode
#    replaces whatever the target table currently holds.
silver_writer = df_silver.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("default.silver_opensky")

print("✅ Datos transformados y guardados en tabla default.silver_opensky")
