In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Load the raw IoT sensor JSON into a DataFrame.
#
# - "multiLine" lets the reader parse JSON records that span several lines
#   instead of expecting one JSON object per line.
# - No "inferSchema" option is set: it is a CSV-reader option. The JSON source
#   infers the schema whenever no explicit schema is supplied, so passing it
#   had no effect and only suggested that inference was opt-in.
df = (
    spark.read.format("json")
    .option("multiLine", True)
    .load("/Volumes/workspace/pysparkcsv/iot_sensor")
)

In [0]:
# Inspect the raw load before flattening; the rendered output below shows a
# single nested `facility` column.
display(df)

facility
"List(FAC-001, List(Building A, 2, Production), Manufacturing Plant Alpha, List(List(List(List(2025-11-15, 2026-05-15, List(TECH-42, Robert Brown)), TMP-500X, SensorTech Inc), List(List(List(null, 0.98, good), 2026-01-04T08:00:00Z, celsius, 22.5), List(List(null, 0.97, good), 2026-01-04T08:15:00Z, celsius, 23.1), List(List(List(List(ALT-001, Temperature approaching upper threshold, medium)), 0.85, warning), 2026-01-04T08:30:00Z, celsius, 24.8)), SENS-TMP-001, List(List(30.0, 15.0), 26.0, 18.0), temperature), List(List(List(2025-10-20, 2026-04-20, List(TECH-42, Robert Brown)), HUM-300, SensorTech Inc), List(List(List(null, 0.99, good), 2026-01-04T08:00:00Z, percent, 45.2)), SENS-HUM-001, List(List(70.0, 20.0), 60.0, 30.0), humidity)))"


In [0]:
# Flatten the nested `facility` document into a tabular DataFrame.
#
# Each entry maps a nested source field path to the flat output column name.
# The list order is the output column order.
_iot_column_map = [
    # Facility information
    ("facility.facilityId", "facility_id"),
    ("facility.name", "facility_name"),
    ("facility.location.building", "loc_building"),
    ("facility.location.floor", "loc_floor"),
    ("facility.location.zone", "loc_zone"),
    # Sensor information
    ("sensors.sensorId", "sensor_id"),
    ("sensors.type", "sensor_type"),
    ("sensors.manufacturer.name", "manu_name"),
    ("sensors.manufacturer.model", "manu_model"),
    ("sensors.manufacturer.calibration.lastDate", "cali_lastdate"),
    ("sensors.manufacturer.calibration.nextDate", "cali_nextdate"),
    ("sensors.manufacturer.calibration.technician.id", "tech_id"),
    ("sensors.manufacturer.calibration.technician.name", "tech_name"),
    # Reading information
    ("readings.timestamp", "read_timestamp"),
    ("readings.value", "read_value"),
    ("readings.unit", "read_unit"),
    ("readings.metadata.quality", "meta_quality"),
    ("readings.metadata.confidence", "meta_confidence"),
    # Alert information
    ("alerts.alertId", "alert_id"),
    ("alerts.severity", "alert_severity"),
    ("alerts.message", "alert_message"),
    # Sensor threshold information
    ("sensors.thresholds.min", "th_min"),
    ("sensors.thresholds.max", "th_max"),
    ("sensors.thresholds.critical.min", "critical_min"),
    ("sensors.thresholds.critical.max", "critical_max"),
]

# Explode from the outermost array inwards: sensors -> readings -> alerts.
# explode_outer (rather than explode) keeps the parent row when the array is
# null or empty, so readings without alerts survive with null alert columns.
df_iotsensor = (
    df.withColumn("sensors", explode_outer("facility.sensors"))
    .withColumn("readings", explode_outer("sensors.readings"))
    .withColumn("alerts", explode_outer("readings.metadata.alerts"))
    .select(*[col(source_path).alias(flat_name) for source_path, flat_name in _iot_column_map])
)




In [0]:
# Inspect the flattened result; in the rendered output below each row is one
# sensor reading, with the alert columns empty when the reading has no alert.
display(df_iotsensor)

facility_id,facility_name,loc_building,loc_floor,loc_zone,sensor_id,sensor_type,manu_name,manu_model,cali_lastdate,cali_nextdate,tech_id,tech_name,read_timestamp,read_value,read_unit,meta_quality,meta_confidence,alert_id,alert_severity,alert_message,th_min,th_max,critical_min,critical_max
FAC-001,Manufacturing Plant Alpha,Building A,2,Production,SENS-TMP-001,temperature,SensorTech Inc,TMP-500X,2025-11-15,2026-05-15,TECH-42,Robert Brown,2026-01-04T08:00:00Z,22.5,celsius,good,0.98,,,,18.0,26.0,15.0,30.0
FAC-001,Manufacturing Plant Alpha,Building A,2,Production,SENS-TMP-001,temperature,SensorTech Inc,TMP-500X,2025-11-15,2026-05-15,TECH-42,Robert Brown,2026-01-04T08:15:00Z,23.1,celsius,good,0.97,,,,18.0,26.0,15.0,30.0
FAC-001,Manufacturing Plant Alpha,Building A,2,Production,SENS-TMP-001,temperature,SensorTech Inc,TMP-500X,2025-11-15,2026-05-15,TECH-42,Robert Brown,2026-01-04T08:30:00Z,24.8,celsius,warning,0.85,ALT-001,medium,Temperature approaching upper threshold,18.0,26.0,15.0,30.0
FAC-001,Manufacturing Plant Alpha,Building A,2,Production,SENS-HUM-001,humidity,SensorTech Inc,HUM-300,2025-10-20,2026-04-20,TECH-42,Robert Brown,2026-01-04T08:00:00Z,45.2,percent,good,0.99,,,,30.0,60.0,20.0,70.0
