In [11]:
import requests
import pandas as pd
from datetime import datetime as dt
from urllib.parse import quote
import json
import os


In [12]:
def write_url(start_date=None, end_date=None, polygon=None):
    """Build the sensor-summary request URL.

    Called with no arguments this returns exactly the URL the notebook was
    originally written for; the arguments let the same helper be reused for
    other date ranges or areas.

    Args:
        start_date: Object with ``strftime`` (e.g. ``datetime``) used for the
            ``start`` query parameter. Defaults to ``dt(2025, 8, 24)``.
        end_date: Object with ``strftime`` used for the ``end`` query
            parameter. Defaults to ``dt(2025, 8, 25)``.
        polygon: WKT geometry string sent as ``geom``; it is URL-encoded here.
            Defaults to the polygon hard-coded in the original notebook.

    Returns:
        str: The full request URL including the query string.
    """
    # NOTE(review): the list ends with a trailing comma, which sends an empty
    # column name. Kept as-is because the request was run in this form; confirm
    # with the API whether it can be dropped.
    measurement_columns = 'measurement_columns=Timestamp,PM1,PM2.5,PM4,PM10,PM1_A,PM1_B,PM2.5_A,PM2.5_B,PM4_A,PM4_B,PM10_A,PM10_B,PM1Raw,PM2.5Raw,PM4Raw,PM10Raw,PM0.3Count,NO,NO2,NOx,NOxRaw,NOxIndex,CO,CO2,O3,SO2,VOC,TVOCRaw,VOCIndex,Temperature,AmbientTemperature,Humidity,AmbientHumidity,Pressure,AmbientPressure,ScatteringCoefficient,Deciviews,VisualRange,NoiseLAMax,NoiseLAMin,NoiseLAEq,NoiseLAEqDelog,'
    # or leave measurement_columns empty to get all columns and include deserialize bool to get all data types correctly

    if polygon is None:
        polygon = "POLYGON ((-1.890019 52.452531, -1.889241 52.452531, -1.889241 52.452887, -1.890019 52.452887, -1.890019 52.452531))"
    if start_date is None:
        start_date = dt(2025, 8, 24)
    if end_date is None:
        end_date = dt(2025, 8, 25)

    # The request uses day-month-year dates.
    start_date_str = start_date.strftime("%d-%m-%Y")
    end_date_str = end_date.strftime("%d-%m-%Y")

    columns = "columns=sensor_id,measurement_data,timestamp&include_sensor_metadata=true"
    # URL-encode the polygon: WKT contains spaces, commas and parentheses.
    spatial_query = f"spatial_query_type=within&geom={quote(polygon)}"

    base_url = "https://rn3rb93aq5.execute-api.eu-west-2.amazonaws.com/prod/sensor-summary/as-json"
    return f"{base_url}?start={start_date_str}&end={end_date_str}&{columns}&{measurement_columns}&{spatial_query}"

def save_data(url, file_name='./data/air_quality_data.json', timeout=30):
    """Download JSON from ``url`` and write it to ``file_name``.

    Args:
        url: Address to fetch with an HTTP GET.
        file_name: Destination path for the JSON file. Missing parent
            directories are created.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook indefinitely.

    Returns:
        None. A status line is printed for both success and a non-200 response.

    Raises:
        Whatever ``requests.get`` or ``response.json()`` raise (network errors,
        timeouts, a body that is not valid JSON). In those cases the
        destination file is not created or modified.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve data: {response.status_code}")
        return

    # Decode before touching the file: opening with 'w' truncates it, and an
    # empty leftover file would make later "already downloaded" checks pass.
    payload = response.json()

    target_dir = os.path.dirname(file_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(file_name, 'w') as file:
        json.dump(payload, file)
    print(f"Data saved to {file_name}")

# Disabled one-off download call (note: this file_name is relative to the working directory, not ./data):
# save_data(url=write_url(), file_name='air_quality_data.json')

# Print the assembled request URL.
print(write_url())

https://rn3rb93aq5.execute-api.eu-west-2.amazonaws.com/prod/sensor-summary/as-json?start=24-08-2025&end=25-08-2025&columns=sensor_id,measurement_data,timestamp&include_sensor_metadata=true&measurement_columns=Timestamp,PM1,PM2.5,PM4,PM10,PM1_A,PM1_B,PM2.5_A,PM2.5_B,PM4_A,PM4_B,PM10_A,PM10_B,PM1Raw,PM2.5Raw,PM4Raw,PM10Raw,PM0.3Count,NO,NO2,NOx,NOxRaw,NOxIndex,CO,CO2,O3,SO2,VOC,TVOCRaw,VOCIndex,Temperature,AmbientTemperature,Humidity,AmbientHumidity,Pressure,AmbientPressure,ScatteringCoefficient,Deciviews,VisualRange,NoiseLAMax,NoiseLAMin,NoiseLAEq,NoiseLAEqDelog,&spatial_query_type=within&geom=POLYGON%20%28%28-1.890019%2052.452531%2C%20-1.889241%2052.452531%2C%20-1.889241%2052.452887%2C%20-1.890019%2052.452887%2C%20-1.890019%2052.452531%29%29


In [13]:
# Download the data once; later runs reuse the file already on disk.
data_file = './data/air_quality_data.json'
if not os.path.isfile(data_file):
    # On a fresh checkout ./data may not exist yet; create it so the file can be written.
    os.makedirs(os.path.dirname(data_file), exist_ok=True)
    save_data(url=write_url(), file_name=data_file)
else:
    print("Data file already exists. Skipping download.")

Data saved to ./data/air_quality_data.json


In [14]:
class ColocatedSensor:
    """Collects measurement batches for one sensor and combines them into a DataFrame.

    Batches are added with ``add_measurement`` and merged into ``self.df`` by
    ``concat_df``.
    """

    def __init__(self, sensor_id, sensor_type, timestamp):
        self.sensor_id = sensor_id
        self.sensor_type = sensor_type
        # DataFrames added since the last concat_df() call.
        self.measurement_data = []
        # Combined measurements, sorted by the datetime index; empty until concat_df() runs.
        self.df = pd.DataFrame()
        self.timestamp = timestamp

    def concat_df(self):
        """Merge all pending batches into ``self.df`` and sort by index.

        Rows already in ``self.df`` from an earlier call are kept, so the
        method can be called again after more batches have been added.

        Raises:
            ValueError: If no batches are pending.
        """
        if not self.measurement_data:
            raise ValueError("No measurement data to concatenate.")

        frames = list(self.measurement_data)
        if not self.df.empty:
            # Keep what was combined earlier instead of overwriting it.
            frames.insert(0, self.df)

        self.df = pd.concat(frames, ignore_index=False).sort_index()
        # to save memory, clear the measurement_data list
        self.measurement_data.clear()

    def add_measurement(self, measurement_data:list):
        """Convert a list of measurement records to a DataFrame and queue it.

        Empty batches are skipped: they carry no ``Timestamp`` column to index on.
        """
        if not measurement_data:
            return
        self.measurement_data.append(ColocatedSensor.to_df(measurement_data))

    @staticmethod
    def to_df(measurement_data:list, filter_columns=None):
        """Build a DataFrame indexed by UTC datetime from measurement records.

        Args:
            measurement_data: Records accepted by ``pd.DataFrame.from_records``;
                each must provide a ``Timestamp`` field, which is parsed as
                seconds since the epoch.
            filter_columns: Optional list of columns to keep. ``Timestamp`` does
                not need to be included for the index to be built.

        Returns:
            pd.DataFrame: Frame whose index is named ``datetime``. Timestamps
            that cannot be parsed become ``NaT`` (``errors='coerce'``).
        """
        frame = pd.DataFrame.from_records(measurement_data)
        # Parse timestamps before filtering so the index never depends on filter_columns.
        stamps = pd.to_datetime(frame['Timestamp'], unit='s', errors='coerce', utc=True)
        if filter_columns:
            frame = frame[list(filter_columns)]
        # set_index returns a new frame, so the filtered slice is never written to.
        frame = frame.set_index(stamps.rename('datetime'))

        # infer data types
        return frame.infer_objects()
        


In [15]:
# Build one ColocatedSensor per sensor_id from the downloaded JSON records.
with open('./data/air_quality_data.json', 'r') as file:
    json_data = json.load(file)

# Records are grouped by sensor_id; the first record for an id creates the object.
sensors_by_id = {}
for sensor in json_data:
    sensor_id = sensor['sensor_id']
    sensor_type = sensor['type_name']
    timestamp = sensor['timestamp_UTC']

    if sensor_id not in sensors_by_id:
        sensors_by_id[sensor_id] = ColocatedSensor(sensor_id, sensor_type, timestamp)
    sensors_by_id[sensor_id].add_measurement(sensor['measurement_data'])

# Later cells expect `sensors` to be a list of ColocatedSensor objects.
sensors = list(sensors_by_id.values())

# Combine the queued batches of every sensor and report the resulting size.
for sensor in sensors:
    sensor.concat_df()
    print(f"Sensor ID: {sensor.sensor_id}, Type: {sensor.sensor_type}, DataFrame Shape: {sensor.df.shape}")

Sensor ID: 62, Type: Zephyr, DataFrame Shape: (16638, 12)
Sensor ID: 60, Type: Zephyr, DataFrame Shape: (16760, 12)
Sensor ID: 70, Type: PurpleAir, DataFrame Shape: (1436, 17)
Sensor ID: 69, Type: AirGradient, DataFrame Shape: (2805, 14)
Sensor ID: 68, Type: AirGradient, DataFrame Shape: (2804, 14)
Sensor ID: 67, Type: PurpleAir, DataFrame Shape: (1430, 17)


# Plotting co-located PM data from multiple sensors with Plotly

In [16]:
import plotly.express as px
import plotly.graph_objects as go

In [17]:
def plot_sensor_data(sensors, column:str, unit:str='µg/m³', start_time:str=None, end_time:str=None):
    """Plot one measurement column for every sensor that has it.

    The sensors' DataFrames are only read; the time window is applied to a
    local view, so repeated calls with different windows or columns all see
    the full data.

    Args:
        sensors: Iterable of objects with a ``df`` DataFrame attribute.
        column: Name of the DataFrame column to plot.
        unit: Unit label used in the y-axis title and hover text.
        start_time: Optional lower bound compared against the DataFrame index.
        end_time: Optional upper bound compared against the DataFrame index.
            Either bound may be given on its own.

    Returns:
        The figure created by ``go.Figure()``, with one line trace per sensor
        that has ``column``.
    """
    fig = go.Figure()

    # Sensors are labelled by position ("Sensor1", "Sensor2", ...) rather than
    # by id/type. The number counts every sensor, including those without the
    # column, so a given sensor keeps its label across plots.
    for num, sensor in enumerate(sensors, start=1):
        frame = sensor.df
        if column not in frame.columns:
            continue

        # Filter a local view only; never write the result back to sensor.df.
        if start_time:
            frame = frame.loc[frame.index >= start_time]
        if end_time:
            frame = frame.loc[frame.index <= end_time]

        fig.add_trace(
            go.Scatter(
                x=frame.index,
                y=frame[column],
                mode='lines',
                name=f"Sensor{num}"
            )
        )

    fig.update_layout(
        title=f"{column} Levels for All Sensors",
        xaxis_title="Timestamp",
        yaxis_title=f"{column} {unit}"
    )

    fig.update_xaxes(rangeslider_visible=True)

    # on hover show the column value
    fig.update_traces(
        hovertemplate="%{x}<br>%{y} " + unit + "<extra>%{fullData.name}</extra>"
    )

    return fig

In [18]:
# Print the id, type and DataFrame column names of every sensor.
for sensor in sensors:
    print(f"Sensor ID: {sensor.sensor_id}, Type: {sensor.sensor_type}, DataFrame Columns: {sensor.df.columns}")

Sensor ID: 62, Type: Zephyr, DataFrame Columns: Index(['Timestamp', 'PM1', 'PM2.5', 'PM10', 'NO', 'NO2', 'O3', 'Temperature',
       'AmbientTemperature', 'Humidity', 'AmbientHumidity', 'AmbientPressure'],
      dtype='object')
Sensor ID: 60, Type: Zephyr, DataFrame Columns: Index(['Timestamp', 'PM1', 'PM2.5', 'PM10', 'NO', 'NO2', 'O3', 'Temperature',
       'AmbientTemperature', 'Humidity', 'AmbientHumidity', 'AmbientPressure'],
      dtype='object')
Sensor ID: 70, Type: PurpleAir, DataFrame Columns: Index(['Timestamp', 'PM1', 'PM2.5', 'PM10', 'PM1_A', 'PM1_B', 'PM2.5_A',
       'PM2.5_B', 'PM10_A', 'PM10_B', 'VOC', 'AmbientTemperature',
       'AmbientHumidity', 'AmbientPressure', 'ScatteringCoefficient',
       'Deciviews', 'VisualRange'],
      dtype='object')
Sensor ID: 69, Type: AirGradient, DataFrame Columns: Index(['Timestamp', 'PM1', 'PM2.5', 'PM10', 'PM1Raw', 'PM2.5Raw', 'PM10Raw',
       'PM0.3Count', 'NOxIndex', 'CO2', 'VOC', 'VOCIndex',
       'AmbientTemperature', 'Ambien

In [19]:
# Time window passed to every plot below.
start_time = "2025-08-24T00:00:00Z"
end_time = "2025-08-25T23:59:59Z"

# One figure per particulate-matter column, in the same order as before.
for pm_column in ("PM10", "PM2.5", "PM1"):
    plot_sensor_data(sensors, pm_column, "µg/m³", start_time, end_time).show()