In [25]:
import pandas as pd
import geojson
from pprint import pprint
from tqdm import tqdm

# Microclimate

In [33]:
# Load the two exports of the microclimate sensor data.
# Parquet export: tabular form, used to inspect columns and device ids.
df_pq = pd.read_parquet('data/microclimate-sensors-data.parquet')

# GeoJSON export: parsed into a mapping with 'type' and 'features' keys.
# Decode explicitly as UTF-8 (the encoding GeoJSON is specified in) instead of
# relying on the platform's locale default, which differs between machines.
with open('data/microclimate-sensors-data.geojson', encoding='utf-8') as f:
    df_gj = geojson.load(f)

In [3]:
# List the columns available in the parquet export.
df_pq.columns

Index(['device_id', 'received_at', 'sensorlocation', 'latlong',
       'minimumwinddirection', 'averagewinddirection', 'maximumwinddirection',
       'minimumwindspeed', 'averagewindspeed', 'gustwindspeed',
       'airtemperature', 'relativehumidity', 'atmosphericpressure', 'pm25',
       'pm10', 'noise'],
      dtype='object')

In [31]:
# Distinct device ids found in the parquet export, in sorted order.
devices = sorted(df_pq['device_id'].unique())

In [20]:
# One row per device: the first sensorlocation value recorded for it.
df_pq.groupby('device_id').agg(sensorlocation=('sensorlocation', 'first'))

Unnamed: 0_level_0,sensorlocation
device_id,Unnamed: 1_level_1
ICTMicroclimate-01,Birrarung Marr Park - Pole 1131
ICTMicroclimate-02,101 Collins St L11 Rooftop
ICTMicroclimate-03,CH1 rooftop
ICTMicroclimate-04,Batman Park
ICTMicroclimate-05,Enterprize Park - Pole ID: COM1667
ICTMicroclimate-06,Tram Stop 7B - Melbourne Tennis Centre Precinc...
ICTMicroclimate-07,Tram Stop 7C - Melbourne Tennis Centre Precinc...
ICTMicroclimate-08,Swanston St - Tram Stop 13 adjacent Federation...
ICTMicroclimate-09,SkyFarm (Jeff's Shed). Rooftop - Melbourne Con...
ICTMicroclimate-10,1 Treasury Place


In [10]:
# Top-level keys of the loaded GeoJSON object.
df_gj.keys()

dict_keys(['type', 'features'])

In [17]:
# Pretty-print the first feature to see its geometry/properties layout.
pprint(df_gj['features'][0])

{"geometry": {"coordinates": [144.96728, -37.814035], "type": "Point"}, "properties": {"airtemperature": 11.3, "atmosphericpressure": 998.2, "averagewinddirection": 6.0, "averagewindspeed": 2.0, "device_id": "ICTMicroclimate-03", "gustwindspeed": 2.8, "maximumwinddirection": 355.0, "minimumwinddirection": 0.0, "minimumwindspeed": 1.0, "noise": 69.9, "pm10": 6.0, "pm25": 2.0, "received_at": "2024-08-17T05:12:34+10:00", "relativehumidity": 87.2, "sensorlocation": "CH1 rooftop"}, "type": "Feature"}


In [34]:
# Impute missing locations.
#
# Some features have no geometry (row['geometry'] is falsy). For each device,
# remember the sensorlocation and geometry of its first feature that does have
# a geometry, then copy those onto that device's features that lack one.
# The lookup is built from the GeoJSON alone, so it does not depend on the
# device list derived from the parquet file in another cell.

## Collect the first known sensorlocation and geometry of each device.
locations = {}
geometries = {}

for row in df_gj['features']:
    device_id = row['properties']['device_id']
    # Only the first feature with a geometry is recorded for each device.
    if device_id not in geometries and row['geometry']:
        locations[device_id] = row['properties']['sensorlocation']
        geometries[device_id] = row['geometry']

## Fill in the features that have no geometry (mutates df_gj in place).
unlocated_devices = set()

for row in tqdm(df_gj['features']):
    if row['geometry']:
        continue
    device_id = row['properties']['device_id']
    if device_id not in geometries:
        # No feature of this device ever carried a geometry, so there is
        # nothing to copy; leave the feature unchanged instead of raising
        # KeyError, and report the device once the loop is done.
        unlocated_devices.add(device_id)
        continue
    row['geometry'] = geometries[device_id]
    row['properties']['sensorlocation'] = locations[device_id]

if unlocated_devices:
    print(
        'No known geometry for devices (features left unchanged): '
        f'{sorted(unlocated_devices, key=str)}'
    )

100%|██████████| 117263/117263 [00:00<00:00, 2988653.14it/s]


In [35]:
# Save the GeoJSON object, including the imputed geometries, under a new
# file name so the raw export is not overwritten.
imputed_path = 'data/microclimate-sensors-data_imputed.geojson'
with open(imputed_path, 'w') as out_file:
    geojson.dump(df_gj, out_file)