In [1]:
#Data manipulation
import requests
import numpy as np
import pandas as pd


In [43]:
# Root of the catalog API; dataset URLs are formed by appending to it
base_url = 'https://data.melbourne.vic.gov.au/api/explore/v2.0/catalog/datasets/'

# Identifier of the dataset to query
sensor_dataset = 'microclimate-sensor-readings'

# Page size used for the sample request
number_of_records = 100

# Query string: page size, starting offset and timezone
filters = 'records?limit={}&offset=0&timezone=UTC'.format(number_of_records)

In [44]:
# Full request URL: catalog root + dataset name + '/' + query string
sensor_url = '{}{}/{}'.format(base_url, sensor_dataset, filters)
sensor_url

'https://data.melbourne.vic.gov.au/api/explore/v2.0/catalog/datasets/microclimate-sensor-readings/records?limit=100&offset=0&timezone=UTC'

In [28]:
# Fetch the sample page with requests. The timeout keeps a stalled connection
# from hanging the notebook, and raise_for_status stops here on an HTTP error
# instead of letting a later cell fail on an unexpected payload.
result = requests.get(sensor_url, timeout=30)
result.raise_for_status()

In [20]:
# Decode the response body from JSON into Python objects
result_json = result.json()

In [21]:
# Inspect the top-level keys of the decoded payload
result_json.keys()

dict_keys(['total_count', 'results'])

In [22]:
# Value of the payload's 'total_count' entry (compare with the requested page size)
result_json['total_count']

1340

In [23]:
# Pull out the 'results' entry, which is flattened into a DataFrame next
results = result_json['results']

In [24]:
# Flatten the results into a DataFrame.
# NOTE(review): despite the name, this frame is not filtered to PM10 — it
# appears to hold every reading type in the page; confirm before relying on it.
pm10_df = pd.json_normalize(results)

In [25]:
# Display the flattened sample page
pm10_df

Unnamed: 0,local_time,id,site_id,sensor_id,value,type,units,gatewayhub_id,site_status
0,2023-07-26T14:15:02+00:00,7595165,1014,5b,80.14,TPH.RH,%,arc1045,C
1,2023-07-26T14:15:05+00:00,7595073,1011,5b,75.54,TPH.RH,%,arc1055,C
2,2023-07-26T14:15:05+00:00,7595078,1011,5c,1028.80,TPH.PRESSURE,hPa,arc1055,C
3,2023-07-26T14:15:05+00:00,7595112,1012,5a,8.72,TPH.TEMP,C,arc1048,C
4,2023-07-26T14:15:07+00:00,7595204,1016,5a,9.35,TPH.TEMP,C,arc1049,C
...,...,...,...,...,...,...,...,...,...
95,2023-07-26T16:00:00+00:00,7595503,1012,0b.EPA-1h.NOPK.EPA-24h,28.90,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C
96,2023-07-26T16:00:00+00:00,7595518,1012,5a.EPA-1h,8.53,TPH.TEMP-EPA-1h,C,arc1048,C
97,2023-07-26T16:00:00+00:00,7595537,1014,0a.EPA-1h.NOPK,17.90,PM2.5-EPA-1h-NOPK,ug/m^3,arc1045,C
98,2023-07-26T16:00:00+00:00,7595538,1014,0a.EPA-1h.NOPK.EPA-24h,11.20,PM2.5-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1045,C


In [73]:
# Use links & iterate calls

def get_data(base, data_url, offset=0, limit=100):
    """Fetch one page of records from a dataset and flatten it into a DataFrame.

    Parameters
    ----------
    base : str
        Catalog URL prefix; ``data_url`` is appended to it directly, so it
        must already end with a slash.
    data_url : str
        Dataset identifier placed between ``base`` and ``/records``.
    offset : int, default 0
        Index of the first record to request.
    limit : int, default 100
        Maximum number of records to request for this page.

    Returns
    -------
    list
        ``[records_df, next_url, column_names]``. ``records_df`` is the
        flattened page, ``next_url`` is the ``href`` of the response link
        whose ``rel`` is ``'next'`` (``None`` when the page already holds
        ``total_count`` rows or no such link is present), and
        ``column_names`` lists the columns of ``records_df``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    KeyError
        If the payload lacks ``total_count``, ``links`` or ``records``.

    Notes
    -----
    Column names keep only the text after the last ``.`` of each flattened
    key, so two keys that share that last segment yield duplicate column
    names in the returned frame.
    """
    filters = f'records?limit={limit}&offset={offset}&timezone=UTC'
    url = f'{base}{data_url}/{filters}'

    # A timeout stops a stalled connection from hanging the notebook, and
    # raise_for_status surfaces HTTP errors here rather than as a confusing
    # KeyError when the error body is indexed below.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    payload = response.json()

    max_results = payload['total_count']
    links = payload['links']
    records_df = pd.json_normalize(payload['records'])

    # Drop the per-record 'links' column. It does not exist when the page has
    # no records, so a missing column is tolerated instead of raising.
    records_df = records_df.drop(columns=['links'], errors='ignore')

    # Strip the nesting prefixes produced by json_normalize.
    column_names = [name.split('.')[-1] for name in records_df.columns]
    records_df.columns = column_names

    # Only look for a follow-up page when this page is not the whole dataset.
    next_url = None
    if records_df.shape[0] != max_results:
        for link in links:
            if link['rel'] == 'next':
                next_url = link['href']

    return [records_df, next_url, column_names]

In [74]:
def add_to_dataframe(dataframe, new_data):
    """Stack ``new_data`` beneath ``dataframe`` and return the combined frame.

    Neither input is modified, and the row labels of both frames are kept
    exactly as they were.
    """
    return pd.concat(objs=[dataframe, new_data])

In [75]:
def microclimate_sensor_readings(max_records=9900):
    """Download the sensor dataset page by page and return it as one DataFrame.

    Pages are requested through ``get_data`` using the module-level
    ``base_url`` and ``sensor_dataset``; the offset of each request is the
    number of rows collected so far.

    Parameters
    ----------
    max_records : int, default 9900
        Stop paging once at least this many rows have been collected. The
        default keeps the previously hard-coded cap.
        NOTE(review): presumably chosen to stay under an API paging limit;
        confirm.

    Returns
    -------
    pandas.DataFrame
        All collected pages stacked in request order. Row labels are kept
        per page (they restart on every page); call ``reset_index(drop=True)``
        if unique labels are needed.
    """
    pages = []
    fetched = 0
    while True:
        page_df, url_next, _ = get_data(base_url, sensor_dataset, fetched)
        # Keep every page, including the first one: a dataset that fits in a
        # single page has no next link but still carries data.
        pages.append(page_df)
        fetched += page_df.shape[0]

        # Stop on the last page, at the cap, or on an empty page (which would
        # otherwise request the same offset forever).
        if url_next is None or page_df.empty or fetched >= max_records:
            break

    # One concat at the end avoids re-copying the accumulated rows per page.
    return pd.concat(pages)

In [76]:
# Download the paginated sensor readings into a single DataFrame
df = microclimate_sensor_readings()

In [77]:
# Display the downloaded readings
df

Unnamed: 0,id,timestamp,size,local_time,id.1,site_id,sensor_id,value,type,units,gatewayhub_id,site_status
0,9f9b74b79e33f6ac7eaa6a99bd13c875dc0efa6c,2023-07-27T00:21:56.081Z,80,2023-07-26T14:15:02+00:00,7595165,1014,5b,80.14,TPH.RH,%,arc1045,C
1,086f643d075f06e64b4d73af7a7ee6bc0f3242da,2023-07-27T00:21:56.081Z,80,2023-07-26T14:15:05+00:00,7595073,1011,5b,75.54,TPH.RH,%,arc1055,C
2,04bd749003363e5644a9909f4b564a14c45796eb,2023-07-27T00:21:56.081Z,89,2023-07-26T14:15:05+00:00,7595078,1011,5c,1028.80,TPH.PRESSURE,hPa,arc1055,C
3,b0f2dd975d9242c4b8188fdd77bb0c7e12123bb6,2023-07-27T00:21:56.081Z,81,2023-07-26T14:15:05+00:00,7595112,1012,5a,8.72,TPH.TEMP,C,arc1048,C
4,b6369e51c1f97a99371028d96f8f81acff8da909,2023-07-27T00:21:56.081Z,81,2023-07-26T14:15:07+00:00,7595204,1016,5a,9.35,TPH.TEMP,C,arc1049,C
...,...,...,...,...,...,...,...,...,...,...,...,...
35,411dc05555407afc7bc809a2dae67015fe4a1f92,2023-07-27T00:21:56.081Z,81,2023-07-26T23:00:02+00:00,7596665,1011,5a,9.33,TPH.TEMP,C,arc1055,C
36,bc91f282d25e6de4fb5d4799c5353c56cc3268e9,2023-07-27T00:21:56.081Z,79,2023-07-26T23:00:02+00:00,7596670,1011,5b,75.80,TPH.RH,%,arc1055,C
37,fbd3928605048c75562d6fd7765bf16eaf7db33c,2023-07-27T00:21:56.081Z,89,2023-07-26T23:00:02+00:00,7596767,1014,5c,1028.20,TPH.PRESSURE,hPa,arc1045,C
38,a6f0feed93618a9aba4b6164c884c478c4996b9a,2023-07-27T00:21:56.081Z,83,2023-07-26T23:00:11+00:00,7596625,1009,0b,23.60,PM10,ug/m^3,arc1050,C


In [78]:
# Parse the local_time column into pandas datetimes, then confirm the dtype
df['local_time'] = df['local_time'].pipe(pd.to_datetime)
df['local_time'].dtype

datetime64[ns, UTC]

In [79]:
# Keep only the rows whose type equals 'PM10-EPA-1h-NOPK-EPA-24h', then check the size
df = df[df['type'].eq('PM10-EPA-1h-NOPK-EPA-24h')]
df.shape

(31, 12)

In [81]:
# Rows for site 1012, sorted by local_time in ascending order
site_1012_df = df[df['site_id'].eq(1012)].sort_values(by='local_time')
site_1012_df

Unnamed: 0,id,timestamp,size,local_time,id.1,site_id,sensor_id,value,type,units,gatewayhub_id,site_status
36,dd8d29d84cd71b12369eb7001fa277e7782418d5,2023-07-27T00:21:56.081Z,123,2023-07-26 15:00:00+00:00,7595301,1012,0b.EPA-1h.NOPK.EPA-24h,28.7,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C
95,68a3cb9f288e8085b5669ff55ee63217240c9ba7,2023-07-27T00:21:56.081Z,123,2023-07-26 16:00:00+00:00,7595503,1012,0b.EPA-1h.NOPK.EPA-24h,28.9,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C
86,64bf776c4fe62d2c8143feaa5d108b4350b1d9db,2023-07-27T00:21:56.081Z,123,2023-07-26 17:00:00+00:00,7595703,1012,0b.EPA-1h.NOPK.EPA-24h,28.9,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C
61,e9e1b7c36474d490a8e362c771c50eef019ff124,2023-07-27T00:21:56.081Z,123,2023-07-26 18:00:00+00:00,7595905,1012,0b.EPA-1h.NOPK.EPA-24h,29.1,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C
5,e586d65b4bef5aca657f83f058356719f6cec01c,2023-07-27T00:21:56.081Z,123,2023-07-26 19:00:00+00:00,7596057,1012,0b.EPA-1h.NOPK.EPA-24h,29.4,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C
40,9a71251df3c4107d0fdfa50e2a887ff8257ec05a,2023-07-27T00:21:56.081Z,123,2023-07-26 20:00:00+00:00,7596294,1012,0b.EPA-1h.NOPK.EPA-24h,29.5,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C
20,d900eb18d821a4c7bc897cd39fb60b17d8a75411,2023-07-27T00:21:56.081Z,123,2023-07-26 21:00:00+00:00,7596494,1012,0b.EPA-1h.NOPK.EPA-24h,29.5,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C
85,67005b90520d1a8b09382664dc068131c9e49332,2023-07-27T00:21:56.081Z,123,2023-07-26 22:00:00+00:00,7596695,1012,0b.EPA-1h.NOPK.EPA-24h,29.0,PM10-EPA-1h-NOPK-EPA-24h,ug/m^3,arc1048,C


In [82]:
# Keep just the timestamp and the reading value
site_1012_df = site_1012_df.loc[:, ['local_time', 'value']]
site_1012_df

Unnamed: 0,local_time,value
36,2023-07-26 15:00:00+00:00,28.7
95,2023-07-26 16:00:00+00:00,28.9
86,2023-07-26 17:00:00+00:00,28.9
61,2023-07-26 18:00:00+00:00,29.1
5,2023-07-26 19:00:00+00:00,29.4
40,2023-07-26 20:00:00+00:00,29.5
20,2023-07-26 21:00:00+00:00,29.5
85,2023-07-26 22:00:00+00:00,29.0


In [None]:
# TODO: plot site_1012_df as a line graph (value against local_time)