In [1]:
import requests
import pandas as pd
import os
from datetime import datetime, timedelta
from tqdm.notebook import tqdm  # Use tqdm from the notebook module
import asyncio
import aiohttp

In [2]:
def get_data(date_time, data_set_name):
    base_url = f"https://api-open.data.gov.sg/v2/real-time/api/{data_set_name}?date={date_time}"
    all_data = []

    while True:
        response = requests.get(base_url)
        data = response.json().get('data', {})
        readings = data.get('readings', [])

        if readings:
            for reading in readings:
                reading_data = reading.get('data', [])
                for entry in reading_data:
                    entry['Timestamp'] = reading.get('timestamp')  # Add Timestamp for each entry
                    all_data.append(entry)
        else:
            break  # No readings, exit loop

        # Check for paginationToken and update URL
        pagination_token = data.get('paginationToken')
        if pagination_token:
            base_url = f"https://api-open.data.gov.sg/v2/real-time/api/{data_set_name}?date={date_time}&paginationToken={pagination_token}"
        else:
            break  # No more pages
    df = pd.DataFrame(all_data)
    # Concatenate all pivot tables
    if not df.empty:
        pivot_table = df.pivot_table(index='Timestamp', columns='stationId', values='value', aggfunc='first')
        return pivot_table
    else:
        return pd.DataFrame()

In [3]:
# Example usage
df = get_data('2020-04-20',"rainfall")
df.head()

stationId,S07,S08,S100,S104,S106,S107,S108,S109,S11,S111,...,S81,S82,S84,S88,S89,S90,S900,S91,S92,S94
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-04-20T00:00:00+08:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-20T00:05:00+08:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-20T00:10:00+08:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-20T00:15:00+08:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-20T00:20:00+08:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
async def get_data_async(date_time, data_set_name, session):
    base_url = f"https://api-open.data.gov.sg/v2/real-time/api/{data_set_name}?date={date_time}"
    all_data = []

    while True:
        # Asynchronously fetch the data
        async with session.get(base_url) as response:
            data = await response.json()
            readings = data.get('data', {}).get('readings', [])

            if readings:
                for reading in readings:
                    reading_data = reading.get('data', [])
                    for entry in reading_data:
                        entry['Timestamp'] = reading.get('timestamp')  # Add Timestamp for each entry
                        all_data.append(entry)
            else:
                break  # No readings, exit loop

            # Check for paginationToken and update URL for next request
            pagination_token = data.get('data', {}).get('paginationToken')
            if pagination_token:
                base_url = f"https://api-open.data.gov.sg/v2/real-time/api/{data_set_name}?date={date_time}&paginationToken={pagination_token}"
            else:
                break  # No more pages

    # Convert the collected data into a DataFrame
    df = pd.DataFrame(all_data)

    # Pivot table if data is available
    if not df.empty:
        pivot_table = df.pivot_table(index='Timestamp', columns='stationId', values='value', aggfunc='first')
        return pivot_table
    else:
        return pd.DataFrame()  # Return empty DataFrame if no data

async def download_range(start_date, end_date, data_set_name):
    save_dir = f'../data/data_gov_sg/{data_set_name}_data'
    os.makedirs(save_dir, exist_ok=True)

    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
    current_dt = start_dt
    all_data = []
    current_month = None

    total_days = (end_dt - start_dt).days + 1  # Make sure to include the last day
    async with aiohttp.ClientSession() as session:  # Initialize aiohttp session
        with tqdm(total=total_days) as pbar:
            while current_dt <= end_dt:
                timestamp_str = current_dt.strftime("%Y-%m-%d")

                # Save and reset at month boundary
                next_month = current_dt.strftime("%Y-%m")
                if current_month and next_month != current_month:
                    combined = pd.concat(all_data)
                    combined.to_csv(f"{save_dir}/{data_set_name}_{current_month}.csv")
                    all_data = []
                current_month = next_month
                try:
                    df = await get_data_async(timestamp_str, data_set_name, session)
                    if not df.empty:
                        all_data.append(df)
                except Exception as e:
                    print("Error",timestamp_str,e,"Retrying...")
                    await asyncio.sleep(5)
                    try:
                        df = await get_data_async(timestamp_str, data_set_name, session)
                        if not df.empty:
                            all_data.append(df)
                    except Exception as e2:
                        print("Failed",timestamp_str,e2)
                        await asyncio.sleep(8)
                current_dt += timedelta(days=1)  # Increment day by 1
                pbar.update(1)  # Update progress bar

                await asyncio.sleep(2)  # Introduce 2-second delay between requests

        # Save remaining data after final loop
        if all_data:
            combined = pd.concat(all_data)
            combined.to_csv(f"{save_dir}/{data_set_name}_{current_month}.csv")


In [5]:
await download_range("2020-04-20", "2020-05-31", "rainfall")

  0%|          | 0/42 [00:00<?, ?it/s]

In [None]:
await download_range("2020-06-01", "2020-12-31", "rainfall")

  0%|          | 0/214 [00:00<?, ?it/s]

Error 'NoneType' object has no attribute 'get' Retrying...
Failed 'NoneType' object has no attribute 'get'
Error 'NoneType' object has no attribute 'get' Retrying...
Failed 'NoneType' object has no attribute 'get'
Error 'NoneType' object has no attribute 'get' Retrying...


In [11]:
await download_range("2021-01-01", "2021-12-31", "rainfall")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2021-01-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-02 'NoneType' object has no attribute 'get'
Error 2021-01-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-03 'NoneType' object has no attribute 'get'
Error 2021-01-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-04 'NoneType' object has no attribute 'get'


In [12]:
await download_range("2022-01-01", "2022-12-31", "rainfall")

  0%|          | 0/365 [00:00<?, ?it/s]

In [13]:
await download_range("2023-01-01", "2023-12-31", "rainfall")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2023-01-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-02 'NoneType' object has no attribute 'get'
Error 2023-01-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-03 'NoneType' object has no attribute 'get'
Error 2023-01-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-04 'NoneType' object has no attribute 'get'
Error 2023-01-05 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-05 'NoneType' object has no attribute 'get'
Error 2023-01-06 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-06 'NoneType' object has no attribute 'get'
Error 2023-01-07 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-07 'NoneType' object has no attribute 'get'
Error 2023-01-08 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-08 'NoneType' object has no attribute 'get'
Error 2023-01-09 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-09 'NoneType

In [14]:
await download_range("2024-01-01", "2024-12-31", "rainfall")

  0%|          | 0/366 [00:00<?, ?it/s]

In [15]:
await download_range("2025-01-01", "2025-02-17", "rainfall")

  0%|          | 0/48 [00:00<?, ?it/s]

In [16]:
await download_range("2020-04-01", "2020-12-31", "relative-humidity")

  0%|          | 0/275 [00:00<?, ?it/s]

Error 2020-06-09 'NoneType' object has no attribute 'get' Retrying...
Failed 2020-06-09 'NoneType' object has no attribute 'get'
Error 2020-06-10 'NoneType' object has no attribute 'get' Retrying...
Failed 2020-06-10 'NoneType' object has no attribute 'get'


In [17]:
await download_range("2021-01-01", "2021-12-31", "relative-humidity")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2021-01-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-02 'NoneType' object has no attribute 'get'
Error 2021-01-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-03 'NoneType' object has no attribute 'get'
Error 2021-01-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-04 'NoneType' object has no attribute 'get'


In [19]:
await download_range("2022-01-01", "2022-12-31", "relative-humidity")

  0%|          | 0/365 [00:00<?, ?it/s]

In [20]:
await download_range("2023-01-01", "2023-12-31", "relative-humidity")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2023-01-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-02 'NoneType' object has no attribute 'get'
Error 2023-01-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-03 'NoneType' object has no attribute 'get'
Error 2023-01-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-04 'NoneType' object has no attribute 'get'
Error 2023-01-05 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-05 'NoneType' object has no attribute 'get'
Error 2023-01-06 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-06 'NoneType' object has no attribute 'get'
Error 2023-01-07 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-07 'NoneType' object has no attribute 'get'
Error 2023-01-08 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-08 'NoneType' object has no attribute 'get'
Error 2023-01-09 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-09 'NoneType

In [21]:
await download_range("2024-01-01", "2024-12-31", "relative-humidity")

  0%|          | 0/366 [00:00<?, ?it/s]

Error 2024-03-15  Retrying...
Error 2024-12-02 'NoneType' object has no attribute 'get' Retrying...


In [22]:
await download_range("2025-01-01", "2025-04-17", "relative-humidity")

  0%|          | 0/107 [00:00<?, ?it/s]

Error 2025-04-08 'NoneType' object has no attribute 'get' Retrying...
Failed 2025-04-08 'NoneType' object has no attribute 'get'
Error 2025-04-09 'NoneType' object has no attribute 'get' Retrying...
Failed 2025-04-09 'NoneType' object has no attribute 'get'
Error 2025-04-10 'NoneType' object has no attribute 'get' Retrying...
Failed 2025-04-10 'NoneType' object has no attribute 'get'
Error 2025-04-11 'NoneType' object has no attribute 'get' Retrying...
Failed 2025-04-11 'NoneType' object has no attribute 'get'
Error 2025-04-12 'NoneType' object has no attribute 'get' Retrying...
Failed 2025-04-12 'NoneType' object has no attribute 'get'
Error 2025-04-13 'NoneType' object has no attribute 'get' Retrying...
Failed 2025-04-13 'NoneType' object has no attribute 'get'
Error 2025-04-14 'NoneType' object has no attribute 'get' Retrying...
Failed 2025-04-14 'NoneType' object has no attribute 'get'
Error 2025-04-15 'NoneType' object has no attribute 'get' Retrying...
Failed 2025-04-15 'NoneType

In [None]:
await download_range("2020-04-01", "2020-12-31", "wind-speed")

In [None]:
await download_range("2021-01-01", "2021-12-31", "wind-speed")

In [None]:
await download_range("2022-01-01", "2022-12-31", "wind-speed")

In [None]:
await download_range("2023-01-01", "2023-12-31", "wind-speed")

In [None]:
await download_range("2024-01-01", "2024-12-31", "wind-speed")

In [None]:
await download_range("2025-01-01", "2025-02-17", "wind-speed")

In [34]:
await download_range("2020-04-01", "2020-12-31", "air-temperature")

  0%|          | 0/275 [00:00<?, ?it/s]

Error 2020-06-09 'NoneType' object has no attribute 'get' Retrying...
Failed 2020-06-09 'NoneType' object has no attribute 'get'
Error 2020-06-10 'NoneType' object has no attribute 'get' Retrying...
Failed 2020-06-10 'NoneType' object has no attribute 'get'


In [33]:
await download_range("2021-01-01", "2021-12-31", "air-temperature")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2021-01-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-02 'NoneType' object has no attribute 'get'
Error 2021-01-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-03 'NoneType' object has no attribute 'get'
Error 2021-01-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-04 'NoneType' object has no attribute 'get'


In [32]:
await download_range("2022-01-01", "2022-12-31", "air-temperature")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2022-06-01 'NoneType' object has no attribute 'get' Retrying...


In [31]:
await download_range("2023-01-01", "2023-12-31", "air-temperature")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2023-01-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-02 'NoneType' object has no attribute 'get'
Error 2023-01-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-03 'NoneType' object has no attribute 'get'
Error 2023-01-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-04 'NoneType' object has no attribute 'get'
Error 2023-01-05 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-05 'NoneType' object has no attribute 'get'
Error 2023-01-06 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-06 'NoneType' object has no attribute 'get'
Error 2023-01-07 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-07 'NoneType' object has no attribute 'get'
Error 2023-01-08 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-08 'NoneType' object has no attribute 'get'
Error 2023-01-09 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-09 'NoneType

In [30]:
await download_range("2024-01-01", "2024-12-31", "air-temperature")

  0%|          | 0/366 [00:00<?, ?it/s]

Error 2024-04-09  Retrying...


In [29]:
await download_range("2025-01-01", "2025-02-17", "air-temperature")

  0%|          | 0/48 [00:00<?, ?it/s]

In [23]:
await download_range("2020-04-01", "2020-12-31", "wind-direction")

  0%|          | 0/275 [00:00<?, ?it/s]

Error 2020-09-03  Retrying...


In [24]:
await download_range("2021-01-01", "2021-12-31", "wind-direction")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2021-01-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-02 'NoneType' object has no attribute 'get'
Error 2021-01-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-03 'NoneType' object has no attribute 'get'
Error 2021-01-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-01-04 'NoneType' object has no attribute 'get'
Error 2021-04-30 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-04-30 'NoneType' object has no attribute 'get'
Error 2021-05-01 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-05-01 'NoneType' object has no attribute 'get'
Error 2021-05-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-05-02 'NoneType' object has no attribute 'get'
Error 2021-05-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-05-03 'NoneType' object has no attribute 'get'
Error 2021-05-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2021-05-04 'NoneType

In [25]:
await download_range("2022-01-01", "2022-12-31", "wind-direction")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2022-04-23  Retrying...
Error 2022-08-24 'NoneType' object has no attribute 'get' Retrying...


In [26]:
await download_range("2023-01-01", "2023-12-31", "wind-direction")

  0%|          | 0/365 [00:00<?, ?it/s]

Error 2023-01-02 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-02 'NoneType' object has no attribute 'get'
Error 2023-01-03 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-03 'NoneType' object has no attribute 'get'
Error 2023-01-04 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-04 'NoneType' object has no attribute 'get'
Error 2023-01-05 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-05 'NoneType' object has no attribute 'get'
Error 2023-01-06 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-06 'NoneType' object has no attribute 'get'
Error 2023-01-07 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-07 'NoneType' object has no attribute 'get'
Error 2023-01-08 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-08 'NoneType' object has no attribute 'get'
Error 2023-01-09 'NoneType' object has no attribute 'get' Retrying...
Failed 2023-01-09 'NoneType

In [27]:
await download_range("2024-01-01", "2024-12-31", "wind-direction")

  0%|          | 0/366 [00:00<?, ?it/s]

In [28]:
await download_range("2025-01-01", "2025-02-17", "wind-direction")

  0%|          | 0/48 [00:00<?, ?it/s]