In [2]:
import requests
import numpy as np
import pandas as pd

# Function to fetch data based on distance, latitude, and longitude
def fetch_safecast_data(latitude, longitude, date, interval):
    # URL of the JSON API with dynamic variables
    columns = ['id', 'user_id', 'value', 'unit', 'location_name', 'device_id', 'original_id', 'measurement_import_id', 'captured_at', 'height', 'devicetype_id', 'sensor_id', 'station_id', 'channel_id', 'latitude', 'longitude']
    end_date = date + pd.Timedelta(days=interval)
    hours = np.arange(0, 24, 3)
    batch = pd.DataFrame(columns=columns)
    for i in range(0, len(hours)-2):
        url = f"https://api.safecast.org/en-US/measurements?captured_after={date.year}%2F{date.month}%2F{date.day}+{hours[i]}%3A00%3A00&captured_before={date.year}%2F{date.month}%2F{date.day}+{hours[i+1]}%3A00%3A00&format=json&latitude={latitude}&longitude={longitude}"

        response = requests.get(url)


        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            # Parse JSON response
            data = response.json()
            batch = pd.concat([batch, pd.DataFrame(data)], ignore_index=True)
        else:
            print("Failed to fetch data:", response.status_code)
            continue
    avg_batch= batch[['value', 'unit', 'location_name', 'captured_at', 'device_id', 'height', 'devicetype_id', 'station_id', 'latitude', 'longitude']].groupby(['latitude', 'longitude', 'unit']).agg({
    'value': 'mean',
    'location_name': 'first',
    'captured_at' : 'first',
    'device_id': 'first', 
    'height': 'first', 
    'devicetype_id': 'first', 
    'station_id': 'first'
}).reset_index()

    return avg_batch
    
def fetch_all(latitude_range, longitude_range, date_range, interval):
    columns = ['value', 'unit', 'location_name', 'captured_at', 'device_id', 'height', 'devicetype_id', 'station_id', 'latitude', 'longitude']
    result = pd.DataFrame(columns=columns)

    latitude = np.linspace(latitude_range[0], latitude_range[1], interval)
    longitude = np.linspace(longitude_range[0], longitude_range[1], interval)
    dates = pd.date_range(start=date_range[0], end=date_range[0], periods=(date_range[1] - date_range[0]).days)
    
    for date in dates:
        for lat in latitude:
            for long in longitude:
                batch = pd.DataFrame(fetch_safecast_data(lat, long, date, 1))
                result = pd.concat([result, batch], ignore_index=True)
    return result


In [3]:
result = fetch_all([34, 35], [134, 135], [pd.Timestamp('2024-03-20'), pd.Timestamp('2024-03-21')], 2)

In [4]:
print(result)

         value         unit                       location_name  \
0    19.500000          cpm                                None   
1    19.100000       status                                None   
2    14.666667          cpm                          Phoenix,AZ   
3    26.000000          cpm  Earl's House, Johns Creek, GA, USA   
4     0.172000          usv  Earl's House, Johns Creek, GA, USA   
..         ...          ...                                 ...   
447  19.000000          cpm                     Bad Pyrmont, DE   
448  17.333333          cpm                       Waterland, NL   
449  17.000000          cpm                          Berlin, DE   
450  10.650000   PM10 ug/m3                                None   
451   4.450000  PM2.5 ug/m3                                None   

                  captured_at  device_id  height  \
0    2024-03-20T03:00:03.000Z   100221.0    13.0   
1    2024-03-20T03:00:05.000Z   100229.0    13.0   
2    2024-03-20T03:00:35.000Z     4841.