In [None]:
import os
from dotenv import load_dotenv
from requests.auth import HTTPBasicAuth
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import contextily as ctx
from matplotlib import pyplot as plt
import h3
import calendar

# Load environment variables via python-dotenv, then read the API key that is
# later sent to the Sparrow API. APIKEY is None when the variable is not set.
load_dotenv()
APIKEY = os.environ.get('APIKEY')

In [None]:
# fetch_data: True  -> download the monthly CO2 data from the API and write it to CSV;
#             False -> load the previously saved CSV instead.
fetch_data = False

# Bounding box as (start_lat, start_lon, end_lat, end_lon); example area around Lausanne.
bbox = (
    46.4,  # start_lat
    6.5,   # start_lon
    46.6,  # end_lat
    6.8,   # end_lon
)

In [None]:
def get_monthly_data(year, month, bbox, filter_name, api_key, timeout=30):
    """
    Fetch one calendar month of Sparrow API data for a filter within a bounding box.

    Args:
        year (int): The year (e.g., 2024).
        month (int): The month (1-12).
        bbox (tuple): A tuple containing (start_lat, start_lon, end_lat, end_lon).
        filter_name (str): The specific filter to query (e.g., 'co2').
        api_key (str): The API key, sent as the 'api_key' query parameter.
        timeout (float | tuple): Timeout in seconds handed to ``requests.get`` so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        pd.DataFrame: The records found under the response's 'body' key, with an
        added 'filter' column. An empty DataFrame is returned when the request
        fails, the response is not valid JSON, or 'body' is missing or empty.
    """
    url = 'https://api.sparrow.city/get'
    headers = {'Accept': 'application/json'}

    # monthrange returns (weekday of the 1st, number of days); only the latter is needed.
    _, last_day = calendar.monthrange(year, month)

    # Zero-padded timestamps covering the whole month.
    start_date = f"{year}-{month:02d}-01T00:00:00"
    end_date = f"{year}-{month:02d}-{last_day:02d}T23:59:59"

    start_lat, start_lon, end_lat, end_lon = bbox

    params = {
        'filter': filter_name,
        'start_date': start_date,
        'end_date': end_date,
        'start_lat': start_lat,
        'start_lon': start_lon,
        'end_lat': end_lat,
        'end_lon': end_lon,
        'api_key': api_key
    }

    print(f"Fetching {filter_name} data for {start_date} to {end_date}...")

    try:
        r = requests.get(url, headers=headers, params=params, timeout=timeout)
        r.raise_for_status()  # Raise error for bad status codes
        data = r.json()
    except requests.exceptions.RequestException as e:
        # HTTP error messages include the full request URL, and the URL carries
        # the api_key query parameter: mask the key before printing.
        message = str(e)
        if api_key:
            message = message.replace(str(api_key), '***')
        print(f"Request failed: {message}")
        return pd.DataFrame()
    except ValueError as e:
        # Body was not valid JSON (older requests versions raise a plain ValueError).
        print(f"Invalid JSON in response: {e}")
        return pd.DataFrame()

    # Only a JSON object can carry a 'body'; anything else is treated as "no data".
    body = data.get('body') if isinstance(data, dict) else None
    if not body:
        print("No data found or empty body.")
        return pd.DataFrame()

    df = pd.DataFrame(body)
    df['filter'] = filter_name
    return df

In [None]:
# Loop over the 12 months of 2024 and combine the entire dataset into a single DataFrame.
# Only runs when fetch_data is True; the result is also written to CSV so that later
# runs can load it without calling the API again.

if fetch_data:
    # Keep the per-month frames under their own name so that all_data is only
    # ever bound to a DataFrame.
    monthly_frames = [
        get_monthly_data(2024, month, bbox, 'co2', APIKEY)
        for month in range(1, 13)
    ]
    all_data = pd.concat(monthly_frames, ignore_index=True)
    all_data.to_csv('sparrow_co2_2024.csv', index=False)

In [None]:
# No fresh download requested: load the CO2 data from the previously saved CSV.
if not fetch_data:
    all_data = pd.read_csv(filepath_or_buffer='sparrow_co2_2024.csv')

In [None]:
url = 'https://api.sparrow.city/get'
headers = {'Accept': 'application/json'}

# so2 is not available in the data
filters = ['pm1', 'pm25', 'pm10', 'co2', 'no2', 'o3', 'temperature', 'humidity', 'pressure', 'iri', 'bumps']
start_date = '2024-03-01T00:00:00'
end_date = '2024-04-30T23:59:59'
start_lat = 46.4
start_lon = 6.5
end_lat = 46.6
end_lon = 6.8

dfs = []

# The loop variable is filter_name (not filter) so the builtin filter() is not shadowed.
for filter_name in filters:
    params = {
        'filter': filter_name,
        'start_date': start_date,
        'end_date': end_date,
        'start_lat': start_lat,
        'start_lon': start_lon,
        'end_lat': end_lat,
        'end_lon': end_lon,
        'api_key': APIKEY
    }
    # Bound the wait so one stalled request cannot hang the whole notebook.
    r = requests.get(url, headers=headers, params=params, timeout=60)
    if not r.ok:
        # r.url is deliberately left out of the message: it contains the api_key parameter.
        raise RuntimeError(
            f"Sparrow API request for filter '{filter_name}' failed with HTTP {r.status_code}"
        )
    data = r.json()
    # A response without a 'body' (or with an empty one) yields an empty frame
    # instead of a KeyError.
    df = pd.DataFrame(data.get('body') or [])
    df['filter'] = filter_name
    dfs.append(df)

# Each per-filter dataframe has the following columns:
# i: id of the measurement
# t: timestamp of the measurement, in Unix time format
# n: name of the measurement node
# x: longitude of the measurement
# y: latitude of the measurement
# s: node detected speed (km/h)
# a: node detected altitude over sea level (m)
# v: value of the measurement
# plus the 'filter' column added above.

combined_df = pd.concat(dfs, ignore_index=True)
combined_df.to_csv('data.csv', index=False)

In [None]:
# Debug peek at the raw body of the last response from the loop above.
# Truncated so a large payload is not dumped into the notebook output.
r.text[:500]

In [None]:
# Preview the first five rows of the combined measurements.
combined_df.head(n=5)

In [None]:
# Convert to a GeoDataFrame: build the point geometries from the x (longitude) and
# y (latitude) columns in one vectorized call, and declare the coordinates as
# WGS84 (EPSG:4326) at construction time.
geometry = gpd.points_from_xy(combined_df['x'], combined_df['y'])
gdf = gpd.GeoDataFrame(combined_df, geometry=geometry, crs="EPSG:4326")
# Optional export:
# gdf.to_file('data.geojson', driver='GeoJSON')

In [None]:
# Quick-look plot of the rows whose filter is 'co2', in the GeoDataFrame's own coordinates.
gdf.loc[gdf['filter'] == 'co2'].plot(
    column='filter',
    categorical=True,
    legend=True,
)

In [None]:
# CO2 measurement locations drawn over an OpenStreetMap basemap.
# The points are reprojected to Web Mercator (EPSG:3857) before plotting, so the
# axes are in projected metres rather than degrees.
ax = gdf[gdf['filter'] == 'co2'].to_crs(epsg=3857).plot(figsize=(10, 10), color="red", alpha=0.5)
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.CH)

ax.set_title("CO2 Measurement Points in the Lausanne Area")
ax.set_xlabel("Easting (m, EPSG:3857)")
ax.set_ylabel("Northing (m, EPSG:3857)")
plt.show()

In [None]:
# Use the H3 library to create hexagonal bins of the data, and then plot the average value of each filter in each bin


def create_hex_bins(gdf, resolution=8):
    """Create hexagonal bins for a GeoDataFrame."""
    hex_bins = []
    for idx, row in gdf.iterrows():
        # Convert coordinates to H3 index
        h3_index = h3.latlng_to_cell(row.geometry.x, row.geometry.y, resolution)
        hex_bins.append(h3_index)
    return hex_bins

# Create hex bins for each point in the GeoDataFrame
gdf['hex_bin'] = create_hex_bins(gdf)

In [None]:
hex_avg = gdf.groupby(['hex_bin', 'filter'])['v'].mean().reset_index()

In [None]:
hex_avg

In [None]:
h3.cell_to_boundary('887a888325fffff')

In [None]:
# For each hexagon, recreate the geometry of the hexagon based on the H3 index, and create a new GeoDataFrame with the hexagon geometries and the average values
h3_geo=gpd.GeoDataFrame(data=hex_avg, geometry = hex_avg.apply(lambda x: Polygon(h3.cell_to_boundary(x.hex_bin)),axis=1),crs=4326)
# hex_avg['geometry'] = hex_avg['hex_bin'].apply(lambda x: h3.cell_to_polygon(x, geo_json=True))
# hex_gdf = gpd.GeoDataFrame(hex_avg, geometry='geometry')
# hex_gdf.crs = "EPSG:4326"
# hex_gdf.to_file('hex_data.geojson', driver='GeoJSON')

In [None]:
# Preview the first five hexagon rows (cell index, filter, mean value, geometry).
h3_geo.head(n=5)

In [None]:
# Choropleth of the mean CO2 value per H3 hexagon over an OpenStreetMap basemap.
# The hexagons are reprojected to Web Mercator (EPSG:3857) before plotting, so the
# axes are in projected metres; the legend shows the colour scale for column 'v'.
ax = h3_geo[h3_geo['filter'] == 'co2'].to_crs(epsg=3857).plot(
    column='v',
    figsize=(10, 10),
    alpha=0.6,
    legend=True,
)
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.CH)

ax.set_title("Mean CO2 per H3 Hexagon in the Lausanne Area")
ax.set_xlabel("Easting (m, EPSG:3857)")
ax.set_ylabel("Northing (m, EPSG:3857)")
plt.show()

In [None]:
# Display the per-hexagon, per-filter mean values (columns: hex_bin, filter, v).
hex_avg

In [None]:
# Grouped bar chart of the hexagon averages: one group per hex bin, one bar per
# filter, bar height = mean value of that filter in that bin.
bar_ax = (
    hex_avg
    .pivot(index='hex_bin', columns='filter', values='v')
    .plot(kind='bar', figsize=(10, 5))
)
bar_ax.set_title("Average Value of Each Filter in Hexagonal Bins")
bar_ax.set_xlabel("Hexagonal Bin")
bar_ax.set_ylabel("Average Value")
plt.show()

In [None]:
# Plot average value for each filter in each hex bin, one panel per filter.
# The filters are different quantities, so each gets its own y-axis instead of
# sharing a single scale.
hex_avg_wide = hex_avg.pivot(index='hex_bin', columns='filter', values='v')
axes = hex_avg_wide.plot(
    kind='bar',
    subplots=True,
    sharex=True,
    legend=False,
    figsize=(10, 3 * hex_avg_wide.shape[1]),
)
fig = axes[0].get_figure()
fig.suptitle("Average Value of Each Filter in Hexagonal Bins")
for ax in axes:
    ax.set_ylabel("Average Value")
axes[-1].set_xlabel("Hexagonal Bin")
fig.tight_layout()
plt.show()