 ## Note: this notebook is for processing and visualising data already downlaoded from open-meteo API. See POC-weather-eos-collect.ipynb for the data download aprt.

In [1]:
%pip install pytz -q # for timezone aware datetimes

Note: you may need to restart the kernel to use updated packages.


In [2]:
# OS packages
import os
import json
import requests
from datetime import datetime, time, timedelta
import pytz

# open-meteo
import openmeteo_requests

# Data handling
import pandas as pd
import numpy as np
import xarray as xr


# Geospatial
import geopandas as gpd
import rasterio
import rioxarray


# Visualisation
from IPython.display import display, JSON
import holoviews as hv
from holoviews import opts
import hvplot.pandas
import panel as pn

hv.extension('bokeh')
pn.extension() # for notebook

## Request parameters and input setup

### Current/ forecast weather
EOS requires a geojsion, start and end date for basic weather API

EOS basic weather API documentation here - https://doc.eos.com/docs/weather/basic-weather-providers/

- aggregate request only needs geojson and gives 3 day forecast out from request date/time
- request without data ggregation takes a start and end date in addition to the geosjon

### Historical Weather
put notes here

In [3]:
input_root_dir = '/workspace/notebooks/sandbox/data/input-data/open-meteo'
output_root_dir = '/workspace/notebooks/sandbox/data/output-data/open-meteo'

In [4]:
# Read in test area and get bounding box and lat/lon
geojson_file_path = '/workspace/notebooks/sandbox/data/input-data/dissolved-boundaries.geojson'
weather_data_file_path = '/workspace/notebooks/sandbox/data/output-data/open-meteo/yarrayarra_bom_data.csv'

hourly_dataframe = pd.read_csv(weather_data_file_path, parse_dates=['date'])

geom = gpd.read_file(geojson_file_path)
gpd_lon = geom.centroid.x[0]
gpd_lat = geom.centroid.y[0]
bbox = list(geom.total_bounds)

hourly_dataframe.head()


  gpd_lon = geom.centroid.x[0]

  gpd_lat = geom.centroid.y[0]


Unnamed: 0,date,temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,rain,showers,surface_pressure,cloud_cover,cloud_cover_low,cloud_cover_mid,cloud_cover_high,visibility,wind_speed_10m,wind_speed_40m,wind_direction_10m,wind_direction_40m,surface_temperature,soil_temperature_0_to_10cm,soil_temperature_10_to_35cm
0,2024-03-15 00:00:00+00:00,23.796,72.0,24.022564,0.0,0.0,0.0,976.8052,22.0,22.0,0.0,0.0,12860.0,19.8,24.84,96.0,95.0,25.946,25.896,28.646
1,2024-03-15 01:00:00+00:00,26.196,64.0,26.818884,0.0,0.0,0.0,976.875,28.0,28.0,0.0,0.0,14320.0,18.359999,23.039999,88.0,89.0,30.396,26.246,28.546
2,2024-03-15 02:00:00+00:00,28.846,57.0,30.108345,0.0,0.0,0.0,976.96747,9.0,0.0,9.0,0.0,15420.0,16.56,19.8,79.0,85.0,35.146,27.246,28.546
3,2024-03-15 03:00:00+00:00,31.146,51.0,34.062515,0.0,0.0,0.0,976.534,23.0,23.0,0.0,0.0,16180.0,13.679999,16.199999,72.0,79.0,39.546,28.646,28.546
4,2024-03-15 04:00:00+00:00,32.596,46.0,37.31477,0.1,0.1,0.0,975.81506,0.0,0.0,0.0,0.0,17020.0,5.4,9.36,270.0,270.0,41.896,30.246,28.546


Calculate daily weather stats (can also be fetched from API but only 10 calls per say so trying to be conservative)

In [5]:
# Assuming 'date' is a datetime column in your DataFrame
hourly_dataframe['date'] = pd.to_datetime(hourly_dataframe['date'])

# Extract the date from the datetime column
hourly_dataframe['day'] = hourly_dataframe['date'].dt.date

# Assuming 'humidity' is a column in your DataFrame
# Create a new DataFrame with the maximum and minimum temperature for each day and average humidity
daily_summary = hourly_dataframe.groupby('day').agg({
    'temperature_2m': ['max', 'min'],
    'soil_temperature_0_to_10cm': ['max', 'min'],
    'relative_humidity_2m': 'mean',
    'precipitation': 'sum',
    'wind_speed_10m': 'median',
    'surface_pressure': 'mean',
    'cloud_cover': 'mean',
    
}).reset_index()

# Rename the columns
daily_summary.columns = ['day', 'temp_air_max', 'temp_air_min', 'temp_land_max', 'temp_land_min','avg_humidity','total_precipitation', 'avg_windspeed', 'avg_surface_pressure', 'avg_cloud_cover']

daily_summary.head()

Unnamed: 0,day,temp_air_max,temp_air_min,temp_land_max,temp_land_min,avg_humidity,total_precipitation,avg_windspeed,avg_surface_pressure,avg_cloud_cover
0,2024-03-15,35.996,23.796,36.646,25.896,47.416667,0.2,10.260001,975.827677,12.791667
1,2024-03-16,39.146,23.346,38.746,27.246,34.708333,0.0,12.96,975.742776,22.208333
2,2024-03-17,37.446,17.896,37.246,23.746,47.833333,0.0,17.64,974.262019,39.0
3,2024-03-18,32.646,17.346,32.146,22.746,48.541667,0.0,22.32,977.47758,11.666667
4,2024-03-19,29.346,16.796,29.396,21.396,52.75,0.0,28.26,983.056508,35.5


### exporting hourly and daily data as json

In [6]:
json_hourly_fname = os.path.join(output_root_dir, 'yarrayarra_bom_hourly.json')
json_daily_fname = os.path.join(output_root_dir, 'yarrayarra_bom_daily.json')


json_hourly_export = hourly_dataframe.to_json(json_hourly_fname, orient='records')
json_daily_export = daily_summary.to_json(json_daily_fname, orient='records')

## Visualisating temperature data alongside location

In [7]:
# Assuming daily_summary and hourly_dataframe are your DataFrames
daily_summary['day'] = pd.to_datetime(daily_summary['day'])
hourly_dataframe['day'] = pd.to_datetime(hourly_dataframe['date']).dt.date

In [8]:
pn.config.theme = 'default'


# Create a dropdown widget with dates and a default value
date_select = pn.widgets.Select(name='Date', options=sorted(daily_summary['day'].unique()), value=sorted(daily_summary['day'].unique())[0])

# Create a dropdown widget with column names for the hourly data and a default value
hourly_data_select = pn.widgets.Select(name='Select Column', 
                                       options=['temperature_2m', 'relative_humidity_2m', 'apparent_temperature', 'precipitation', 'surface_pressure', 'cloud_cover', 'wind_speed_10m', 'surface_temperature', 'soil_temperature_0_to_10cm', 'soil_temperature_10_to_35cm'], 
                                       value='temperature_2m')

# Initialize the widgets as empty
daily_data_widget = pn.pane.DataFrame()
hourly_data_widget = pn.pane.HoloViews()
map_view = geom.hvplot(geo=True, tiles='ESRI')

# Define a callback to update the DataFrame widgets
def update_df_widgets(event):
    selected_date = date_select.value
    selected_column = hourly_data_select.value
    daily_data_widget.object = daily_summary[daily_summary['day'] == selected_date].T
    
    # Convert the selected date to datetime objects that represent the start and end of the day
    start_of_day = datetime.combine(selected_date, time.min).replace(tzinfo=pytz.UTC)
    end_of_day = datetime.combine(selected_date, time.max).replace(tzinfo=pytz.UTC)
    
    # Filter the hourly_dataframe to include only the rows where 'date' is between start_of_day and end_of_day
    filtered_hourly_dataframe = hourly_dataframe[(hourly_dataframe['date'] >= start_of_day) & (hourly_dataframe['date'] <= end_of_day)]
    
    hourly_data_widget.object = filtered_hourly_dataframe.hvplot.line(x='date', y=selected_column)

# Link the callback to the dropdown widgets
date_select.param.watch(update_df_widgets, 'value')
hourly_data_select.param.watch(update_df_widgets, 'value')

# Call the function once to initialize the widgets
update_df_widgets(None)

# Create a Panel layout
selectors = pn.Row(date_select, hourly_data_select)

daily_data_layout = pn.Row(daily_data_widget, map_view)

layout = pn.Column(selectors, daily_data_layout, hourly_data_widget)

# Show the layout
layout.servable()

BokehModel(combine_events=True, render_bundle={'docs_json': {'34a16858-3654-413e-8373-4dbf7ab12186': {'versionâ€¦