 ## Demo: processing and visualising Open Meteo weather data (demo part 2)

In [1]:
%pip install pytz -q # for timezone aware datetimes

[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
# OS packages
import os
from datetime import datetime, time
import pytz

# Data handling
import pandas as pd
import numpy as np


# Geospatial
import geopandas as gpd

# Visualisation
import holoviews as hv
from holoviews import opts
import hvplot.pandas
import panel as pn

import plotly.express as px

# Load the bokeh, plotly and matplotlib plotting backends for HoloViews
hv.extension(
    'bokeh',
    'plotly',
    'matplotlib',
)

# Initialise Panel with the page-level sizing mode, template and theme
pn.extension(
    sizing_mode="stretch_width",
    template="bootstrap",
    theme="default",
)

## What data can we get from Open Meteo and how does it compare to EOS?

- add comparison to EOS and list side-by-side table


### Current/ forecast weather
Open Meteo lets the user select from XXX government weather models, documented in the API:

[Open-Meteo API documentation](https://open-meteo.com/en/docs)

This demonstration uses the Bureau of Meteorology model.


- aggregate request only needs geojson and gives 3 day forecast out from request date/time
- request without data aggregation takes a start and end date in addition to the geojson

### Historical Weather

Open Meteo also lets users get historical, actual weather data (forecast data is modelled, historical data is measured).



## Set up directories, cache, input geometry

For visualisation purposes, the same geojson that was used in part one to get the weather data from the API is imported.

The pre-downloaded csv containing the API weather data is also imported. The top 5 rows of the csv are shown to confirm what data was imported.

In [3]:
# Directory for exported JSON files and saved images.
# This must be a directory (not a file name) so it can be combined with file
# names via os.path.join; '.' is the notebook's working directory.
output_root_dir = '.'

# Pre-downloaded Open Meteo hourly weather data
weather_data_file_path = 'yarrayarra_bom_data.csv'

# Boundary geometry, loaded for the map shown in the dashboard
geojson_file_path = 'dissolved-boundaries.geojson'
geom = gpd.read_file(geojson_file_path)

# Parse 'date' as datetimes on load so the .dt accessor can be used on it later
hourly_dataframe = pd.read_csv(weather_data_file_path, parse_dates=['date'])

# Visual check of the imported data
hourly_dataframe.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,precipitation,rain,surface_pressure,cloud_cover,visibility,wind_speed_10m,wind_direction_10m,soil_temperature_0_to_10cm,soil_temperature_10_to_35cm
0,2024-03-18 00:00:00+00:00,20.296,75.0,0.0,0.0,976.7037,36.0,12400.0,13.32,194.0,23.946,30.146
1,2024-03-18 01:00:00+00:00,22.496,63.0,0.0,0.0,976.6611,16.0,13540.0,15.119999,187.0,23.896,29.946
2,2024-03-18 02:00:00+00:00,24.846,54.0,0.0,0.0,976.8242,0.0,15180.0,14.759999,183.0,24.646,29.896
3,2024-03-18 03:00:00+00:00,27.546,43.0,0.0,0.0,976.5377,0.0,16520.0,15.84,180.0,25.896,29.746
4,2024-03-18 04:00:00+00:00,29.396,37.0,0.0,0.0,976.4458,0.0,17040.0,17.28,182.0,27.246,29.746


While daily weather data can be requested from the API, it is also possible to calculate it using the hourly data that was already downloaded. This allows us to keep API calls (and costs) lower.

This also gives us the flexibility to calculate metrics in different time blocks - 3 hourly blocks, for example.

Before continuing, the datetime data from the OpenMeteo API needs to be converted to the right timezone, and the day extracted from the datetime variable.

In [4]:
# Bring the 'date' column into Perth local time.
timestamps = hourly_dataframe['date']
if timestamps.dt.tz is None:
    # Timezone-naive values are labelled as UTC before being converted
    timestamps = timestamps.dt.tz_localize('UTC')
hourly_dataframe['date'] = timestamps.dt.tz_convert('Australia/Perth')

# Time-of-day component of each (now local) timestamp
formatted_time = hourly_dataframe['date'].dt.time

# Calendar date of each local timestamp, stored as 'day' for grouping
hourly_dataframe['day'] = hourly_dataframe['date'].dt.date


In [5]:


# Daily summary built from the hourly data, one row per 'day'.
#
# Each entry maps an output column name to the (hourly column, aggregation)
# it is computed from. Keeping the name next to its calculation means a
# column can be added, removed or reordered without the labels drifting out
# of step with the values, which a positional rename cannot guarantee.
daily_aggregations = {
    'temp_air_max': ('temperature_2m', 'max'),
    'temp_air_min': ('temperature_2m', 'min'),
    'temp_soil_surface_max': ('soil_temperature_0_to_10cm', 'max'),
    'temp_soil_surface_min': ('soil_temperature_0_to_10cm', 'min'),
    'temp_soil_10-35cm_max': ('soil_temperature_10_to_35cm', 'max'),
    'temp_soil_10-35cm_min': ('soil_temperature_10_to_35cm', 'min'),
    'avg_humidity': ('relative_humidity_2m', 'mean'),
    'total_precipitation': ('precipitation', 'sum'),
    # NOTE: despite the 'avg_' prefix this is the daily *median* wind speed.
    # The column name is kept unchanged so existing consumers of the summary
    # (including the JSON export) see the same keys.
    'avg_windspeed': ('wind_speed_10m', 'median'),
    'avg_surface_pressure': ('surface_pressure', 'mean'),
    'avg_cloud_cover': ('cloud_cover', 'mean'),
}

# Column names such as 'temp_soil_10-35cm_max' are not valid Python
# identifiers, so the mapping is passed with ** rather than as keywords.
daily_summary = (
    hourly_dataframe
    .groupby('day')
    .agg(**daily_aggregations)
    .reset_index()
)

# Visual check of generated summary data
daily_summary.head()

Unnamed: 0,day,temp_air_max,temp_air_min,temp_soil_surface_max,temp_soil_surface_min,temp_soil_10-35cm_max,temp_soil_10-35cm_min,avg_humidity,total_precipitation,avg_windspeed,avg_surface_pressure,avg_cloud_cover
0,2024-03-18,32.146,20.296,32.146,23.896,30.146,29.646,42.0,0.0,18.54,976.595802,3.25
1,2024-03-19,28.746,16.746,28.746,22.646,29.646,28.546,54.083333,0.0,26.64,982.507015,21.333333
2,2024-03-20,29.996,18.046,29.896,21.896,28.396,27.546,51.916667,0.0,23.039999,984.795428,25.5
3,2024-03-21,31.995998,17.146,32.246,21.546,27.646,26.896,40.75,0.0,14.579999,984.007456,0.125
4,2024-03-22,30.246,18.046,30.246,22.146,27.546,26.896,36.333333,0.0,21.599998,985.44982,0.0


In [6]:
# TODO: create another dataframe that only includes some of the columns, to demonstrate that it's a flexible system

### Exporting hourly and daily data as JSON

Now that we have both hourly and daily weather data, they can be exported as JSON files:

In [7]:
# Write the hourly and daily tables to JSON files in the working directory,
# one JSON object per row (orient='records').
# DataFrame.to_json returns None when it is given a path, so both
# *_export variables end up as None; the files on disk are the real output.
json_hourly_export = hourly_dataframe.to_json(
    path_or_buf='yarrayarra_bom_hourly.json',
    orient='records',
)
json_daily_export = daily_summary.to_json(
    path_or_buf='yarrayarra_bom_daily.json',
    orient='records',
)

## Visualising temperature data alongside location

In [8]:
# Prepare the 'day' columns used to keep the table and graphs on the same day.
# The daily summary's 'day' is converted to pandas datetimes so it can be
# compared with the value chosen in the date dropdown.
daily_summary['day'] = daily_summary['day'].pipe(pd.to_datetime)

# The hourly 'day' is re-derived from 'date' and holds plain calendar dates
hourly_dataframe['day'] = hourly_dataframe['date'].pipe(pd.to_datetime).dt.date

In [9]:
# Map showing the geojson polygon outline over ESRI tiles
map_view = geom.hvplot(geo=True, tiles='ESRI', line_color='blue', line_width=3, fill_alpha=0, frame_width=300)

# Days available in the daily summary, sorted once and reused for both the
# dropdown options and its default value (the earliest day)
available_days = sorted(daily_summary['day'].unique())
date_select = pn.widgets.Select(name='Date',
                                options=available_days,
                                value=available_days[0] if available_days else None)

# Dropdown of hourly variables that can be plotted.
# 'temperature_2m' is the default and therefore must also be listed as an
# option; otherwise the dropdown shows no matching entry on load and the
# user cannot switch back to temperature after choosing another variable.
hourly_data_select = pn.widgets.Select(name='Select Column',
                                       options=['temperature_2m',
                                                'relative_humidity_2m',
                                                'precipitation',
                                                'surface_pressure',
                                                'cloud_cover',
                                                'wind_speed_10m'],
                                       value='temperature_2m')

# Empty panes; their contents are filled in by the update callback
daily_data_widget = pn.pane.DataFrame(width=400,
                                      bold_rows=True,
                                      header=False,
                                      justify='left')

hourly_data_widget = pn.pane.HoloViews(width=800)
hourly_temp_data_widget = pn.pane.HoloViews(width=800)
wind_chart = pn.pane.Plotly()

# Link widgets to user input to update the visualisations
def update_df_widgets(event):
    """Refresh the daily table, the hourly charts and the wind chart.

    Reads the selected day from ``date_select`` and the selected variable
    from ``hourly_data_select``, then replaces the ``object`` of
    ``daily_data_widget``, ``hourly_data_widget``,
    ``hourly_temp_data_widget`` and ``wind_chart``.

    Parameters
    ----------
    event
        Not used. It is accepted so the function can be registered as a
        ``param.watch`` callback and also called directly with ``None``.
    """
    selected_date = date_select.value
    selected_column = hourly_data_select.value

    # Daily summary for the chosen day, transposed so each metric is a row
    daily_data_widget.object = daily_summary[daily_summary['day'] == selected_date].T

    # Half-open window [midnight, next midnight) in Perth local time, so every
    # timestamp on the selected day is included regardless of its minute/second
    start_of_day = pd.to_datetime(selected_date).normalize().tz_localize('Australia/Perth')
    end_of_day = start_of_day + pd.Timedelta(days=1)

    # Copy the filtered rows: a column is added below, and assigning to a
    # slice of hourly_dataframe would raise SettingWithCopyWarning
    in_selected_day = (hourly_dataframe['date'] >= start_of_day) & (hourly_dataframe['date'] < end_of_day)
    filtered_hourly_dataframe = hourly_dataframe[in_selected_day].copy()

    # Line chart of the user-selected hourly variable
    line_plot = filtered_hourly_dataframe.hvplot.line(x='date', y=selected_column).opts(backend='plotly')
    line_plot = line_plot.opts(title=selected_column, xlabel='time', ylabel=selected_column, fontscale=1.1, framewise=True)
    hourly_data_widget.object = line_plot

    # Air temperature and soil surface temperature overlaid on one chart
    hourly_air_temp = hv.Curve((filtered_hourly_dataframe['date'], filtered_hourly_dataframe['temperature_2m']), 'Time', 'Temperature 2m', label='Air temperature').opts(backend='plotly')
    hourly_soil_surface_temp = hv.Curve((filtered_hourly_dataframe['date'], filtered_hourly_dataframe['soil_temperature_0_to_10cm']), 'Time', 'Soil Temperature 0-10cm', label='Soil surface temperature').opts(backend='plotly')

    overlay = (hourly_air_temp * hourly_soil_surface_temp)
    overlay = overlay.opts(width=600, height=400, show_legend=True, title='Temperature 2m and Soil Temperature 0-10cm', xlabel='time', ylabel='temp(C)', fontscale=1.1, framewise=True, backend='plotly')

    hourly_temp_data_widget.object = overlay

    # Polar bar chart of wind speed by direction, coloured by speed band.
    # Bands are 10 units wide; labels ('0-10', '10-20', ...) are derived from
    # the bin edges so the two cannot get out of step.
    bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    labels = [f'{lower}-{upper}' for lower, upper in zip(bins[:-1], bins[1:])]
    # include_lowest=True places a wind speed of exactly 0 in the '0-10' band
    # instead of leaving it unbinned (NaN)
    filtered_hourly_dataframe['wind_speed_bins'] = pd.cut(filtered_hourly_dataframe['wind_speed_10m'],
                                                          bins=bins,
                                                          labels=labels,
                                                          include_lowest=True)

    fig = px.bar_polar(filtered_hourly_dataframe,
                       r="wind_speed_10m",
                       theta="wind_direction_10m",
                       color="wind_speed_bins",
                       template="plotly",
                       color_discrete_sequence=px.colors.sequential.Plasma_r)

    # Horizontal legend, right-aligned and anchored just above the chart
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ))

    # Show the Plotly figure in the wind chart pane
    wind_chart.object = fig

# Re-run the update callback whenever either dropdown changes value
date_select.param.watch(update_df_widgets, 'value')
hourly_data_select.param.watch(update_df_widgets, 'value')

# Populate every pane once using the default selections
update_df_widgets(None)

# Left column: controls, map and the daily summary table
selectors = pn.Column(
    date_select,
    hourly_data_select,
    map_view,
    daily_data_widget,
    width=400,
)

# Middle column: wind chart
wind_section = pn.Column(wind_chart, width=500)

# Right column: the selected-variable chart above the temperature overlay
hourly_data_layout = pn.Column(
    hourly_data_widget,
    hourly_temp_data_widget,
    width=500,
)

# Place the three columns side by side and mark the result as servable
layout = pn.Row(selectors, wind_section, hourly_data_layout)
layout.servable()