In [1]:
#papermill_description=imports

import json
import os
import logging
import sys
import geopandas as gpd
from io import StringIO
import pandas as pd
from datetime import datetime
from gis_utils.dataframe import get_bbox_from_geodf
from retry_requests import retry
from aws_utils import S3Utils
import calendar
import time
from gis_utils.meteo import OpenMeteoAPI

# Configure logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)

In [2]:
#papermill_description=function_definitions

def process_weather_data(weather_data, variable_order):
    """
    Process weather data and return a pandas DataFrame.

    Args:
        weather_data (WeatherData): The weather data object containing the variables and time information.
        variable_order (list): The order of variables to be included in the DataFrame.

    Returns:
        pandas.DataFrame: The processed weather data as a DataFrame, with the date as the index and variables as columns.
    """
    variables = {name: weather_data.Variables(index).ValuesAsNumpy() for index, name in enumerate(variable_order)}
    time_range = pd.date_range(
        start=pd.to_datetime(weather_data.Time(), unit="s", utc=True),
        end=pd.to_datetime(weather_data.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=weather_data.Interval()),
        inclusive="left"
    )
    data = {"date": time_range}
    data.update(variables)
    return pd.DataFrame(data)


In [3]:
#papermill_description=parameters

model = "forecast" #check with Nav if this has to stay as 'weather'
boundaryId = "018f9876-b3d7-73aa-97e6-0cf7a874383d"
geojson = {
	'body':	{
    "type": "FeatureCollection",
    "name": "Wooreen A (Lot 1 TP747507)",
    "crs": {
      "type": "name",
      "properties": {
        "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
      }
    },
    "features": [
      {"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[146.0154942961413,-38.41345712138248],[146.0113264587011,-38.41158412279525],[146.0106828955673,-38.41035944362287],[146.01059095797672,-38.40896664600539],[146.01083612488486,-38.407669879265725],[146.01120387524702,-38.40654119297819],[146.0115409797457,-38.4060849105399],[146.01871211181486,-38.41019134878331],[146.0154942961413,-38.41345712138248]]]},"properties":{}}
    ]
  }
}
notebook_key = geojson['body']['name']

# removed years and months as these are not used by the forecast API. Instead, forecast_days should be used.
forecast_days = 7 # API docs indicate 7 days is the max forecast period

# dynamically generate start/end dates based on current date and forecast_days
datetime_from = (datetime.now().date()).strftime("%Y-%m-%d")
datetime_end = (datetime.now().date() + pd.Timedelta(days=forecast_days)).strftime("%Y-%m-%d")

propertyName = "test"
output_type = "weather-forecast" #check with Nav if this has to stay as 'weather'

boundaryName = "boundaryName"
historical = False #changed from true to false

workspaceId = "018f9876-b3d7-73aa-97e6-0cf7a874383d"
propertyId = "018f99ea-564e-72fa-a4b7-2dbd24b65c3e"
local=True


In [4]:
# Check correct dates are being generated based on forecast_days
date_ranges = pd.date_range(start=datetime_from, end=datetime_end, freq='D')

calculated_date_from = date_ranges[0].strftime("%Y-%m-%d")
calculated_date_end = date_ranges[-1].strftime("%Y-%m-%d")


# Iterate over each date range calculated previously

print(f"start date: {calculated_date_from}")
print(f"start date: {calculated_date_end}")

for date in date_ranges:
    print(f"Date = {date}")
    


start date: 2024-05-27
start date: 2024-06-03
Date = 2024-05-27 00:00:00
Date = 2024-05-28 00:00:00
Date = 2024-05-29 00:00:00
Date = 2024-05-30 00:00:00
Date = 2024-05-31 00:00:00
Date = 2024-06-01 00:00:00
Date = 2024-06-02 00:00:00
Date = 2024-06-03 00:00:00


In [5]:
#papermill_description=weather_data_to_fetch
hourly = [
		"temperature_2m",
        "apparent_temperature", # added as apparent temp included in daily forecast
		"relative_humidity_2m",
		"dew_point_2m",
		"precipitation",
		"weather_code",
		"cloud_cover",
		"et0_fao_evapotranspiration",
		"wind_speed_10m",
        "wind_speed_40m",
        "wind_direction_10m",
        "wind_direction_40m", # Do we need to fetch data at 10m and 40m above ground?
        "wind_gusts_10m",
        "sunshine_duration", # seconds of sunshine in preceeding hour
        "visibility",
        "soil_temperature_0_to_10cm",
        "soil_moisture_0_to_10cm"
]
daily = [
		"temperature_2m_max", 
		"temperature_2m_min",
		"apparent_temperature_max", 
		"apparent_temperature_min",
		"sunrise",
		"sunset",
		"daylight_duration",
		"sunshine_duration", # removed uv from forecast as BOM doesn't include it?
		"precipitation_sum",
		"precipitation_hours",
        "wind_speed_10m_max", # added for completeness
        "wind_gusts_10m_max", # added for completeness
		"wind_direction_10m_dominant",
		"shortwave_radiation_sum",
		"et0_fao_evapotranspiration"
]

In [6]:
#papermill_description=processing_file_io

req = geojson
geojson_data = req['body']  # Directly accessing the 'body' since it's already a dictionary in this mock setup

# Convert the GeoJSON string to a GeoDataFrame
gdf = gpd.read_file(StringIO(json.dumps(geojson_data)))

INFO:botocore.credentials:Found credentials in environment variables.
INFO:botocore.credentials:Found credentials in environment variables.


In [7]:
#papermill_description=processing_bounding_box

# Get bounding box from GeoJSON
bbox = get_bbox_from_geodf(geojson_data)

# get the central latitude and longitude of the bounding box
gpd_lon = (bbox[0] + bbox[2]) / 2
gpd_lat = (bbox[1] + bbox[3]) / 2
centroid = [gpd_lon, gpd_lat]
print(f"Centroid: {centroid}")

Centroid: [146.0146515348958, -38.40977101596119]


In [8]:
#papermill_description=process_weather_forecast_variables

# Set up the initial directory based on the environment
storage_directory = "/tmp"
if local:
    storage_directory = "/workspace/notebook_outputs"

# Ensure the storage directory exists
os.makedirs(storage_directory, exist_ok=True)


# Ensure the specific output directory for this date range exists
output_directory = f"{storage_directory}/{notebook_key}/{date_ranges[0].strftime('%Y-%m-%d')}-{date_ranges[-1].strftime('%Y-%m-%d')}"

os.makedirs(output_directory, exist_ok=True)

# Define the filenames for daily and hourly weather data outputs
weather_output_daily_filename = os.path.join(output_directory, f"{model}_{propertyName}_{calculated_date_from}_{calculated_date_end}_daily_weather.csv")
weather_output_hourly_filename = os.path.join(output_directory, f"{model}_{propertyName}_{calculated_date_from}_{calculated_date_end}_hourly_weather.csv")

api = OpenMeteoAPI()

responses = api.fetch_weather_data(
    latitude=gpd_lat,
    longitude=gpd_lon,
    start_date=calculated_date_from,
    end_date=calculated_date_end,
    historical=False, # set to false for forecast data, was true.
    daily=daily,
    hourly=hourly
)
response = responses[0]

hourly_dataframe = process_weather_data(response.Hourly(), hourly)
daily_dataframe = process_weather_data(response.Daily(), daily)

hourly_dataframe["longitude"] = gpd_lon
hourly_dataframe["latitude"] = gpd_lat
hourly_dataframe["boundary_id"] = boundaryId
hourly_dataframe["boundary_name"] = geojson_data["name"]
hourly_dataframe["workspace_id"] = workspaceId
hourly_dataframe["property_id"] = propertyId

daily_dataframe["longitude"] = gpd_lon
daily_dataframe["latitude"] = gpd_lat
daily_dataframe["boundary_id"] = boundaryId
daily_dataframe["boundary_name"] = geojson_data["name"]
daily_dataframe["workspace_id"] = workspaceId
daily_dataframe["property_id"] = propertyId

# Example function calls (you'll need to define these functions or processing steps):
daily_dataframe.to_csv(weather_output_daily_filename, index=False)
hourly_dataframe.to_csv(weather_output_hourly_filename, index=False)

time.sleep(2)

print("Jenna testing print statements:")
print(f"output direcotry: {output_directory}")


Jenna testing print statements:
output direcotry: /workspace/notebook_outputs/Wooreen A (Lot 1 TP747507)/2024-05-27-2024-06-03


In [9]:
# aws_s3_notebook_output = os.getenv('AWS_S3_BUCKET_NOTEBOOK_OUTPUT')
# aws_default_region = os.getenv('AWS_DEFAULT_REGION')
# s3_client = S3Utils(
# 		region_name=aws_default_region,
# 		s3_bucket=aws_s3_notebook_output,
# 		prefix=notebook_key
# )

In [10]:
# Save dataframes to CSV
# output_dir = f"{storage_directory}/{notebook_key}/{calculated_date_from}_{calculated_date_end}"
# os.makedirs(output_dir, exist_ok=True)
# daily_dataframe.to_csv(weather_output_daily_filename, index=False)
# hourly_dataframe.to_csv(weather_output_hourly_filename, index=False)


# s3_client.upload_file(
# 		file_path=weather_output_daily_filename,
# )

# s3_client.upload_file(
# 		file_path=weather_output_hourly_filename,
# )