In [None]:
#papermill_description=imports

import json
import os
import logging
import geopandas as gpd
from io import StringIO
import pandas as pd
from datetime import datetime, timedelta
from gis_utils.dataframe import get_bbox_from_geodf
from gis_utils.meteo import OpenMeteoAPI, convert_epoch_to_timezone, map_months_to_numbers
from datetime import datetime, timedelta

import calendar

# Notebook-wide handle on the root logger.
logger = logging.getLogger()

# Request unsigned (anonymous) access, as used for AWS public datasets.
os.environ["AWS_NO_SIGN_REQUEST"] = "YES"

In [None]:
#papermill_description=parameters

# Identifier of this notebook run; it is used to build the scratch directory
# path ("/tmp/<notebook_key>") where the CSV outputs are written.
notebook_key = "localjupyter"

# Mock request payload. The GeoJSON FeatureCollection describing the boundary
# lives under the 'body' key and is read back as a plain dict further down.
geojson = {
    'body': {
        "type": "FeatureCollection",
        "name": "dissolved-boundaries",
        "crs": {
            "type": "name",
            "properties": {
                "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
            }
        },
        "features": [
            {
                "type": "Feature",
                "properties": {
                    "fid": 1
                },
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [
                        [
                            [116.26012130269045, -29.225295369642396],
                            [116.261724812149055, -29.241374854584375],
                            [116.283751968396274, -29.256813692452539],
                            [116.284342735038919, -29.268250184258388],
                            [116.292247755352392, -29.265992437426529],
                            [116.292360282331941, -29.293057573630019],
                            [116.314865678242256, -29.293523728033122],
                            [116.326259034921833, -29.293033039128805],
                            [116.326315298411629, -29.305397680579894],
                            [116.355065941687045, -29.307016748931797],
                            [116.355065941687045, -29.306575187382712],
                            [116.383366477044206, -29.307384715430175],
                            [116.384322956370426, -29.290407813444993],
                            [116.387586238777402, -29.282629879611861],
                            [116.386517232471661, -29.259807919053017],
                            [116.359201308185533, -29.259488866292969],
                            [116.359229439930417, -29.259243440415627],
                            [116.35242155766754, -29.259292525638209],
                            [116.352140240218716, -29.220237788279107],
                            [116.302234524787593, -29.223503148505326],
                            [116.281388901825679, -29.2239696200396],
                            [116.26012130269045, -29.225295369642396]
                        ]
                    ]
                }
            }
        ]
    }
}
# Written to the "boundary_name" column of the output CSVs.
propertyName = "test"

# Switches for the two download steps; both are off by default, so the
# notebook performs no API calls unless a caller enables them.
historical=False
forecast=False

# Switches for additionally writing the hourly CSVs of each step.
hourly_forecast=False
hourly_historical=False

# Number of forecast days requested from the forecast endpoint.
forecast_days = 7
# First and last year of the historical download.
start_year=2022
end_year=2023

# Timezone value forwarded to the Open-Meteo requests.
timezone = "auto"

# Identifiers copied verbatim into the output CSVs
# ("boundary_id", "workspace_id", "property_id" columns).
boundaryId = "01907ba3-4159-7e72-8912-05ad329edad1"
workspaceId = "018f9876-b3d7-73aa-97e6-0cf7a874383d"
propertyId = "018f99ea-564e-72fa-a4b7-2dbd24b65c3e"


In [None]:
#papermill_description=meater_variables_metadata


# Month names in calendar order; used to build the per-month historical
# date ranges.
months = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]

# NOTE: the order of every list below is significant. The same list is sent
# with the request and later used to read the response variables back by
# position, so entries must not be reordered independently.

# Hourly variables requested from the historical (archive) endpoint.
historical_hourly = [
    "temperature_2m",
    "relative_humidity_2m",
    "dew_point_2m",
    "precipitation",
    "weather_code",
    "cloud_cover",
    "et0_fao_evapotranspiration",
    "wind_speed_10m",
    "wind_speed_40m",
]

# Daily variables requested from the historical (archive) endpoint.
historical_daily = [
    "weather_code",
    "sunrise",
    "sunset",
    "uv_index_max",
    "temperature_2m_max",
    "temperature_2m_min",
    "apparent_temperature_max",
    "apparent_temperature_min",
    "daylight_duration",
    "sunshine_duration",
    "uv_index_clear_sky_max",
    "precipitation_sum",
    "precipitation_hours",
    "wind_direction_10m_dominant",
    "shortwave_radiation_sum",
    "et0_fao_evapotranspiration",
]

# Hourly variables requested from the forecast endpoint.
forecast_hourly = [
    "temperature_2m",
    "apparent_temperature",  # included because the daily forecast carries apparent temperature
    "relative_humidity_2m",
    "dew_point_2m",
    "precipitation",
    "weather_code",
    "cloud_cover",
    "et0_fao_evapotranspiration",
    "wind_speed_10m",
    "wind_speed_40m",
    "wind_direction_10m",
    "wind_direction_40m",  # open question: are both 10 m and 40 m heights needed?
    "wind_gusts_10m",
    "sunshine_duration",  # seconds of sunshine in the preceding hour
    "visibility",
    "soil_temperature_0_to_10cm",
    "soil_moisture_0_to_10cm",
]

# Daily variables requested from the forecast endpoint.
forecast_daily = [
    "weather_code",
    "sunrise",
    "sunset",
    "uv_index_max",
    "temperature_2m_max",
    "temperature_2m_min",
    "apparent_temperature_max",
    "apparent_temperature_min",
    "daylight_duration",
    # Unlike historical_daily, uv_index_clear_sky_max is not requested here
    # (original note: removed from forecast as BOM may not include it?).
    "sunshine_duration",
    "precipitation_sum",
    "precipitation_hours",
    "wind_speed_10m_max",  # added for completeness
    "wind_gusts_10m_max",  # added for completeness
    "wind_direction_10m_dominant",
    "shortwave_radiation_sum",
    "et0_fao_evapotranspiration",
]

In [None]:
#papermill_description=functions

def get_month_date_range(year, month, month_numbers, lag_days=2, today=None):
    """
    Build the start and end date strings (``YYYY-MM-DD``) for one calendar month.

    The end date is the last day of the month unless that day is later than
    the most recent date assumed to be available (``today - lag_days``); in
    that case the end date is clamped to that cutoff.

    Args:
        year (int): Calendar year.
        month (str): Month name; looked up case-insensitively in ``month_numbers``.
        month_numbers (dict): Maps lower-case month names to month numbers (1-12).
        lag_days (int, optional): Number of days before ``today`` that is used
            as the latest permissible end date. Defaults to 2.
        today (datetime.date, optional): Reference date; defaults to the
            current local date. Mainly useful for deterministic testing.

    Returns:
        tuple[str, str]: ``(date_from, date_end)``. When the month starts
        inside the lag window (or in the future) ``date_end`` is earlier than
        ``date_from``; callers should treat such a range as "no data yet".

    Raises:
        KeyError: If ``month`` is not present in ``month_numbers``.
    """
    month_number = month_numbers[month.lower()]
    last_day = calendar.monthrange(year, month_number)[1]
    datetime_from = f"{year}-{month_number:02d}-01"
    datetime_end = f"{year}-{month_number:02d}-{last_day:02d}"

    if today is None:
        today = datetime.today().date()
    latest_available = today - timedelta(days=lag_days)

    # Clamp against the lagged cutoff rather than against today, so a month
    # that ends today or yesterday is limited the same way as one that ends
    # in the future.
    month_end = datetime(year, month_number, last_day).date()
    if month_end > latest_available:
        datetime_end = latest_available.strftime('%Y-%m-%d')

    return datetime_from, datetime_end


def get_date_ranges(years, months):
    """
    Compute the date range of every requested month in every requested year.

    Months that lie after the current month (including every month of a
    future year) are skipped, as are months for which no complete range can
    be formed yet, i.e. where the computed end date falls before the start
    date.

    Args:
        years (iterable of int): Years to cover.
        months (iterable of str): Month names; passed to
            ``map_months_to_numbers`` and looked up in lower case.

    Returns:
        dict: Maps ``"<year> <month>"`` to a ``(date_from, date_end)`` tuple
        of ``YYYY-MM-DD`` strings, in iteration order of ``years``/``months``.
    """
    month_numbers = map_months_to_numbers(months)
    now = datetime.today()
    current_period = (now.year, now.month)

    date_ranges = {}
    for year in years:
        for month in months:
            # `continue` rather than `break`, so the result does not depend
            # on `months` being supplied in calendar order.
            if (year, month_numbers[month.lower()]) > current_period:
                continue
            datetime_from, datetime_end = get_month_date_range(year, month, month_numbers)
            # ISO dates compare correctly as strings. An end before the start
            # means the end was clamped to before the month began, and a
            # request for it would be invalid.
            if datetime_end < datetime_from:
                continue
            date_ranges[f"{year} {month}"] = (datetime_from, datetime_end)
    return date_ranges


def process_weather_data(weather_data, variable_order, utc=True):
    """
    Turn one block of an Open-Meteo response into a pandas DataFrame.

    Args:
        weather_data: Response block exposing ``Variables(i)``, ``Time()``,
            ``TimeEnd()`` and ``Interval()``.
        variable_order (list): Variable names in the order they were requested;
            the i-th name labels the i-th variable of ``weather_data``.
        utc (bool, optional): Passed to ``pd.to_datetime`` when converting the
            epoch bounds of the block. Defaults to True.

    Returns:
        pandas.DataFrame: A ``date`` column followed by one column per
        variable, in ``variable_order``.
    """
    # Read each variable by position; sunrise/sunset are read as int64 values,
    # everything else through the generic accessor.
    columns = {}
    for position, variable_name in enumerate(variable_order):
        variable = weather_data.Variables(position)
        if variable_name in ('sunrise', 'sunset'):
            columns[variable_name] = variable.ValuesInt64AsNumpy()
        else:
            columns[variable_name] = variable.ValuesAsNumpy()

    # Rebuild the timestamps from the block's start, end and step; the end
    # bound itself is excluded.
    first_timestamp = pd.to_datetime(weather_data.Time(), unit='s', utc=utc)
    last_timestamp = pd.to_datetime(weather_data.TimeEnd(), unit='s', utc=utc)
    step = pd.Timedelta(seconds=weather_data.Interval())
    timestamps = pd.date_range(
        start=first_timestamp,
        end=last_timestamp,
        freq=step,
        inclusive="left",
    )

    return pd.DataFrame({"date": timestamps, **columns})


def _decode_response_text(raw_value):
    """
    Normalise a text field read from an Open-Meteo response to ``str``.

    Bytes are decoded as UTF-8, strings are returned unchanged and any other
    value (for example ``None``) becomes an empty string.
    """
    if isinstance(raw_value, bytes):
        return raw_value.decode("utf-8")
    if isinstance(raw_value, str):
        return raw_value
    return ""


def _add_location_columns(dataframe, response, gpd_lat, gpd_lon):
    """
    Append the timezone, coordinate and identifier columns shared by all output CSVs.

    The identifier values come from the notebook-level parameters
    ``boundaryId``, ``propertyName``, ``workspaceId`` and ``propertyId``.
    The frame is modified in place and also returned.
    """
    dataframe["location_timezone"] = _decode_response_text(response.Timezone())
    dataframe["location_timezone_abbreviation"] = _decode_response_text(response.TimezoneAbbreviation())
    dataframe["longitude"] = gpd_lon
    dataframe["latitude"] = gpd_lat
    dataframe["boundary_id"] = boundaryId
    dataframe["boundary_name"] = propertyName
    dataframe["workspace_id"] = workspaceId
    dataframe["property_id"] = propertyId
    return dataframe


def process_forecast(storage_directory, gpd_lat, gpd_lon, days, hourly):
    """
    Fetch the forecast for one location and write it to CSV.

    A daily CSV is always written; an hourly CSV is written as well when
    ``hourly`` is ``True``. The request uses the notebook-level ``api``
    client, ``timezone`` setting and the ``forecast_daily`` /
    ``forecast_hourly`` variable lists. The maximum number of forecast days
    varies with the Open-Meteo model; see https://open-meteo.com/en/docs/bom-api

    Args:
        storage_directory (str): Directory the CSV files are written to; it is
            created if it does not exist.
        gpd_lat (float): Latitude of the location.
        gpd_lon (float): Longitude of the location.
        days (int): Number of forecast days to request.
        hourly (bool): Whether to also write the hourly forecast CSV.

    Returns:
        None
    """
    # NOTE(review): .date() drops the time of day, so the "%H:%M" part always
    # renders as "00:00". Kept as-is because the file name may be relied on
    # downstream - confirm before simplifying to '%Y-%m-%d'.
    today = (datetime.now().date()).strftime('%Y-%m-%d %H:%M')

    # to_csv does not create missing directories.
    os.makedirs(storage_directory, exist_ok=True)

    responses = api.fetch_weather_data(
        latitude=gpd_lat,
        longitude=gpd_lon,
        start_date=None,
        end_date=None,
        url="https://api.open-meteo.com/v1/forecast",
        daily=forecast_daily,
        hourly=forecast_hourly,
        timezone=timezone,
        timeformat="unixtime",
        forecast_days=days
    )
    response = responses[0]

    forecast_daily_data = process_weather_data(response.Daily(), forecast_daily, utc=True)
    forecast_daily_data = convert_epoch_to_timezone(forecast_daily_data, ["sunrise", "sunset"])
    _add_location_columns(forecast_daily_data, response, gpd_lat, gpd_lon)

    weather_output_forecast_daily_filename = os.path.join(storage_directory, f"{today}_forecast_daily.csv")
    forecast_daily_data.to_csv(weather_output_forecast_daily_filename, index=False)

    # Honour the caller's flag instead of the notebook-level global. The
    # strict identity check is kept so only a real boolean True enables it.
    if hourly is True:
        forecast_hourly_data = process_weather_data(response.Hourly(), forecast_hourly, utc=True)
        _add_location_columns(forecast_hourly_data, response, gpd_lat, gpd_lon)

        weather_output_forecast_hourly_filename = os.path.join(storage_directory, f"{today}_forecast_hourly.csv")
        forecast_hourly_data.to_csv(weather_output_forecast_hourly_filename, index=False)


def process_historical(storage_directory, gpd_lat, gpd_lon, date_range, dates, hourly):
    """
    Fetch historical weather for one date range and location and write it to CSV.

    A daily CSV is always written; an hourly CSV is written as well when
    ``hourly`` is ``True``. The request uses the notebook-level ``api``
    client, ``timezone`` setting and the ``historical_daily`` /
    ``historical_hourly`` variable lists.

    Args:
        storage_directory (str): Directory the CSV files are written to; it is
            created if it does not exist.
        gpd_lat (float): Latitude of the location.
        gpd_lon (float): Longitude of the location.
        date_range (str): Label of the range in the form "year month".
            Currently unused; kept so existing callers keep working.
        dates (sequence): Start date at index 0 and end date at index 1, as
            strings accepted by the archive endpoint.
        hourly (bool): Whether to also write the hourly historical CSV.

    Returns:
        None
    """
    date_from = dates[0]
    date_end = dates[1]

    # to_csv does not create missing directories.
    os.makedirs(storage_directory, exist_ok=True)

    responses = api.fetch_weather_data(
        latitude=gpd_lat,
        longitude=gpd_lon,
        start_date=date_from,
        end_date=date_end,
        url="https://archive-api.open-meteo.com/v1/archive",
        daily=historical_daily,
        hourly=historical_hourly,
        timezone=timezone,
        timeformat="unixtime"
    )
    response = responses[0]

    daily_dataframe = process_weather_data(response.Daily(), historical_daily, utc=True)
    daily_dataframe = convert_epoch_to_timezone(daily_dataframe, ["sunrise", "sunset"])
    _add_location_columns(daily_dataframe, response, gpd_lat, gpd_lon)

    historical_output_daily_filename = os.path.join(storage_directory, f"{date_from}_{date_end}_historical_daily.csv")
    daily_dataframe.to_csv(historical_output_daily_filename, index=False)

    # Honour the caller's flag instead of the notebook-level global. The
    # strict identity check is kept so only a real boolean True enables it.
    if hourly is True:
        hourly_dataframe = process_weather_data(response.Hourly(), historical_hourly, utc=True)
        # boundary_name now carries propertyName like every other output; the
        # hourly historical file previously wrote notebook_key here.
        _add_location_columns(hourly_dataframe, response, gpd_lat, gpd_lon)

        historical_output_hourly_filename = os.path.join(storage_directory, f"{date_from}_{date_end}_historical_hourly.csv")
        hourly_dataframe.to_csv(historical_output_hourly_filename, index=False)


In [None]:
#papermill_description=establish_directory

# Scratch directory for the generated CSV files. It is created up front
# because DataFrame.to_csv does not create missing parent directories.
storage_directory = f"/tmp/{notebook_key}"
os.makedirs(storage_directory, exist_ok=True)

In [None]:
#papermill_description=processing_file_io

# The request payload is already a Python dict in this mock setup; the
# FeatureCollection sits under its 'body' key.
req = geojson
geojson_data = req["body"]

# Serialise the FeatureCollection to text and let GeoPandas parse it from an
# in-memory buffer.
geojson_buffer = StringIO(json.dumps(geojson_data))
gdf = gpd.read_file(geojson_buffer)

In [None]:
#papermill_description=processing_bounding_box

# Geometry column of the boundary, kept for the data-harvester clip function.
geom = gdf.geometry #for data-harvester clip function

# Get bounding box from GeoJSON
# assumes bbox is ordered (min_lon, min_lat, max_lon, max_lat) - TODO confirm
# against get_bbox_from_geodf
bbox = get_bbox_from_geodf(geojson_data)

# Midpoint of the bounding box along each axis; this is the single location
# used for the weather requests.
gpd_lon = (bbox[0] + bbox[2]) / 2
gpd_lat = (bbox[1] + bbox[3]) / 2

# NOTE: this is the centre of the bounding box in [lon, lat] order, not the
# geometric centroid of the polygon.
centroid = [gpd_lon, gpd_lat]

In [None]:
#papermill_description=process_forecast_weather_data

# Open-Meteo client used by both the forecast and the historical download
# functions; created with caching disabled.
api = OpenMeteoAPI(cache=False)

# The forecast download is opt-in through the `forecast` parameter.
if forecast:
    process_forecast(
        storage_directory,
        gpd_lat,
        gpd_lon,
        days=forecast_days,
        hourly=hourly_forecast,
    )

In [None]:
#papermill_description=process_historical_weather_data

# The historical download is opt-in through the `historical` parameter.
if historical:
    # Cover every year from start_year through end_year inclusive; listing
    # only the two endpoints would silently skip the years in between.
    # min/max keeps a reversed pair of parameters working.
    first_year = min(start_year, end_year)
    last_year = max(start_year, end_year)
    years = list(range(first_year, last_year + 1))

    date_ranges = get_date_ranges(years, months)
    # One request (and one set of CSV files) per calendar month.
    for date_range, dates in date_ranges.items():
        process_historical(
            storage_directory=storage_directory,
            date_range=date_range,
            dates=dates,
            gpd_lat=gpd_lat,
            gpd_lon=gpd_lon,
            hourly=hourly_historical
        )
    
