In [237]:
#papermill_description=imports

import json
import os
import logging
import sys
import geopandas as gpd
from io import StringIO
import pandas as pd
from datetime import datetime
from gis_utils.dataframe import get_bbox_from_geodf
from retry_requests import retry
from aws_utils import S3Utils
import calendar
import time
from gis_utils.meteo import OpenMeteoAPI

# Route log records to stdout at INFO verbosity.
# Other available levels: DEBUG, WARNING, ERROR, CRITICAL.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger(__name__)

In [238]:
def calculate_days_between(date_str1: str, date_str2: str, format = "%Y-%m-%d") -> int:
    """
    Calculate the absolute number of whole days between two dates given as strings.

    Parameters:
    - date_str1 (str): The first date string.
    - date_str2 (str): The second date string.
    - format (str): ``datetime.strptime`` format used to parse both strings.
      Defaults to ``"%Y-%m-%d"``.

    Returns:
    - int: The non-negative number of whole days separating the two dates.
      The result does not depend on the order of the arguments.

    Raises:
    - ValueError: If either string does not match ``format``.

    Example:
    ```python
    days_difference = calculate_days_between('2024-05-01', '2024-04-25')
    print(f"The difference in days is: {days_difference}")  # 6
    ```
    """
    # Parse both strings with the caller-supplied format.
    datetime1 = datetime.strptime(date_str1, format)
    datetime2 = datetime.strptime(date_str2, format)

    # Take abs() of the timedelta *before* reading .days. A negative timedelta is
    # normalised as (negative days, positive seconds), so abs(delta.days) would
    # report 1 for a -12 hour gap while the reversed call reports 0.
    return abs(datetime1 - datetime2).days

In [239]:
#papermill_description=parameters

# `model` is embedded in the output CSV filenames; `notebook_key` names the
# per-notebook folder under the storage directory.
model = "weather"
notebook_key = "WoodleighE"
# Request-shaped payload: the GeoJSON FeatureCollection sits under the 'body' key.
geojson = {
	'body':	{
    "type": "FeatureCollection",
    "name": "Woodleigh E (TP542914N)",
    "crs": {
      "type": "name",
      "properties": {
        "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
      }
    },
    "features": [
      {
        "type": "Feature",
        "geometry": {
          "type": "Polygon",
          # Single ring; the first and last coordinate pairs are identical, closing it.
          "coordinates": [
            [
              [
                145.64368388352284,
                -38.40046895461024
              ],
              [
                145.64497724708457,
                -38.4006517336222
              ],
              [
                145.645486111436,
                -38.400385873088354
              ],
              [
                145.64603738115176,
                -38.39990399837715
              ],
              [
                145.64650384014067,
                -38.399222721754604
              ],
              [
                145.64694909644828,
                -38.39830880400577
              ],
              [
                145.6477972037024,
                -38.394204340030306
              ],
              [
                145.64497724708457,
                -38.39392183798809
              ],
              [
                145.64368388352284,
                -38.40046895461024
              ]
            ]
          ]
        },
        "properties": {}
      }
    ]
  }
}
# Every (year, month) combination below becomes one date range to fetch.
years = [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]
months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]
# Embedded in the output CSV filenames.
propertyName = "test"
# NOTE(review): output_type, datetime_from, datetime_end and boundaryName are not
# read by any cell visible in this notebook — confirm whether they are still needed.
output_type = "weather"
datetime_from="2023-12-01"
datetime_end="2023-12-31"
boundaryName = "boundaryName"
# Presumably selects historical (archive) weather data — TODO confirm it is wired
# into the fetch call.
historical = True
# True -> outputs go to /workspace/notebook_outputs; otherwise /tmp is used.
local=True


In [240]:
def process_weather_data(weather_data, variable_order):
    """
    Turn one block of a weather response into a DataFrame.

    Parameters:
    - weather_data: Object exposing ``Variables(i).ValuesAsNumpy()``, ``Time()``,
      ``TimeEnd()`` and ``Interval()``.
    - variable_order (sequence of str): Column names, in the same positional
      order as the variables held by ``weather_data``.

    Returns:
    - pandas.DataFrame: A ``date`` column followed by one column per variable.
    """
    # Variables are addressed by position, so the order of `variable_order`
    # decides which values land under which column name.
    columns = {}
    for position, column_name in enumerate(variable_order):
        columns[column_name] = weather_data.Variables(position).ValuesAsNumpy()

    # Time()/TimeEnd() are read as Unix seconds in UTC; Interval() is the step in seconds.
    range_start = pd.to_datetime(weather_data.Time(), unit="s", utc=True)
    range_end = pd.to_datetime(weather_data.TimeEnd(), unit="s", utc=True)
    step = pd.Timedelta(seconds=weather_data.Interval())

    # inclusive="left" keeps the start timestamp and drops the end timestamp.
    timestamps = pd.date_range(start=range_start, end=range_end, freq=step, inclusive="left")

    return pd.DataFrame({"date": timestamps, **columns})

def map_months_to_numbers(months):
    """
    Map month names to their calendar numbers (1-12) using the calendar module.

    Matching is case-insensitive and accepts either the full month name
    ("January") or its abbreviation ("Jan"), as provided by ``calendar`` for the
    current locale.

    Parameters:
    - months (iterable of str): Month names to resolve.

    Returns:
    - dict: ``{month.lower(): month_number}`` for every entry in ``months``.

    Raises:
    - KeyError: If an entry is not a recognised month name or abbreviation.
    """
    # Build one lowercase lookup covering both full names and abbreviations.
    known_months = {}
    for number in range(1, 13):
        known_months[calendar.month_name[number].lower()] = number
        known_months[calendar.month_abbr[number].lower()] = number

    resolved = {}
    for month in months:
        key = month.lower()
        if key not in known_months:
            # Same exception type as a plain dict miss, but says what went wrong.
            raise KeyError(f"Unrecognised month name: {month!r}")
        resolved[key] = known_months[key]
    return resolved

def get_month_date_range(year, month, month_numbers):
    """
    Build the first-day and last-day date strings for one month of one year.

    Parameters:
    - year (int): Calendar year.
    - month (str): Month name; it is lowercased before the lookup.
    - month_numbers (dict): Lowercase month name -> month number.

    Returns:
    - tuple: ``(datetime_from, datetime_end)`` formatted as ``{year}-MM-DD``.
    """
    month_index = month_numbers[month.lower()]
    # monthrange gives (weekday of the 1st, days in month); only the day count is needed.
    _, days_in_month = calendar.monthrange(year, month_index)
    year_month = f"{year}-{month_index:02d}"
    return f"{year_month}-01", f"{year_month}-{days_in_month:02d}"

def get_date_ranges(years, months):
    """
    Build the start/end date strings for every requested month of every requested year.

    Returns:
    - dict: Keys are ``"{year} {month}"`` labels, in year-major order; values are
      ``(datetime_from, datetime_end)`` tuples.
    """
    # Resolve the month names once; the lookup is reused for every year.
    month_lookup = map_months_to_numbers(months)
    return {
        f"{year} {month}": get_month_date_range(year, month, month_lookup)
        for year in years
        for month in months
    }

In [241]:
# Build one (start, end) pair per requested year/month combination.
date_ranges = get_date_ranges(years, months)

# Echo every computed range so the inputs to the fetch loop can be checked by eye.
for date_range, dates in date_ranges.items():
    start_date, end_date = dates
    print(f"For {date_range}: Start Date = {start_date}, End Date = {end_date}")

For 2014 January: Start Date = 2014-01-01, End Date = 2014-01-31
For 2014 February: Start Date = 2014-02-01, End Date = 2014-02-28
For 2014 March: Start Date = 2014-03-01, End Date = 2014-03-31
For 2014 April: Start Date = 2014-04-01, End Date = 2014-04-30
For 2014 May: Start Date = 2014-05-01, End Date = 2014-05-31
For 2014 June: Start Date = 2014-06-01, End Date = 2014-06-30
For 2014 July: Start Date = 2014-07-01, End Date = 2014-07-31
For 2014 August: Start Date = 2014-08-01, End Date = 2014-08-31
For 2014 September: Start Date = 2014-09-01, End Date = 2014-09-30
For 2014 October: Start Date = 2014-10-01, End Date = 2014-10-31
For 2014 November: Start Date = 2014-11-01, End Date = 2014-11-30
For 2014 December: Start Date = 2014-12-01, End Date = 2014-12-31
For 2015 January: Start Date = 2015-01-01, End Date = 2015-01-31
For 2015 February: Start Date = 2015-02-01, End Date = 2015-02-28
For 2015 March: Start Date = 2015-03-01, End Date = 2015-03-31
For 2015 April: Start Date = 2015-04

In [242]:
# Hourly variable names requested from the weather API. Order matters:
# process_weather_data reads values by position and labels them with these names.
hourly = [
    "temperature_2m",
    "relative_humidity_2m",
    "dew_point_2m",
    "precipitation",
    "weather_code",
    "cloud_cover",
    "et0_fao_evapotranspiration",
    "wind_speed_10m",
    "wind_speed_40m",
]

# Daily variable names requested from the weather API. Order matters for the same reason.
daily = [
    "weather_code",
    "temperature_2m_max",
    "temperature_2m_min",
    "apparent_temperature_max",
    "apparent_temperature_min",
    "sunrise",
    "sunset",
    "daylight_duration",
    "sunshine_duration",
    "uv_index_max",
    "uv_index_clear_sky_max",
    "precipitation_sum",
    "precipitation_hours",
    "wind_direction_10m_dominant",
    "shortwave_radiation_sum",
    "et0_fao_evapotranspiration",
]

In [243]:
#papermill_description=processing_file_io

# The parameter payload is shaped like a request: the GeoJSON dict sits under 'body'.
req = geojson
geojson_data = req['body']

# Serialise the dict to GeoJSON text, then let geopandas read it from an
# in-memory file-like object.
geojson_text = json.dumps(geojson_data)
gdf = gpd.read_file(StringIO(geojson_text))

INFO:botocore.credentials:Found credentials in environment variables.


In [244]:
#papermill_description=processing_bounding_box

# Bounding box of the boundary, computed from the GeoJSON dict.
bbox = get_bbox_from_geodf(geojson_data)

# Midpoint of the bounding box: entries 0 and 2 are averaged for the longitude,
# entries 1 and 3 for the latitude.
gpd_lon, gpd_lat = [(bbox[axis] + bbox[axis + 2]) / 2 for axis in (0, 1)]
centroid = [gpd_lon, gpd_lat]
print(f"Centroid: {centroid}")

Centroid: [145.64574054361262, -38.39728678580514]


In [245]:
#papermill_description=process_variables

# Base output directory: a workspace folder for local runs, /tmp otherwise.
storage_directory = "/workspace/notebook_outputs" if local else "/tmp"
os.makedirs(storage_directory, exist_ok=True)

# Pause between consecutive API requests.
# Presumably this keeps the run under the provider's rate limit — TODO confirm.
REQUEST_DELAY_SECONDS = 2

# One client serves the whole run; it does not depend on the date range.
api = OpenMeteoAPI()

# Fetch and persist daily + hourly weather for each date range calculated previously.
for date_range, dates in date_ranges.items():
    print(f"For {date_range}: Start Date = {dates[0]}, End Date = {dates[1]}")
    calculated_date_from = dates[0]
    calculated_date_end = dates[1]

    # Each date range gets its own folder under <storage>/<notebook_key>/.
    output_directory = f"{storage_directory}/{notebook_key}/{date_range}"
    os.makedirs(output_directory, exist_ok=True)

    # Filenames for the daily and hourly weather outputs.
    weather_output_daily_filename = os.path.join(output_directory, f"{model}_{propertyName}_{calculated_date_from}_{calculated_date_end}_daily_weather.csv")
    weather_output_hourly_filename = os.path.join(output_directory, f"{model}_{propertyName}_{calculated_date_from}_{calculated_date_end}_hourly_weather.csv")

    # Honour the `historical` notebook parameter instead of a hard-coded True,
    # so a parameterised run can actually switch it.
    responses = api.fetch_weather_data(
        latitude=gpd_lat,
        longitude=gpd_lon,
        start_date=calculated_date_from,
        end_date=calculated_date_end,
        historical=historical,
        daily=daily,
        hourly=hourly,
    )

    # Fail with context rather than an opaque IndexError when nothing comes back.
    if len(responses) == 0:
        raise RuntimeError(
            f"No weather data returned for {date_range} "
            f"({calculated_date_from} to {calculated_date_end})"
        )
    # A single location is requested, so only the first response is used.
    response = responses[0]

    hourly_dataframe = process_weather_data(response.Hourly(), hourly)
    daily_dataframe = process_weather_data(response.Daily(), daily)

    # Persist both frames as CSV without the index column.
    daily_dataframe.to_csv(weather_output_daily_filename, index=False)
    hourly_dataframe.to_csv(weather_output_hourly_filename, index=False)

    time.sleep(REQUEST_DELAY_SECONDS)


For 2014 January: Start Date = 2014-01-01, End Date = 2014-01-31
For 2014 February: Start Date = 2014-02-01, End Date = 2014-02-28
For 2014 March: Start Date = 2014-03-01, End Date = 2014-03-31
For 2014 April: Start Date = 2014-04-01, End Date = 2014-04-30
For 2014 May: Start Date = 2014-05-01, End Date = 2014-05-31
For 2014 June: Start Date = 2014-06-01, End Date = 2014-06-30
For 2014 July: Start Date = 2014-07-01, End Date = 2014-07-31
For 2014 August: Start Date = 2014-08-01, End Date = 2014-08-31
For 2014 September: Start Date = 2014-09-01, End Date = 2014-09-30
For 2014 October: Start Date = 2014-10-01, End Date = 2014-10-31
For 2014 November: Start Date = 2014-11-01, End Date = 2014-11-30
For 2014 December: Start Date = 2014-12-01, End Date = 2014-12-31
For 2015 January: Start Date = 2015-01-01, End Date = 2015-01-31
For 2015 February: Start Date = 2015-02-01, End Date = 2015-02-28
For 2015 March: Start Date = 2015-03-01, End Date = 2015-03-31
For 2015 April: Start Date = 2015-04

In [246]:
# aws_s3_notebook_output = os.getenv('AWS_S3_BUCKET_NOTEBOOK_OUTPUT')
# aws_default_region = os.getenv('AWS_DEFAULT_REGION')
# s3_client = S3Utils(
# 		region_name=aws_default_region,
# 		s3_bucket=aws_s3_notebook_output,
# 		prefix=notebook_key
# )

In [247]:
# Save dataframes to CSV and upload them to S3 (currently disabled; the processing loop already writes the CSVs locally)
# output_dir = f"{storage_directory}/{notebook_key}/{calculated_date_from}_{calculated_date_end}"
# os.makedirs(output_dir, exist_ok=True)
# daily_dataframe.to_csv(weather_output_daily_filename, index=False)
# hourly_dataframe.to_csv(weather_output_hourly_filename, index=False)


# s3_client.upload_file(
# 		file_path=weather_output_daily_filename,
# )

# s3_client.upload_file(
# 		file_path=weather_output_hourly_filename,
# )