In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
from collections import defaultdict
import importlib
import utils
importlib.reload(utils)
import os
from pathlib import Path
from scipy.optimize import curve_fit, differential_evolution
import os
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from dotenv import load_dotenv
import warnings
warnings.filterwarnings('ignore')

In [2]:
def load_weather(weather_path, obs_year):
    """Load the hourly air-temperature records of one observation year.

    Parameters
    ----------
    weather_path : str or path-like
        CSV file with at least the columns ``Date``, ``Time`` and
        ``Air Temp. (°C)``.
    obs_year : int
        Calendar year to keep; rows whose ``Date`` falls in any other year
        (or cannot be parsed) are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` (datetime), ``doy`` (day of year), ``Time`` and
        ``temp`` (float), with a fresh 0..n-1 index. Non-numeric temperature
        entries such as ``'-'`` are treated as missing and filled by linear
        interpolation between neighbouring rows.
    """
    weather_data = pd.read_csv(weather_path)

    # NOTE(review): dates are parsed with pandas' default format inference;
    # confirm the CSV is not day-first, otherwise rows may be coerced to NaT
    # and silently dropped by the year filter below.
    weather_data['Date'] = pd.to_datetime(weather_data['Date'], errors='coerce')

    # Compare on the year component so that every row of the year is kept,
    # including rows on 31 December that carry a time of day.
    in_obs_year = weather_data['Date'].dt.year == int(obs_year)
    weather_data = (
        weather_data.loc[in_obs_year, ['Date', 'Time', 'Air Temp. (°C)']]
        .rename(columns={'Date': 'date', 'Air Temp. (°C)': 'temp'})
        .reset_index(drop=True)
    )
    weather_data.insert(1, 'doy', weather_data['date'].dt.dayofyear)

    # Anything that is not a number (e.g. the '-' placeholder) becomes NaN and
    # is then interpolated. Plain assignment is used instead of chained
    # ``inplace=True`` calls, which do not update the frame under pandas
    # Copy-on-Write.
    weather_data['temp'] = pd.to_numeric(weather_data['temp'], errors='coerce')
    weather_data['temp'] = weather_data['temp'].interpolate(method='linear')

    return weather_data


In [3]:
# Load datasets for observations
def load_observations(config_file):
    """Load observed budburst dates and the matching weather for every configured sheet.

    ``config_file`` is a JSON file mapping each Excel workbook name (resolved
    against the ``FOLDER_PATH`` environment variable, which may come from a
    ``.env`` file) to a dict with a ``sheets`` list. Every sheet entry provides
    ``sheet_name``, ``location``, ``weather_path`` and the mappings
    ``cultivar`` / ``treatments`` of ``{column name: value to keep}``; only the
    first item of each mapping is used.

    Parameters
    ----------
    config_file : str or path-like
        Path of the JSON configuration file.

    Returns
    -------
    tuple of (defaultdict, defaultdict)
        ``weather_df[location][year]`` holds the frame returned by
        ``load_weather`` and ``budburstdoy_df[location][year]`` holds the third
        value returned by ``utils.BB_specifications``.

    Raises
    ------
    RuntimeError
        If ``FOLDER_PATH`` is not set.
    ValueError
        If a sheet contains no column recognised by ``utils.is_date_column``.
    """
    TREATMENT = "Treatment"

    with open(config_file, "r") as file:
        config = json.load(file)

    # Load environment variables
    load_dotenv()
    folder_path = os.getenv('FOLDER_PATH')
    if folder_path is None:
        raise RuntimeError(
            "Environment variable FOLDER_PATH is not set; define it (e.g. in .env) "
            "so the observation workbooks can be located."
        )

    weather_df = defaultdict(dict)
    budburstdoy_df = defaultdict(dict)
    for file_name, file_info in config.items():
        file_path = os.path.join(folder_path, file_name)
        for sheet_info in file_info['sheets']:
            sheet_name = sheet_info['sheet_name']
            location = sheet_info['location']
            cultivar_col, cultivar_name = list(sheet_info['cultivar'].items())[0]
            treatment_col, treatment_name = list(sheet_info['treatments'].items())[0]

            # Passing the path lets pandas open and close the workbook itself,
            # so no file handle is left open.
            raw_df = pd.read_excel(file_path, sheet_name=sheet_name, engine="openpyxl")

            # Identify the date-named columns once and derive DOY labels and
            # the observation year from them.
            date_columns = [col for col in raw_df.columns if utils.is_date_column(col)]
            if not date_columns:
                raise ValueError(
                    f"No date columns found in sheet '{sheet_name}' of '{file_name}'."
                )
            parsed_dates = [pd.to_datetime(col) for col in date_columns]
            doy_column = [stamp.dayofyear for stamp in parsed_dates]
            # The year of the first date column is taken as the observation year.
            obs_year = parsed_dates[0].year
            df = raw_df.rename(columns=dict(zip(date_columns, doy_column)))

            # Keep the requested cultivar/treatment rows, drop incomplete rows
            # and reset the index (assigned, so the reset actually takes effect).
            keep_rows = df[cultivar_col].isin([cultivar_name]) & df[treatment_col].isin([treatment_name])
            filtered_df = (
                df.loc[keep_rows, [treatment_col] + doy_column]
                .rename(columns={treatment_col: TREATMENT})
                .dropna()
                .reset_index(drop=True)
            )

            # Observed BudBurst DOY
            # NOTE(review): presumably BB_specifications derives the budburst
            # day from the per-DOY observation columns; confirm in utils.
            _, _, BudBurstDOY = utils.BB_specifications(location, filtered_df[doy_column])
            budburstdoy_df[location][obs_year] = BudBurstDOY

            # Load and store the weather data of the same year
            weather_path = sheet_info['weather_path']
            weather_df[location][obs_year] = load_weather(weather_path, obs_year)

    return weather_df, budburstdoy_df

In [4]:
def Chill(Tc,T):
    """Chill contribution of temperature ``T`` for an upper chilling limit ``Tc``.

    Evaluates the parabola ``4*T*(Tc - T) / Tc**2``, which is 0 at ``T = 0``
    and ``T = Tc``, peaks at 1 for ``T = Tc / 2`` and is negative outside
    ``[0, Tc]``. The result is rounded to three decimals.
    """
    scaled_temp = (4*T)/(Tc**2)
    chill_units = scaled_temp*(Tc-T)
    return round(chill_units, 3)

def Wangengel(MinTemp,OptTemp,MaxTemp,RefTemp,T):
    """Temperature response at ``T`` relative to the response at ``RefTemp``.

    The raw response is 0 outside the open interval ``(MinTemp, MaxTemp)`` and
    equals 1 at ``OptTemp``. It is divided by the raw response at ``RefTemp``
    (taken as 1 when ``RefTemp`` lies outside the interval) and the ratio is
    rounded to three decimals.
    """
    def _relative_effect(temp):
        # The exponent makes the curve fall back to zero exactly at MaxTemp.
        opt_span = OptTemp - MinTemp
        p = math.log(2.0) / math.log((MaxTemp - MinTemp) / opt_span)
        above_min = temp - MinTemp
        return (2 * above_min ** p * opt_span ** p - above_min ** (2 * p)) / opt_span ** (2 * p)

    effect_at_T = _relative_effect(T) if MinTemp < T < MaxTemp else 0.0
    effect_at_ref = _relative_effect(RefTemp) if MinTemp < RefTemp < MaxTemp else 1.0

    return round(float(effect_at_T / effect_at_ref), 3)

def GDH(Tb,Tu,Tc,T):
    """Heat response at temperature ``T`` from two cosine segments.

    Parameters
    ----------
    Tb, Tu, Tc : float
        Lower bound of the first segment, boundary between the two segments,
        and upper bound of the second segment (``Tb < Tu < Tc``).
    T : float
        Temperature to evaluate.

    Returns
    -------
    float
        Rises from 0 just above ``Tb`` to 1 at ``Tu``, then falls back to 0 at
        ``Tc``. Always a ``float``: ``0.0`` for ``T <= Tb`` or ``T > Tc``.
    """
    if Tb < T <= Tu:
        # Rising half-cosine on (Tb, Tu].
        return float(0.5 * (1 + np.cos(np.pi + np.pi * ((T - Tb) / (Tu - Tb)))))
    if Tu < T <= Tc:
        # Falling quarter-cosine on (Tu, Tc].
        return float((1 + np.cos(0.5*np.pi + 0.5*np.pi * ((T - Tu) / (Tc - Tu)))))
    # Outside (Tb, Tc]: return a float so every branch has the same type.
    return 0.0


In [5]:
# JSON configuration naming the observation workbooks, their sheets and the
# weather file that belongs to each sheet.
config_file = "config_MultiDataFiles.json"
# Both results are nested dicts indexed as [location][observation year].
weather_df, budburstdoy_df = load_observations(config_file)


In [24]:
def objective_function(params, locations, weather_df, budburstdoy_df):
    """Mean squared error between predicted and observed budburst day of year.

    For every location and year, chill is accumulated row by row from the
    location's chill start day until ``chill_target`` is reached; from the
    following row on, heat is accumulated until ``heat_target`` is reached.
    The day of year of that row is the predicted budburst day.

    Parameters
    ----------
    params : sequence of float
        ``[Tc, Tmin, Topt, Tmax, chill_target, heat_target, start_day_0, ...]``
        with one chill start day per entry of ``locations``, in the same order.
    locations : sequence of str
        Location keys of ``weather_df`` and ``budburstdoy_df``
        (e.g. ``['Te Puke', 'Kerikeri']``).
    weather_df : mapping
        ``weather_df[location][year]`` is a frame with ``doy`` and ``temp`` columns.
    budburstdoy_df : mapping
        ``budburstdoy_df[location][year]`` is the observed budburst day of year.

    Returns
    -------
    float
        ``mean_squared_error(observed, predictions)``. A year in which either
        target is never reached (including a year with no rows on or after the
        start day) is scored with the penalty day 366.
    """
    NOT_REACHED_DOY = 366  # penalty prediction when a target is never met

    Tc, Tmin, Topt, Tmax, chill_target, heat_target = params[:6]
    chill_start_days = params[6:]
    RefTemp = Topt  # RefTemp = OptTemp scales the heat function to 0-1

    predictions = []
    observed = []
    for i, location in enumerate(locations):
        start_day = int(round(chill_start_days[i]))
        for year, weather_data in weather_df[location].items():
            weather_since_start_doy = weather_data[weather_data['doy'] >= start_day]
            # Plain Python lists: same scalar values as iterating the Series,
            # without the per-element pandas overhead.
            temps = weather_since_start_doy['temp'].tolist()
            doys = weather_since_start_doy['doy'].tolist()

            # Chill accumulation: find the row at which endodormancy is released.
            # The index is tracked explicitly so that it can never be undefined
            # or carried over from a previous year/location.
            release_idx = None
            chillVal = 0
            for idx, temp in enumerate(temps):
                chillVal += Chill(Tc, temp)
                if chillVal >= chill_target:
                    release_idx = idx
                    break

            # Heat accumulation starts on the row after endodormancy release.
            predicted_doy = NOT_REACHED_DOY
            if release_idx is not None:
                heatVal = 0
                for idx in range(release_idx + 1, len(temps)):
                    heatVal += Wangengel(Tmin, Topt, Tmax, RefTemp, temps[idx])
                    if heatVal >= heat_target:
                        predicted_doy = int(doys[idx])
                        break

            predictions.append(predicted_doy)
            observed.append(budburstdoy_df[location][year])

    return mean_squared_error(observed, predictions)

# Define a wrapper function that takes both params and fixed args
def wrapped_objective(params):
    """Single-argument form of ``objective_function`` for the optimiser.

    Binds the two study locations and the notebook-level ``weather_df`` and
    ``budburstdoy_df`` so that only the parameter vector remains free.
    """
    site_names = ['Te Puke', 'Kerikeri']
    return objective_function(params, site_names, weather_df, budburstdoy_df)


In [25]:
# Search interval for each entry of the parameter vector, in the order in
# which objective_function unpacks it.
bounds = [
    (16, 20),     # Tc
    (7, 10),     # Tmin
    (15, 25),    # Topt
    (26, 35),    # Tmax
    (800, 1000),   # chill target
    (400, 500), # heat target
] + [(1, 60)] * 2  # chill start days, one per location used in wrapped_objective

# workers=-1 evaluates the population on all available cores. SciPy only
# supports parallel evaluation with updating='deferred' and would otherwise
# override the default with a warning, so the mode is stated explicitly.
result = differential_evolution(
    func=wrapped_objective,
    bounds=bounds,
    strategy='best1bin',
    popsize=10,
    maxiter=500,
    updating='deferred',
    workers=-1,
    seed=42)

In [27]:
# Best parameter vector found, ordered as in `bounds`:
# Tc, Tmin, Topt, Tmax, chill target, heat target, then one chill start day per location.
result.x

array([ 17.74086872,   7.4887144 ,  24.88149309,  34.82128821,
       995.48923257, 439.63466679,  52.75472116,  43.59583469])

In [None]:
import cProfile
import pstats

# Profile a single objective evaluation. The parameter vector is the midpoint
# of every search interval in `bounds`, so the cell runs on a fresh kernel
# without waiting for the optimisation to finish.
sample_params = [(lower + upper) / 2 for lower, upper in bounds]

# cProfile.run executes the statement in the notebook namespace and writes the
# raw statistics to the file "profile_stats" in the working directory.
cProfile.run("wrapped_objective(sample_params)", "profile_stats")

p = pstats.Stats("profile_stats")
p.sort_stats("cumtime").print_stats(10)  # Show the 10 entries with the highest cumulative time
