In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
from collections import defaultdict
import importlib
import utils
importlib.reload(utils)
import os
from pathlib import Path
from scipy.optimize import curve_fit, differential_evolution
import os
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from dotenv import load_dotenv
import warnings
warnings.filterwarnings('ignore')

In [2]:
def load_weather(weather_path, obs_year):
    """Load the hourly air temperatures recorded during one observation year.

    Reads the CSV at ``weather_path``, keeps the rows whose ``Date`` lies
    between 1 January and 31 December of ``obs_year`` (both inclusive) and
    returns the columns ``date``, ``doy`` (day of year), ``Time`` and ``temp``.
    Temperature entries that are not numeric (for example the ``'-'``
    placeholder) become NaN and are then filled by linear interpolation
    between neighbouring rows.

    Parameters
    ----------
    weather_path : str or path-like
        CSV file containing at least the columns ``Date``, ``Time`` and
        ``Air Temp. (°C)``.
    obs_year : int
        Calendar year to keep.

    Returns
    -------
    pandas.DataFrame
        The filtered weather table with a fresh 0..n-1 index.
    """
    weather_data = pd.read_csv(weather_path)

    # Dates that cannot be parsed become NaT and fall out of the range filter.
    weather_data['Date'] = pd.to_datetime(weather_data['Date'], errors='coerce')

    # Build the year bounds from components so that no day/month string
    # parsing (and its day-first ambiguity) is involved.
    first_day = pd.Timestamp(year=int(obs_year), month=1, day=1)
    last_day = pd.Timestamp(year=int(obs_year), month=12, day=31)
    in_year = weather_data['Date'].between(first_day, last_day)

    weather_data = (
        weather_data.loc[in_year, ['Date', 'Time', 'Air Temp. (°C)']]
        .rename(columns={'Date': 'date', 'Air Temp. (°C)': 'temp'})
    )
    weather_data.insert(1, 'doy', weather_data['date'].dt.dayofyear)

    # Coercion turns every non-numeric entry, including '-', into NaN.
    # Results are assigned back to the column: calling replace/interpolate
    # with inplace=True on ``weather_data['temp']`` only modifies a temporary
    # Series when pandas Copy-on-Write is active.
    weather_data['temp'] = pd.to_numeric(weather_data['temp'], errors='coerce')
    if weather_data['temp'].isna().any():
        weather_data['temp'] = weather_data['temp'].interpolate(method='linear')

    return weather_data.reset_index(drop=True)


In [3]:
# Load datasets for observations
def _nested_store():
    """Create an empty location -> year -> key mapping that fills itself on access."""
    return defaultdict(lambda: defaultdict(dict))


def _date_columns_to_doy(raw_df):
    """Rename the date-like column headers of ``raw_df`` to day-of-year integers.

    Returns the renamed frame, the day-of-year labels (in column order) and
    the parsed header timestamps they were derived from.
    """
    date_cols = [col for col in raw_df.columns if utils.is_date_column(col)]
    stamps = [pd.to_datetime(col) for col in date_cols]
    doys = [stamp.dayofyear for stamp in stamps]
    return raw_df.rename(columns=dict(zip(date_cols, doys))), doys, stamps


def _filter_observations(df, doy_column, cultivar_col, cultivar_name,
                         treatment_col, treatment_name, location):
    """Return the complete observation rows of one cultivar/treatment pair.

    The result holds a ``Treatment`` column, one column per observation day
    and a ``Location`` column; rows with any missing value are dropped and the
    index is renumbered from 0.
    """
    wanted = df[cultivar_col].isin([cultivar_name]) & df[treatment_col].isin([treatment_name])
    return (
        df.loc[wanted, [treatment_col] + doy_column]
        .rename(columns={treatment_col: "Treatment"})
        .assign(Location=location)
        .dropna()
        # The original called reset_index() without keeping its result, so
        # the index was never actually reset.
        .reset_index(drop=True)
    )


def load_observations(config_file):
    """Load budburst, flowering and weather observations listed in a JSON config.

    The config maps each workbook file name (resolved against the
    ``FOLDER_PATH`` environment variable, loaded via dotenv) to a ``sheets``
    list. Every sheet entry provides ``BB_sheet_name``, ``Flwr_sheet_name``,
    ``location``, ``weather_path`` and single-item ``cultivar`` /
    ``treatments`` mappings of ``{column name: value to keep}``.

    Parameters
    ----------
    config_file : str or path-like
        Path of the JSON configuration file.

    Returns
    -------
    tuple
        ``(weather_df, budburstdoy_df, max_observed_buds_df, PBB_df,
        PBB_doy_df, FlwrDOY_df)``. Each element is a nested mapping indexed as
        ``[location][obs_year][BB_sheet_name + treatment_name]``. The stored
        budburst/flowering values are whatever ``utils.BB_specifications`` and
        ``utils.Flwr_specifications`` return.

    Raises
    ------
    IndexError
        If a budburst sheet has no date-like column header (the observation
        year is taken from the first one).
    """
    with open(config_file, "r") as file:
        config = json.load(file)

    # Load environment variables
    load_dotenv()
    FOLDER_PATH = os.getenv('FOLDER_PATH')

    weather_df = _nested_store()
    budburstdoy_df = _nested_store()
    max_observed_buds_df = _nested_store()
    PBB_df = _nested_store()
    PBB_doy_df = _nested_store()
    FlwrDOY_df = _nested_store()

    for file_name, file_info in config.items():
        file_path = os.path.join(FOLDER_PATH, file_name)
        # Open each workbook once and close it when all its sheets are read.
        with pd.ExcelFile(file_path, engine="openpyxl") as xls:
            for sheet_info in file_info['sheets']:
                BB_sheet_name = sheet_info['BB_sheet_name']
                Flwr_sheet_name = sheet_info['Flwr_sheet_name']
                location = sheet_info['location']
                cultivar_col, cultivar_name = list(sheet_info['cultivar'].items())[0]
                treatment_col, treatment_name = list(sheet_info['treatments'].items())[0]
                key = BB_sheet_name + treatment_name

                raw_df_BB = pd.read_excel(xls, sheet_name=BB_sheet_name)
                raw_df_Flwr = pd.read_excel(xls, sheet_name=Flwr_sheet_name)

                # Replace date headers by their day of year (DOY).
                df_BB, doy_column_BB, bb_dates = _date_columns_to_doy(raw_df_BB)
                df_Flwr, doy_column_Flwr, _ = _date_columns_to_doy(raw_df_Flwr)

                # The observation year comes from the first budburst date column.
                obs_year = bb_dates[0].year

                PBB_doy_df[location][obs_year][key] = doy_column_BB

                filtered_df_BB = _filter_observations(
                    df_BB, doy_column_BB, cultivar_col, cultivar_name,
                    treatment_col, treatment_name, location)
                filtered_df_Flwr = _filter_observations(
                    df_Flwr, doy_column_Flwr, cultivar_col, cultivar_name,
                    treatment_col, treatment_name, location)

                # Observed budburst DOY
                BB_percent = False
                PBB, BB, BudBurstDOY, max_observed_buds = utils.BB_specifications(
                    location, filtered_df_BB[doy_column_BB], BB_percent)
                budburstdoy_df[location][obs_year][key] = BudBurstDOY
                max_observed_buds_df[location][obs_year][key] = max_observed_buds
                PBB_df[location][obs_year][key] = PBB

                # Observed flowering DOY (second return value is not used here).
                FlwrDOY, _ = utils.Flwr_specifications(
                    np.max(BB), filtered_df_Flwr[doy_column_Flwr])
                FlwrDOY_df[location][obs_year][key] = FlwrDOY

                # Weather of the observation year for this sheet
                weather_path = sheet_info['weather_path']
                weather_df[location][obs_year][key] = load_weather(weather_path, obs_year)

    return weather_df, budburstdoy_df, max_observed_buds_df, PBB_df, PBB_doy_df, FlwrDOY_df

In [4]:
def Chill(Tc, T):
    """Chill contribution of a single temperature reading.

    Returns ``(4*T / Tc**2) * (Tc - T)`` when ``0 <= T <= Tc``: a parabola
    that is 0 at both ends of the interval and 1 at ``T = Tc / 2``. Any other
    temperature contributes 0.
    """
    inside_window = 0 <= T <= Tc
    if not inside_window:
        return 0
    return (((4*T)/(Tc**2))*(Tc-T))


def HrsBelowBase(Tc, MeanT):
    """Chill units as the temperature deficit below the threshold ``Tc``.

    Returns ``Tc - MeanT`` when ``0 <= MeanT <= Tc`` and 0 otherwise.

    A temperature that satisfies none of the range comparisons (e.g. NaN)
    also yields 0, the same as ``Chill``. The previous three independent
    ``if`` statements left the result unassigned in that case and raised
    ``UnboundLocalError``.
    """
    if 0 <= MeanT <= Tc:
        return -MeanT + Tc
    return 0

def Wangengel(MinTemp,OptTemp,MaxTemp,RefTemp,T):
    """Temperature response at ``T`` relative to the response at ``RefTemp``.

    The response curve is evaluated only strictly between ``MinTemp`` and
    ``MaxTemp``. Outside that open interval the response at ``T`` is 0.0 and
    the response at ``RefTemp`` defaults to 1.0 (no rescaling). The ratio of
    the two is returned as a float rounded to three decimals.
    """
    def _response(temp, outside_value):
        # Cardinal-temperature curve; the exponent is only computed when the
        # temperature actually lies inside the (MinTemp, MaxTemp) window.
        if not (MinTemp < temp < MaxTemp):
            return outside_value
        exponent = math.log(2.0) / math.log((MaxTemp - MinTemp) / (OptTemp - MinTemp))
        offset = temp - MinTemp
        span = OptTemp - MinTemp
        return (2 * offset ** exponent * span ** exponent - offset ** (2 * exponent)) / span ** (2 * exponent)

    effect_at_T = _response(T, 0.0)
    effect_at_ref = _response(RefTemp, 1.0)
    return round(float(effect_at_T / effect_at_ref), 3)

def GDH(Tb,Tu,Tc,T):
    """Heat contribution of temperature ``T`` from a two-branch cosine curve.

    * ``Tb < T <= Tu``: rises from 0 (at ``Tb``) to 1 (at ``Tu``).
    * ``Tu < T <= Tc``: falls from 1 (just above ``Tu``) to 0 (at ``Tc``).
    * anything else: 0.
    """
    if Tb < T <= Tu:
        rising_fraction = (T - Tb) / (Tu - Tb)
        return float(0.5 * (1 + np.cos(np.pi + np.pi * rising_fraction)))

    if Tu < T <= Tc:
        falling_fraction = (T - Tu) / (Tc - Tu)
        return float((1 + np.cos(0.5*np.pi + 0.5*np.pi * falling_fraction)))

    return 0

def HallChillModel(T, S, T1, T2, T_hi, T_lo, c1, T_base, r, k):
    """Increment ``delta_S`` of the state ``S`` for one temperature reading.

    The increment blends a chill term and a heat term with an ``S``-dependent
    weight: ``(1 - weight) * chill + weight * heat``.

    * chill: 0 below ``T1``; ``c1 * (T_hi - T) * (T - T_lo)`` on ``[T1, T2]``;
      held at its ``T2`` value above ``T2``.
    * heat: 0 below ``T_base``; ``r * (T - T_base)`` from ``T_base`` upwards.
    * weight: product of two sigmoid-like factors of ``S`` with steepness
      ``k``; it is 0 at ``S = 0`` and 1 at ``S = 1``.

    Values quoted in the original inline notes: T1 = 2.45, T2 = 21,
    T_hi = 16.45, T_lo = -40.82, c1 = 0.000411, T_base = 2, r = 0.0002924,
    k = 4.
    """
    # Chill term. The branches are tested so that, exactly as with the
    # original sequence of independent ifs, the plateau wins if T1 > T2.
    if T > T2:
        chill_term = c1 * (T_hi - T2) * (T2 - T_lo)
    elif (T >= T1) and (T <= T2):
        chill_term = c1 * (T_hi - T) * (T - T_lo)
    elif T < T1:
        chill_term = 0

    # Heat term
    if T >= T_base:
        heat_term = r * (T - T_base)
    elif T < T_base:
        heat_term = 0

    # Weight that moves the increment from chill-driven to heat-driven as S grows
    midpoint_factor = (1+np.exp(-.5*k))/(1+np.exp(-k*(S-.5)))
    saturation_factor = (1-np.exp(-k*S))/(1-np.exp(-k))
    blend = midpoint_factor * saturation_factor

    return (1- blend) * chill_term +  blend * heat_term


In [5]:
# JSON configuration read by load_observations: it lists the observation
# workbooks and, per sheet, the location, cultivar, treatment and weather file.
config_file = "config_MultiDataFiles_BBFlwr.json"
# Each returned object is a nested mapping indexed as
# [location][observation year][BB sheet name + treatment name].
weather_df, budburstdoy_df, max_observed_buds_df, PBB_df, PBB_doy_df, FlwrDOY_df = load_observations(config_file)


# Optimize for Flowering:

In [None]:
# Predict the budburst day of year of every dataset with the base-model
# parameters: accumulate chill from the configured start day until the chill
# requirement is met, then accumulate heat until the heat requirement is met.
locations = ['Te Puke', 'Kerikeri']

# Look the base-model settings up once instead of once per hourly record.
# assumes utils.base_model_config() returns the same values on every call — TODO confirm
base_config = utils.base_model_config()
chill_start_day = pd.to_datetime(base_config['StartDay']).day_of_year
start_day = int(round(chill_start_day))

bb_predictions = defaultdict(lambda: defaultdict(lambda: dict()))

for location in locations:
    for year, sheets in weather_df[location].items():
        for sheet_info, weather_data in sheets.items():
            weather_since_start_doy = weather_data[weather_data['doy'] >= start_day].reset_index(drop=True)
            temps = weather_since_start_doy['temp']
            doys = weather_since_start_doy['doy']

            # Chill accumulation: position of the record at which the chill
            # requirement is reached. It stays None when the requirement is
            # never met or there are no records, so no index from a previous
            # dataset can leak into this one.
            chillVal = 0
            idx_endodormancy_release = None
            for idx, temp in enumerate(temps):
                chillVal += Chill(base_config['Tc_chill'], temp)
                if chillVal >= base_config['ChillRequirement']:
                    idx_endodormancy_release = idx
                    break

            # Heat accumulation, starting with the record after endodormancy release
            predicted_doy = 365  # Default value if heat target not reached
            if idx_endodormancy_release is not None:
                heatVal = 0
                first_heat_idx = idx_endodormancy_release + 1
                for idx_bb, temp in enumerate(temps.iloc[first_heat_idx:], start=first_heat_idx):
                    heatVal += Wangengel(
                        base_config['MinTemp_BB'],
                        base_config['OptTemp_BB'],
                        base_config['MaxTemp_BB'],
                        base_config['RefTemp_BB'],
                        temp)
                    if heatVal >= base_config['HeatRequirement']:
                        predicted_doy = int(doys.iloc[idx_bb])
                        break

            bb_predictions[location][year][sheet_info] = predicted_doy



def objective_function(params, locations, weather_df, bb_predictions, FlwrDOY_df): 
    """Mean squared error between observed and simulated flowering day of year.

    Parameters
    ----------
    params : sequence of 4 floats
        ``[MinTemp, OptTemp, MaxTemp, flwr_heat_target]``.
    locations : list of str
        Locations whose datasets are scored.
    weather_df, bb_predictions, FlwrDOY_df : nested mappings
        Indexed as ``[location][year][sheet key]``; they hold the hourly
        weather table, the budburst day of year that heat accumulation starts
        after, and the observed flowering day of year.

    For each dataset, Wang-Engel heat units (scaled with ``RefTemp = OptTemp``)
    are summed from the day after budburst; the simulated flowering day is the
    day of the record at which the sum reaches ``flwr_heat_target``, or 365
    if it never does.
    """
    MinTemp, OptTemp, MaxTemp, flwr_heat_target = params
    simulated = []
    measured = []

    for location in locations:
        for year in weather_df[location]:
            for sheet_key, hourly in weather_df[location][year].items():
                # Heat accumulation begins the day after budbreak
                first_doy = bb_predictions[location][year][sheet_key] + 1
                after_budbreak = hourly[hourly['doy'] >= first_doy].reset_index(drop=True)

                flowering_doy = 365  # used when the heat target is never reached
                accumulated = 0
                for record_doy, record_temp in zip(after_budbreak['doy'], after_budbreak['temp']):
                    accumulated += Wangengel(MinTemp, OptTemp, MaxTemp, OptTemp, record_temp)
                    if accumulated >= flwr_heat_target:
                        flowering_doy = int(record_doy)
                        break

                simulated.append(flowering_doy)
                measured.append(FlwrDOY_df[location][year][sheet_key])

    return mean_squared_error(measured, simulated)

# Define a wrapper function that takes both params and fixed args
def wrapped_objective(params):
    """Single-argument adapter of ``objective_function`` for the optimizer.

    Binds the two study locations and the notebook-level data tables so that
    only the parameter vector remains free.

    NOTE(review): the fourth argument of ``objective_function`` is named
    ``bb_predictions``, but the observed budburst table ``budburstdoy_df`` is
    what gets passed here, so the ``bb_predictions`` computed earlier in this
    cell is never used. Confirm which of the two is intended.
    """
    fit_locations = ['Te Puke', 'Kerikeri']
    return objective_function(params, fit_locations, weather_df, budburstdoy_df, FlwrDOY_df)



# Search box of the flowering fit, in the order objective_function unpacks
# params: [MinTemp, OptTemp, MaxTemp, flwr_heat_target].
bounds = [
    (5, 18),      # MinTemp
    (18, 29),     # OptTemp
    (29, 38),     # MaxTemp
    (300, 1000),  # heat target
]

# differential_evolution is a stochastic search; fixing the seed makes the fit
# reproducible on a re-run, as the per-location start-day fit further down
# already does. Outputs recorded from earlier unseeded runs may differ.
result = differential_evolution(
    func=wrapped_objective,
    bounds=bounds,
    strategy='best1bin',
    popsize=10,
    maxiter=1000,
    seed=42)

In [7]:
# Best parameter vector found, in bounds order:
# [MinTemp, OptTemp, MaxTemp, flwr_heat_target]
result.x

array([ 14.95838104,  23.63311623,  37.73666457, 385.67640156])

# Optimize for all BB parameters (same start days per loc)

In [None]:
def objective_function(params, locations, weather_df, budburstdoy_df): 
    """Mean squared error between observed and simulated budburst day of year.

    Parameters
    ----------
    params : sequence of 6 floats
        ``[Tc, MinTemp, OptTemp, MaxTemp, chill_target, heat_target]``.
    locations : list of str
        Locations whose datasets are scored.
    weather_df, budburstdoy_df : nested mappings
        Indexed as ``[location][year][sheet key]``; they hold the hourly
        weather table and the observed budburst day of year.

    Every dataset starts accumulating chill on the ``StartDay`` of
    ``utils.base_model_config()``. Once ``chill_target`` is reached,
    Wang-Engel heat units (scaled with ``RefTemp = OptTemp``) are summed from
    the next record on; the simulated budburst day is the day of the record at
    which the sum reaches ``heat_target``. If either target is not reached,
    or a dataset has no records from the start day on, the prediction is 365.
    """
    Tc, MinTemp, OptTemp, MaxTemp, chill_target, heat_target = params
    chill_start_day = pd.to_datetime(utils.base_model_config()['StartDay']).day_of_year
    start_day = int(round(chill_start_day))

    predictions = []
    observed = []
    for location in locations:
        for year, sheets in weather_df[location].items():
            for sheet_key, weather_data in sheets.items():
                weather_since_start_doy = weather_data[weather_data['doy'] >= start_day].reset_index(drop=True)
                temps = weather_since_start_doy['temp']
                doys = weather_since_start_doy['doy']

                # Chill accumulation. The release position stays None when the
                # target is never met or there are no records, so an index left
                # over from the previous dataset is never reused.
                chillVal = 0
                idx_endodormancy_release = None
                for idx, temp in enumerate(temps):
                    chillVal += Chill(Tc, temp)
                    if chillVal >= chill_target:
                        idx_endodormancy_release = idx
                        break

                # Heat accumulation, starting with the record after endodormancy release
                predicted_doy = 365  # Default value if target not reached
                if idx_endodormancy_release is not None:
                    heatVal = 0
                    first_heat_idx = idx_endodormancy_release + 1
                    for idx_bb, temp in enumerate(temps.iloc[first_heat_idx:], start=first_heat_idx):
                        heatVal += Wangengel(MinTemp, OptTemp, MaxTemp, OptTemp, temp)
                        if heatVal >= heat_target:
                            predicted_doy = int(doys.iloc[idx_bb])
                            break

                predictions.append(predicted_doy)
                observed.append(budburstdoy_df[location][year][sheet_key])

    return mean_squared_error(observed, predictions)

# Define a wrapper function that takes both params and fixed args
def wrapped_objective(params):
    """Single-argument adapter of ``objective_function`` for the optimizer.

    Binds the two study locations and the notebook-level weather and observed
    budburst tables so that only the parameter vector remains free.
    """
    fit_locations = ['Te Puke', 'Kerikeri']
    return objective_function(params, fit_locations, weather_df, budburstdoy_df)



# Search box of the budburst fit with a shared chill start day, in the order
# objective_function unpacks params:
# [Tc, MinTemp, OptTemp, MaxTemp, chill_target, heat_target].
bounds = [
    (14, 18),      # Tc
    (3, 7),        # MinTemp
    (15, 22),      # OptTemp
    (22, 30),      # MaxTemp
    (1500, 2200),  # chill target
    (100, 400),    # heat target
]

# differential_evolution is a stochastic search; fixing the seed makes the fit
# reproducible on a re-run, as the per-location start-day fit further down
# already does.
result = differential_evolution(
    func=wrapped_objective,
    bounds=bounds,
    strategy='best1bin',
    popsize=15,
    maxiter=300,
    seed=42)

In [None]:
# Best parameter vector found, in bounds order:
# [Tc, MinTemp, OptTemp, MaxTemp, chill_target, heat_target]
result.x

# Optimize for all BB parameters (different start days per loc)

In [None]:
def objective_function(params, locations, weather_df, budburstdoy_df): #params = [Tc, Tmin, Topt, Tmax, chill_target, heat_target, start_day_tp, start_day_kk]
    """Mean squared error of simulated budburst day, with one chill start day per location.

    Parameters
    ----------
    params : sequence of floats
        ``[Tc, Tmin, Topt, Tmax, chill_target, heat_target]`` followed by one
        chill start day per entry of ``locations``, in the same order.
    locations : list of str
        Locations whose datasets are scored.
    weather_df, budburstdoy_df : nested mappings
        Indexed as ``[location][year][sheet key]``; they hold the hourly
        weather table and the observed budburst day of year.

    Chill is accumulated with the candidate ``Tc`` from the location's start
    day (rounded to an integer) until ``chill_target`` is reached. Wang-Engel
    heat units (scaled with ``RefTemp = Topt``) are then summed from the next
    record on, and the simulated budburst day is the day of the record at
    which the sum reaches ``heat_target``. If either target is not reached,
    or a dataset has no records from the start day on, the prediction is 366.
    """
    Tc, Tmin, Topt, Tmax, chill_target, heat_target = params[:6]
    chill_start_days = params[6:]

    predictions = []
    observed = []
    for i, location in enumerate(locations): # locations = ['Te Puke', 'Kerikeri']
        start_day = int(round(chill_start_days[i]))
        for year, sheets in weather_df[location].items():
            for sheet_key, weather_data in sheets.items():
                weather_since_start_doy = weather_data[weather_data['doy'] >= start_day].reset_index(drop=True)
                temps = weather_since_start_doy['temp']
                doys = weather_since_start_doy['doy']

                # Chill accumulation with the optimised Tc. Using the fixed
                # base-model 'Tc_chill' here would make the Tc search
                # dimension have no effect on the error.
                chillVal = 0
                idx_endodormancy_release = None
                for idx, temp in enumerate(temps):
                    chillVal += Chill(Tc, temp)
                    if chillVal >= chill_target:
                        idx_endodormancy_release = idx
                        break

                # Heat accumulation, starting with the record after endodormancy release
                RefTemp = Topt # RefTemp = OptTemp scales the function to 0-1
                predicted_doy = 366  # Default value if heat target not reached
                if idx_endodormancy_release is not None:
                    heatVal = 0
                    first_heat_idx = idx_endodormancy_release + 1
                    for idx_bb, temp in enumerate(temps.iloc[first_heat_idx:], start=first_heat_idx):
                        heatVal += Wangengel(Tmin, Topt, Tmax, RefTemp, temp)
                        if heatVal >= heat_target:
                            predicted_doy = int(doys.iloc[idx_bb])
                            break

                predictions.append(predicted_doy)
                observed.append(budburstdoy_df[location][year][sheet_key])

    return mean_squared_error(observed, predictions)

# Define a wrapper function that takes both params and fixed args
def wrapped_objective(params):
    """Single-argument adapter of ``objective_function`` for the optimizer.

    Binds the two study locations and the notebook-level weather and observed
    budburst tables; the order of the locations fixes which trailing entry of
    ``params`` is used as each location's chill start day.
    """
    fit_locations = ['Te Puke', 'Kerikeri']
    return objective_function(params, fit_locations, weather_df, budburstdoy_df)



# Search box of the budburst fit with one chill start day per location, in
# the order objective_function unpacks params.
bounds = [
    (10, 25),      # Tc
    (7, 10),       # Tmin
    (17, 25),      # Topt
    (26, 35),      # Tmax
    (1400, 2000),  # chill target
    (500, 1000),   # heat target
    (1, 60),       # chill start day, first location
    (1, 60),       # chill start day, second location
]

# Seeded differential-evolution search over the box above.
result = differential_evolution(
    wrapped_objective,
    bounds,
    strategy='best1bin',
    maxiter=500,
    popsize=10,
    seed=42,
)

In [None]:
# Best parameter vector found, in bounds order:
# [Tc, Tmin, Topt, Tmax, chill_target, heat_target,
#  chill start day Te Puke, chill start day Kerikeri]
result.x