In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from dotenv import load_dotenv
import json
from collections import defaultdict
import importlib
import warnings
warnings.filterwarnings('ignore')
import utils
importlib.reload(utils)
import os
from scipy.stats import linregress
from sklearn.metrics import mean_absolute_error

# Load observations

In [2]:
# Load datasets for observations
def load_observations(file_path,sheet_name, cultivar_col,cultivar_name,treatment_col,treatment_name,location,max_observed_buds):
    """Load the bud-burst observations of one cultivar/treatment from an Excel sheet.

    Date-like column headers (as judged by ``utils.is_date_column``) are renamed
    to their day of year (DOY). Rows are then restricted to the requested
    cultivar and treatment, rows containing any NaN are dropped, and the DOY
    columns are handed to ``utils.BB_specifications``.

    Parameters
    ----------
    file_path : path of the Excel workbook (read with the ``openpyxl`` engine).
    sheet_name : sheet to read.
    cultivar_col, cultivar_name : column holding the cultivar and the value to keep.
    treatment_col, treatment_name : column holding the treatment and the value to keep.
    location : value written to the ``Location`` column of every kept row.
    max_observed_buds : forwarded unchanged to ``utils.BB_specifications``.

    Returns
    -------
    tuple
        ``(PBB, BB, BudBurstDOY)`` exactly as returned by ``utils.BB_specifications``.
    """
    # Column names used in the intermediate, filtered DataFrame
    TREATMENT = "Treatment"
    LOCATION = "Location"

    # The context manager releases the workbook handle once the sheet is read
    with pd.ExcelFile(file_path, engine="openpyxl") as xls:
        raw_df = pd.read_excel(xls, sheet_name=sheet_name)

    # Map every date-like header to its day of year (each header is checked/parsed once)
    date_to_doy = {col: pd.to_datetime(col).dayofyear for col in raw_df.columns if utils.is_date_column(col)}
    doy_column = list(date_to_doy.values())
    df = raw_df.rename(columns=date_to_doy)

    # Keep only the rows of the requested cultivar & treatment
    row_mask = df[cultivar_col].isin([cultivar_name]) & df[treatment_col].isin([treatment_name])

    # Build the filtered frame as one chain so no step mutates a slice of `df`
    # and the index reset actually takes effect (its result used to be discarded).
    filtered_df = (
        df.loc[row_mask, [treatment_col] + doy_column]
        .rename(columns={treatment_col: TREATMENT})
        .assign(**{LOCATION: location})
        .dropna()                      # remove rows with any NaN value
        .reset_index(drop=True)
    )

    PBB, BB, BudBurstDOY = utils.BB_specifications(filtered_df[doy_column], max_observed_buds)
    return PBB, BB, BudBurstDOY
    


# WangEngel & GDH & Chill models

In [3]:
def Chill(Tc,T):
    """Return the chill contribution of temperature ``T`` as ``4*T*(Tc - T) / Tc**2``.

    The expression is a parabola in ``T``: it is 0 at ``T == 0`` and ``T == Tc``,
    1 at ``T == Tc / 2`` and negative outside ``[0, Tc]`` (for positive ``Tc``).
    The result is always a built-in ``float``.
    """
    scaled_temp = (4 * T) / (Tc ** 2)
    distance_to_tc = Tc - T
    return float(scaled_temp * distance_to_tc)

def Wangengel(MinTemp,OptTemp,MaxTemp,RefTemp,T):
    """Return the Wang-Engel temperature response at ``T`` relative to that at ``RefTemp``.

    Parameters
    ----------
    MinTemp, OptTemp, MaxTemp : cardinal temperatures of the response curve.
    RefTemp : temperature whose response is used as the denominator.
    T : temperature to evaluate.

    Returns
    -------
    float
        ``response(T) / response(RefTemp)``. ``response(T)`` is taken as 0.0 when
        ``T`` lies outside the open interval ``(MinTemp, MaxTemp)``, and the
        denominator is taken as 1.0 when ``RefTemp`` lies outside that interval.
    """
    rel_eff = _wangengel_response(MinTemp, OptTemp, MaxTemp, T)
    rel_eff_ref = _wangengel_response(MinTemp, OptTemp, MaxTemp, RefTemp)

    RelEff = 0.0 if rel_eff is None else rel_eff
    RelEffRefTemp = 1.0 if rel_eff_ref is None else rel_eff_ref
    return float(RelEff / RelEffRefTemp)


def _wangengel_response(MinTemp, OptTemp, MaxTemp, T):
    """Un-normalised Wang-Engel response at ``T``; ``None`` when ``T`` is outside ``(MinTemp, MaxTemp)``."""
    if not ((T > MinTemp) and (T < MaxTemp)):
        return None
    p = np.log(2.0) / np.log((MaxTemp - MinTemp) / (OptTemp - MinTemp))
    # np.power is used rather than np.pow: the latter only exists from NumPy 2.0 onwards.
    return (2 * np.power(T - MinTemp, p) * np.power(OptTemp - MinTemp, p) - np.power(T - MinTemp, 2 * p)) / np.power(OptTemp - MinTemp, 2 * p)

def GDH(Tb,Tu,Tc,T):
    """Return the heat contribution of temperature ``T`` from a two-piece cosine curve.

    * ``Tb < T <= Tu``: rises from 0 (towards ``Tb``) to 1 (at ``Tu``).
    * ``Tu < T <= Tc``: falls from 1 (towards ``Tu``) to 0 (at ``Tc``).
    * otherwise: 0.

    The result is always a built-in ``float`` (the out-of-range branch used to
    return the int ``0``, unlike the other two branches).
    """
    if Tb < T <= Tu:
        return float(0.5 * (1 + np.cos(np.pi + np.pi * ((T - Tb) / (Tu - Tb)))))
    if Tu < T <= Tc:
        return float((1 + np.cos(0.5*np.pi + 0.5*np.pi * ((T - Tu) / (Tc - Tu)))))
    return 0.0


# Chill/Heat Accumulation

In [4]:
# Chill & heat accumulation
def chill_heat_accumulation(model_config, buds_targets, predicted_endo_release_doy, predicted_budbreak_doy, weather_since_start_doy, sheet_name, treatment_name):
    """Predict endodormancy-release and budbreak DOY by sequential chill then heat accumulation.

    For every bud position in ``buds_targets`` the rows of
    ``weather_since_start_doy`` are walked in order:

    1. ``Chill(model_config["Tc_chill"], temp)`` is summed until the bud's
       ``'chill'`` target is reached; the ``doy`` of that row is stored in
       ``predicted_endo_release_doy[sheet_name + treatment_name][bud_position]``.
    2. Starting at that same row, heat units (``Wangengel`` or ``GDH``, selected
       by ``model_config["HeatAccFunc"]``) are summed until the ``'heat'`` target
       is reached; the ``doy`` of the row following the crossing is stored in
       ``predicted_budbreak_doy[sheet_name + treatment_name][bud_position]``.

    A bud whose chill (or heat) target is never reached gets no entry in the
    corresponding dictionary.

    Parameters
    ----------
    model_config : mapping with ``"Tc_chill"``, ``"HeatAccFunc"`` and the
        parameters of the selected heat function.
    buds_targets : ``{bud_position: {'chill': ..., 'heat': ...}}``.
    predicted_endo_release_doy, predicted_budbreak_doy : nested mappings updated
        in place (indexing a missing outer key must create it, e.g. a defaultdict).
    weather_since_start_doy : DataFrame with ``'temp'`` and ``'doy'`` columns.
    sheet_name, treatment_name : strings concatenated into the outer key.

    Returns
    -------
    tuple
        ``(predicted_endo_release_doy, predicted_budbreak_doy)`` — the same
        objects that were passed in.

    Raises
    ------
    ValueError
        If ``model_config["HeatAccFunc"]`` is neither ``'WangEngel'`` nor ``'GDH'``.
    """
    # Resolve the heat function once; only the parameters of the chosen model are read.
    heat_model = model_config["HeatAccFunc"]
    if heat_model == 'WangEngel':
        def heat_unit(temp):
            # RefTemp = OptTemp scales the function to 0-1
            return Wangengel(model_config["MinTemp"], model_config["OptTemp"], model_config["MaxTemp"], model_config["OptTemp"], temp)
    elif heat_model == 'GDH':
        def heat_unit(temp):
            return GDH(model_config["Tb_GDH"], model_config["Tu_GDH"], model_config["Tc_GDH"], temp)
    else:
        raise ValueError(f"Unknown HeatAccFunc {heat_model!r}; expected 'WangEngel' or 'GDH'")

    key = sheet_name + treatment_name
    temps = weather_since_start_doy['temp']
    doys = weather_since_start_doy['doy']
    last_row = len(doys) - 1

    for bud_position, target in buds_targets.items():
        chill_requirement = target['chill']
        heat_requirement = target['heat']

        # --- Chill phase: accumulate until the requirement is met ---
        chillVal = 0
        release_row = None
        for row, temp in enumerate(temps):
            chillVal += Chill(model_config["Tc_chill"], temp)
            if chillVal >= chill_requirement:
                release_row = row
                predicted_endo_release_doy[key][bud_position] = int(doys.iloc[row])
                break

        # Without endodormancy release there is nothing to accumulate heat from
        # (this also covers an empty weather table).
        if release_row is None:
            continue

        # --- Heat phase: starts at the release row itself ---
        heatVal = 0
        for offset, temp in enumerate(temps.iloc[release_row:]):
            heatVal += heat_unit(temp)
            if heatVal >= heat_requirement:
                # The row after the crossing is reported (original behaviour); it is
                # clamped to the last row so a crossing on the final record cannot
                # index past the end of the table.
                budbreak_row = min(release_row + offset + 1, last_row)
                predicted_budbreak_doy[key][bud_position] = int(doys.iloc[budbreak_row])
                break
    return predicted_endo_release_doy, predicted_budbreak_doy


In [5]:
# Load the per-file / per-sheet run configuration.
# JSON is UTF-8 encoded, so the encoding is given explicitly instead of relying on the platform default.
with open("config_MultiDataFiles.json", "r", encoding="utf-8") as file:
    config = json.load(file)

def run(model_config):
    """Run the chill/heat model on every configured sheet and score it against observations.

    For each file and sheet listed in the module-level ``config`` the hourly
    weather CSV is loaded, chill/heat units are accumulated for the rows whose
    DOY is greater than the start DOY, and the predicted budbreak DOY is stored
    next to the observed one (from ``load_observations``).

    Parameters
    ----------
    model_config : mapping providing ``"ChillRequirement"``, ``"HeatRequirement"``,
        ``"InterpolationMethod"`` and the keys read by ``chill_heat_accumulation``.

    Returns
    -------
    tuple
        ``(predicted_budbreak_doy, observed_budbreak_doy, stats, (observed_all, regression_line))``
        where the first two are ``{sheet_name + treatment_name: {1: doy}}`` mappings,
        ``stats`` is the result of ``utils.calculate_fit_stats`` and the last item
        holds the observed values with the fitted regression line evaluated at them.

    Raises
    ------
    EnvironmentError
        If the ``FOLDER_PATH`` environment variable is not set.
    """
    # 1st of March to start accumulation of chill units.
    # NOTE(review): 2016 is a leap year, so this is DOY 61 (2 March in a common year), and the
    # filter below is strict (`>`), i.e. accumulation starts the day after — confirm this is intended.
    start_doy = pd.to_datetime('2016-03-01').dayofyear

    buds_targets = {1:{'chill':model_config["ChillRequirement"], 'heat':model_config["HeatRequirement"]}}

    predicted_endo_release_doy = defaultdict(dict)
    predicted_budbreak_doy = defaultdict(dict)
    observed_budbreak_doy = defaultdict(dict)

    # Load environment variables
    load_dotenv()
    FOLDER_PATH = os.getenv('FOLDER_PATH')
    if FOLDER_PATH is None:
        raise EnvironmentError("FOLDER_PATH is not set (define it in the environment or in the .env file)")

    for file_name, file_info in config.items():
        file_path = os.path.join(FOLDER_PATH, file_name)
        # Process each sheet
        for sheet_info in file_info['sheets']:
            # Extract sheet names, cultivars, and treatments
            sheet_name = sheet_info['sheet_name']
            location = sheet_info['location'].replace(' ','')
            max_observed_buds = sheet_info['max_observed_buds']
            cultivar_col, cultivar_name = list(sheet_info['cultivar'].items())[0]
            treatment_col, treatment_name = list(sheet_info['treatments'].items())[0]

            # Load the "Hourly" weather data (missing temperatures filled as configured)
            weather_data = _load_hourly_temperature(sheet_info['weather_path'], model_config["InterpolationMethod"])

            # start from the start_doy to accumulate chill/heat units
            weather_since_start_doy = weather_data[weather_data['doy']>start_doy]

            # Predict the endodormancy release doy and budbreak doy
            predicted_endo_release_doy, predicted_budbreak_doy = chill_heat_accumulation(model_config, buds_targets, predicted_endo_release_doy, predicted_budbreak_doy, weather_since_start_doy, sheet_name, treatment_name)

            # Load and calculate the observed budburst doy
            _, _, BudBurstDOY = load_observations(file_path,sheet_name, cultivar_col,cultivar_name,treatment_col,treatment_name,location,max_observed_buds)
            observed_budbreak_doy[sheet_name+treatment_name][1] = BudBurstDOY

    # Calculate stats.
    # Pair observed and predicted values by key: a treatment whose heat requirement was never
    # reached has no prediction, and pairing by position would silently shift all later pairs.
    observed_all = []
    predicted_all = []
    for key, observed_by_bud in observed_budbreak_doy.items():
        if key not in predicted_budbreak_doy:
            print(f"No budbreak predicted for '{key}'; excluded from the fit statistics")
            continue
        predicted_all.append(list(predicted_budbreak_doy[key].values())[0])
        observed_all.append(list(observed_by_bud.values())[0])

    stats = utils.calculate_fit_stats(observed_all, predicted_all)

    # Fit linear regression
    observed_all = np.array(observed_all)
    predicted_all = np.array(predicted_all)
    slope, intercept, _, _, _ = linregress(observed_all, predicted_all)
    regression_line = slope * observed_all + intercept

    return predicted_budbreak_doy, observed_budbreak_doy, stats, (observed_all,regression_line)


def _load_hourly_temperature(weather_path, interpolation_method):
    """Read an hourly weather CSV into a frame with columns ``date``, ``doy``, ``Time``, ``temp``."""
    weather_data = pd.read_csv(weather_path)
    weather_data = weather_data[['Date', 'Time', 'Air Temp. (°C)']].rename(columns={'Date': 'date', 'Air Temp. (°C)': 'temp'})
    weather_data.insert(1, 'doy', pd.to_datetime(weather_data['date'], format="mixed").dt.dayofyear)

    # Non-numeric placeholders such as '-' become NaN
    weather_data['temp'] = pd.to_numeric(weather_data['temp'], errors='coerce')

    # Fill missing values. Results are assigned back to the frame: chained
    # `Series.method(inplace=True)` calls do not update it under pandas Copy-on-Write.
    if weather_data['temp'].isna().any():
        if interpolation_method == 'linear':
            weather_data['temp'] = weather_data['temp'].interpolate(method='linear')
        elif interpolation_method == 'seasonal':
            # Compute seasonal average of hourly temperatures
            weather_data = utils.seasonal_ave_fillna(weather_data)
        elif interpolation_method == 'longtermaverage':
            weather_data['temp'] = weather_data['temp'].fillna(weather_data['temp'].mean())
    return weather_data
            

In [6]:
def BB_plot(predicted_budbreak_doy, observed_budbreak_doy, regression_xy):
    """Scatter predicted against observed budburst DOY, one marker per treatment, with a 1:1 line.

    Parameters
    ----------
    predicted_budbreak_doy, observed_budbreak_doy : ``{key: {bud_position: doy}}``
        mappings; the first stored value of each inner mapping is plotted.
        Keys present in only one of the two mappings are skipped.
    regression_xy : ``(x, y)`` of a fitted regression line. Currently unused —
        the trace that draws it is commented out below.
    """
    all_bb_days = []
    fig = go.Figure()
    # Pair by key rather than by position so a missing prediction cannot misalign the points
    for treatment_key, observed_by_bud in observed_budbreak_doy.items():
        if treatment_key not in predicted_budbreak_doy:
            continue
        predicted_bb = list(predicted_budbreak_doy[treatment_key].values())[0]
        observed_bb = list(observed_by_bud.values())[0]
        # collect all budburst days of all treatments to span the 1:1 line
        all_bb_days.extend([observed_bb, predicted_bb])
        fig.add_trace(go.Scatter(x=[observed_bb], y=[predicted_bb], mode='markers', marker=dict(size=15), name=treatment_key))

    # Add 1:1 dashed line (white, to show on the dark template); skipped when there is nothing to span
    if all_bb_days:
        axis_min, axis_max = min(all_bb_days), max(all_bb_days)
        fig.add_trace(go.Scatter(x=[axis_min, axis_max], y=[axis_min, axis_max], mode='lines', line=dict(color="white", dash="dash"), name="1:1 Line"))

    # Plot regression line
    # fig.add_trace(go.Scatter(x=regression_xy[0], y=regression_xy[1], mode='lines', line=dict(color="white", dash="solid"), name=f"Regression"))

    # Update layout
    fig.update_layout(
        title="BudBurst",
        xaxis_title="Observed BudBurst DOY",
        yaxis_title="Predicted BudBurst DOY",
        template="plotly_dark")
    fig.show()


# Test parameter values

In [7]:
class Test:
    """Sweep one model-configuration parameter and inspect the resulting budburst fit."""

    def __init__(self, config_param_name):
        """Remember which key of the model configuration is being varied."""
        self.config_param_name = config_param_name
        # Filled by _test_parameter, or lazily by _best_value_plot
        self.model_config = None

    def _test_parameter(self, list_values_to_test):
        """Run the model once per candidate value and return the best one.

        The best value has the lowest ``stats["MAE"]``; ties are broken by the
        lowest ``stats["RMSE"]``. The statistics of every run are printed.
        Returns 0 when no candidate improves on the initial (infinite) error,
        e.g. for an empty list.
        """
        self.list_values_to_test = list_values_to_test
        # Load default configuration
        self.model_config = utils.base_model_config()

        min_mae = np.inf # find the value with the minimum mae
        min_rmse = np.inf
        self.opt_param = 0
        for t in self.list_values_to_test:
            self.model_config[self.config_param_name] = t
            _, _, stats, _ = run(self.model_config)
            print('T = ',t,':')
            print(stats,'\n')
            if stats["MAE"] < min_mae:
                self.opt_param = t
                min_mae = stats["MAE"]
                min_rmse = stats['RMSE']
            elif stats["MAE"] == min_mae and stats["RMSE"] < min_rmse:
                # in case the MAE is the same, check for minimum RMSE
                self.opt_param = t
                min_rmse = stats["RMSE"]
        print('Best value: ', self.opt_param)
        return self.opt_param

    def _best_value_plot(self, opt_param):
        """Re-run the model with ``opt_param`` and plot predicted vs. observed budburst.

        Returns ``(predicted_budbreak_doy, observed_budbreak_doy)`` from that run.
        """
        # Allow plotting without a preceding sweep by starting from the default configuration
        if self.model_config is None:
            self.model_config = utils.base_model_config()
        self.model_config[self.config_param_name] = opt_param
        predicted_budbreak_doy, observed_budbreak_doy, stats,  regression_xy = run(self.model_config)
        BB_plot(predicted_budbreak_doy, observed_budbreak_doy, regression_xy)
        return predicted_budbreak_doy, observed_budbreak_doy

    def _apsim_compare(self, apsim_bb_predictions, predicted_budbreak_doy, observed_budbreak_doy):
        """Plot this notebook's predictions and APSIM's against the observations, with each MAE.

        ``apsim_bb_predictions`` must hold one value per observed/predicted pair,
        in the iteration order of ``observed_budbreak_doy``.

        Raises
        ------
        ValueError
            If the number of APSIM predictions differs from the number of pairs.
        """
        # Pair by key rather than by position so a missing prediction cannot misalign the values
        observed_all = []; predicted_all = []
        for treatment_key, observed_by_bud in observed_budbreak_doy.items():
            if treatment_key not in predicted_budbreak_doy:
                continue
            predicted_all.append(list(predicted_budbreak_doy[treatment_key].values())[0])
            observed_all.append(list(observed_by_bud.values())[0])

        if len(apsim_bb_predictions) != len(observed_all):
            raise ValueError(f"Got {len(apsim_bb_predictions)} APSIM predictions for {len(observed_all)} observed/predicted pairs")

        # Calculate MAE
        mae_python = mean_absolute_error(observed_all, predicted_all)
        mae_apsim = mean_absolute_error(observed_all, apsim_bb_predictions)

        apsim_python_comparison = pd.DataFrame({'Observed': observed_all, 'APSIM': apsim_bb_predictions, 'Python': predicted_all})

        fig = px.scatter(apsim_python_comparison, x="Observed", y=["Python","APSIM"])
        # Update legend labels
        fig.for_each_trace(lambda t: t.update(name=f"{t.name} (MAE={mae_python:.2f})") if t.name == "Python" else t.update(name=f"{t.name} (MAE={mae_apsim:.2f})"))

        # Add the 1:1 line for comparison (dashed black line)
        fig.add_trace(go.Scatter(x=[min(observed_all), max(observed_all)], y=[min(observed_all), max(observed_all)], mode='lines', line=dict(color="black", dash="dash"), name="1:1"))

        fig.update_traces(marker=dict(size=12,
                                    line=dict(width=2,
                                                color='DarkSlateGrey')),
                        selector=dict(mode='markers'))
        fig.update_layout(
            title="APSIM & Python Predictions (interpolated hourly temp. vs real hourly temp.)",
            xaxis_title="Observed BudBurst DOY",
            yaxis_title="Predicted BudBurst DOY")
        fig.update_xaxes(
                    tickmode="array",
                    # One tick per DOY; +1 because np.arange excludes its stop value
                    tickvals=np.arange(np.min(observed_all),np.max(observed_all) + 1,1),
                    tickangle=45,  # Rotate for better visibility
                    tickfont=dict(size=12))

        fig.show()
        

In [8]:
# Example MinTemp test:
# Run the model once for each candidate value of the "MinTemp" configuration entry,
# print the fit statistics of every run, then plot predicted vs. observed budburst DOY
# for the value with the lowest MAE (ties broken by RMSE).
config_param_name = "MinTemp"
T = [7,8,9,10]  # candidate values to evaluate

TestObject = Test(config_param_name)
best_val = TestObject._test_parameter(T)
# The two mappings are kept at module level because the APSIM comparison cell reuses them
predicted_budbreak_doy, observed_budbreak_doy = TestObject._best_value_plot(best_val)

T =  7 :
{'RMSE': 16.38, 'MAE': 13.53, 'R2': -2.09, 'NRMSE': 0.41} 

T =  8 :
{'RMSE': 14.27, 'MAE': 12.16, 'R2': -1.35, 'NRMSE': 0.36} 

T =  9 :
{'RMSE': 13.91, 'MAE': 11.32, 'R2': -1.23, 'NRMSE': 0.35} 

T =  10 :
{'RMSE': 17.26, 'MAE': 13.79, 'R2': -2.43, 'NRMSE': 0.43} 

Best value:  9


In [None]:
# If predictions from APSIM are also available:
# Requires the previous cell to have run (uses TestObject and the two budbreak mappings).
# The array is combined element-by-element with the observed values, so it must hold
# one APSIM prediction per observed entry, in the same order.
apsim_bb_predictions = np.array([274,274,292,292,272,277,277,272,272,275,275,258,275,274,274,270,264,279,273]) # MinTemp = 9
TestObject._apsim_compare(apsim_bb_predictions, predicted_budbreak_doy, observed_budbreak_doy)
