In [94]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from hyperopt import hp, rand, tpe, Trials, fmin, STATUS_FAIL, STATUS_OK

import os
import subprocess
from tempfile import TemporaryDirectory
from functools import partial

from sklearn.metrics import mean_absolute_percentage_error, r2_score

import ray
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.search.bayesopt import BayesOptSearch

from ray.tune.search.optuna import OptunaSearch
import optuna

In [95]:
def nse(predictions, targets):
    return 1 - (np.sum((targets - predictions) ** 2) / np.sum((targets - np.mean(targets)) ** 2))

In [96]:
def run_single_model(
    X: dict[str, float],
    in_names: list,
    out_names: list,
    params: pd.DataFrame,
    POP_NUM: int,
    CONFIG_FILE: str,
    MODEL_DIR: str,
    ground
) -> list[np.ndarray]:
    
    results = {}
    
    with TemporaryDirectory() as tmp:

        # Get unique TMP_DIR and make directory for specific process
        TMP_PARAM_FILE = f"{tmp}/params.csv"

        # Overwrite parameters with sample params
        for i, name in enumerate(in_names):
            params.at[POP_NUM - 1, name] = X[name]

        # Setup parameter, configuration, and output files
        params.to_csv(TMP_PARAM_FILE, index=False)

        # Check if verbosity is enabled for saving model stdout
        out = subprocess.DEVNULL

        CONFIG_FILE = os.path.abspath(CONFIG_FILE)

        p = subprocess.run(
            [
                "./run",
                TMP_PARAM_FILE,
                CONFIG_FILE,
                str(POP_NUM),
                tmp
            ],
            cwd=MODEL_DIR,
            stdout=out,
            stderr=out
        )

        if p.returncode != 0:
            results['status'] = STATUS_FAIL
        else:
            results['status'] = STATUS_OK

        # Get species, region, and site to determine output file
        species = params.at[POP_NUM - 1, 'i_sp']
        region = params.at[POP_NUM - 1, 'i_region']
        site = params.at[POP_NUM - 1, 'i_site']

        output_file = os.path.join(
            tmp, f"timesteps_output_{species}_{region}_{site}.csv"
        )
        if not os.path.exists(output_file):
            raise FileNotFoundError(
                f"Expected output file not found: {output_file}"
            )

        output_file = pd.read_csv(output_file)

        output = output_file[out_names].to_numpy(dtype=float)  # T x Y_D
            
        def calc_errors(
            outputs,
            ground,
            start_day,
            end_day
        ):

            errors = {}
            for idx, output_name in enumerate(out_names):

                # Filter ground data based on julian-day and drop NaN values
                col_ground = ground[
                    ground['julian-day'].between(start_day, end_day)
                ][output_name].dropna()

                # Align predictions with the filtered ground data
                col_pred = outputs[:, idx]  # (T)
                col_pred = pd.DataFrame(col_pred)
                pred_values = col_pred.loc[col_ground.index].T.to_numpy().squeeze(axis=0)

                ground_values = np.array([col_ground.to_numpy()]).squeeze(axis=0)

                # err = mean_absolute_percentage_error(ground_values, pred_values)
                # err = nse(ground_values, pred_values)
                err = r2_score(ground_values, pred_values)
                errors[output_name] = err

            return errors

        errors = calc_errors(output, ground, 201, 237)

        # loss = np.mean([v for v in errors.values()])
        # loss = errors['leaftemp']
        # results['loss'] = loss

        results['P-PD'] = errors['P-PD']
        results['P-MD'] = errors['P-MD']

    return results

In [None]:
in_names = [
    "i_fieldCapFrac",
    "i_fieldCapPercInit",
    "i_leafAreaIndex",
    "i_rootBeta",
    'i_kmaxTree'
]
out_names = [
    "P-PD", 
    "P-MD",
    # "GW",
    # "K-plant",
    # "E-MD"
]
# out_names = ['leaftemp']
params = pd.read_csv("./DBG/parameters.csv")
POP_NUM = 1
CONFIG_FILE = os.path.abspath("./DBG/configuration.csv")
MODEL_DIR = os.path.abspath("./garisom/02_program_code/")

In [98]:
ground = None
match POP_NUM:
    case 1:
        ground = pd.read_csv(os.path.abspath("data/ccr_hourly_data.csv"))
        # ground = pd.read_csv(os.path.abspath("data/ccr_leaftemp.csv"))
    case 2:
        ground = pd.read_csv(os.path.abspath("data/jla_hourly_data.csv"))
    case 3:
        ground = pd.read_csv(os.path.abspath("data/tsz_hourly_data.csv"))
    case 4:
        ground = pd.read_csv(os.path.abspath("data/nrv_hourly_data.csv"))
    case _:
        raise Exception("Incorrect POP_NUM!")

In [99]:
objective = partial(
    run_single_model,
    in_names=in_names,
    out_names=out_names,
    params=params,
    POP_NUM=POP_NUM,
    CONFIG_FILE=CONFIG_FILE,
    MODEL_DIR=MODEL_DIR,
    ground=ground
)

In [100]:
def model(config):
    res = objective(config)['loss']
    tune.report({'score' : res})

In [101]:
def hyperopt_model(config):
    res = objective(config)
    tune.report(res)

In [102]:
config = {
    'i_leafAreaIndex' : tune.uniform(0.2192, 7.0008),
    'i_rootBeta' : tune.uniform(0.9, 1),
    'i_fieldCapFrac' : tune.uniform(0, 1),
    'i_fieldCapPercInit' : tune.uniform(0, 100),
    'i_kmaxTree' : tune.uniform(14, 463)
}
current_best_param = [{
    'i_leafAreaIndex' : 3.61,
    'i_rootBeta' : 0.9996,
    'i_fieldCapFrac' : 0.06,
    'i_fieldCapPercInit' : 81,
    'i_kmaxTree' : 410
}]

In [103]:
# trials = Trials()

# space = {
#     'i_leafAreaIndex' : hp.uniform('i_leafAreaIndex', 0.2192, 7.0008),
#     'i_rootBeta' : hp.uniform('i_rootBeta', 0.9, 1),
#     'i_fieldCapFrac' : hp.uniform('i_fieldCapFrac', 0, 1),
#     'i_fieldCapPercInit' : hp.uniform('i_fieldCapPercInit', 0, 100),
#     'i_kmaxTree' : hp.uniform('i_kmaxTree', 14, 463)
# }

# tpe_algo = tpe.suggest

# best = fmin(fn=objective,
#             space=space,
#             algo=tpe_algo,
#             trials=trials,
#             max_evals=1000,
#             rstate=np.random.default_rng(42))

# best

In [104]:
ray.shutdown()
ray.init(ignore_reinit_error=True, num_cpus=30)

2025-07-07 15:18:51,850	INFO worker.py:1917 -- Started a local Ray instance.


0,1
Python version:,3.13.2
Ray version:,2.47.1


In [105]:
# search = HyperOptSearch(metric="score", mode='min', points_to_evaluate=current_best_param)
# search = BayesOptSearch(metric="score", mode='min', points_to_evaluate=current_best_param)
search = OptunaSearch(
    metric=['P-PD', 'P-MD'],
    mode=['max', 'max'],
    points_to_evaluate=current_best_param
)

In [None]:
tuner = tune.Tuner(
    hyperopt_model,
    tune_config=tune.TuneConfig(
        search_alg=search,
        num_samples=500
    ),
    run_config=tune.RunConfig(
        name="garisom_hyperparam_search",
        storage_path=os.getcwd()
    ),
    param_space=config
)

In [107]:
results = tuner.fit()

0,1
Current time:,2025-07-07 15:30:59
Running for:,00:12:07.18
Memory:,12.6/16.0 GiB

Trial name,status,loc,i_fieldCapFrac,i_fieldCapPercInit,i_kmaxTree,i_leafAreaIndex,i_rootBeta,iter,total time (s),P-PD,P-MD
hyperopt_model_943c124f,TERMINATED,127.0.0.1:21495,0.06,81.0,410.0,3.61,0.9996,1,4.31993,-0.479883,-0.609061
hyperopt_model_8c7e47ce,TERMINATED,127.0.0.1:21506,0.229549,23.366,204.992,0.943697,0.927639,1,4.97727,-11.0764,-0.488181
hyperopt_model_04fad456,TERMINATED,127.0.0.1:21533,0.133938,13.053,139.615,2.05921,0.950897,1,0.0900178,-18.3638,-89.0874
hyperopt_model_64e981b4,TERMINATED,127.0.0.1:21535,0.886331,12.3934,223.346,2.97367,0.94273,1,4.90392,-15.2985,-0.686711
hyperopt_model_6d45255a,TERMINATED,127.0.0.1:21538,0.917338,84.0763,409.17,5.79277,0.912374,1,5.38653,-11.1031,-2.51034
hyperopt_model_0816e716,TERMINATED,127.0.0.1:21544,0.283913,2.12077,434.768,0.631637,0.974612,1,0.0928929,-18.3638,-89.0874
hyperopt_model_6dfc213c,TERMINATED,127.0.0.1:21552,0.841812,8.07747,184.136,6.66677,0.917418,1,4.49768,-14.564,-0.803641
hyperopt_model_eeb804d2,TERMINATED,127.0.0.1:21559,0.0512106,56.0387,430.136,1.55675,0.99588,1,4.75859,-11.0106,-2.54149
hyperopt_model_aa22419e,TERMINATED,127.0.0.1:21562,0.0893224,27.1627,394.084,3.54261,0.96255,1,5.24112,-31.8635,-2.14458
hyperopt_model_797e3df8,TERMINATED,127.0.0.1:21574,0.249375,31.6819,421.688,4.19943,0.94691,1,5.33837,-7.96908,-2.60635


2025-07-07 15:30:59,651	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/Users/colinpannikkat/Documents/schoolwork/FEL/garisom/03_test_data/fremont-poplar-data/garisom_hyperparam_search' in 0.1907s.
2025-07-07 15:30:59,708	INFO tune.py:1041 -- Total run time: 727.26 seconds (726.98 seconds for the tuning loop).


In [109]:
display(results.get_best_result("P-PD", "max"))
display(results.get_best_result("P-MD", "max").config)

Result(
  metrics={'status': 'ok', 'P-PD': -0.0037138962442742063, 'P-MD': 0.09269597421387055},
  path='/Users/colinpannikkat/Documents/schoolwork/FEL/garisom/03_test_data/fremont-poplar-data/garisom_hyperparam_search/hyperopt_model_85d0575e_488_i_fieldCapFrac=0.0441,i_fieldCapPercInit=95.9813,i_kmaxTree=21.7049,i_leafAreaIndex=4.8172,i_rootBeta=_2025-07-07_15-30-36',
  filesystem='local',
  checkpoint=None
)

{'i_leafAreaIndex': 4.905114404239196,
 'i_rootBeta': 0.9300485222503206,
 'i_fieldCapFrac': 0.04723708484739657,
 'i_fieldCapPercInit': 97.87318689475508,
 'i_kmaxTree': 38.851930323089974}