In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from hyperopt import hp, rand, tpe, Trials, fmin, STATUS_FAIL, STATUS_OK

import os
import subprocess
from tempfile import TemporaryDirectory
from functools import partial

from sklearn.metrics import mean_absolute_percentage_error, r2_score

import ray
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.search.bayesopt import BayesOptSearch

from ray.tune.search.optuna import OptunaSearch
import optuna

In [None]:
def nse(predictions, targets):
    """Nash-Sutcliffe efficiency of ``predictions`` against ``targets``.

    Computed as ``1 - sum((targets - predictions)^2) / sum((targets - mean(targets))^2)``.
    A value of 1 is a perfect fit; 0 means the predictions are no better
    than the mean of the targets; negative values are worse than that.

    Args:
        predictions: Array-like of simulated values.
        targets: Array-like of observed values, same length as ``predictions``.

    Returns:
        The efficiency as a float. When the targets have zero variance the
        ratio is undefined and 1.0 is returned (behaviour kept from the
        original implementation).
    """
    # Coerce to plain float arrays: lists become usable, and pandas objects
    # are compared positionally instead of being silently index-aligned.
    predictions = np.asarray(predictions, dtype=float)
    targets = np.asarray(targets, dtype=float)

    denom = np.sum((targets - np.mean(targets)) ** 2)
    if denom == 0:
        return 1.0
    return float(1 - (np.sum((targets - predictions) ** 2) / denom))

In [None]:
def run_single_model(
    X: dict[str, float],
    in_names: list,
    out_names: list,
    params: pd.DataFrame,
    POP_NUM: int,
    CONFIG_FILE: str,
    MODEL_DIR: str,
    ground,
    start_day: int = 201,
    end_day: int = 237,
    loss_output: str = 'leaftemp',
) -> dict:
    """Run the external model once with sampled parameters and score the result.

    The sampled values in ``X`` are written into row ``POP_NUM - 1`` of a copy
    of ``params``, the ``./run`` executable in ``MODEL_DIR`` is invoked inside
    a temporary directory, and the requested output columns are compared with
    the observations in ``ground`` using the Nash-Sutcliffe efficiency (NSE).

    Args:
        X: Sampled parameter values keyed by parameter name.
        in_names: Names of the parameter columns to overwrite from ``X``.
        out_names: Output columns to read from the model's timestep file.
        params: Parameter table; it is copied, never modified in place.
        POP_NUM: 1-based population number; selects the parameter row and is
            passed to the executable.
        CONFIG_FILE: Path to the configuration file (made absolute because
            the executable runs with ``MODEL_DIR`` as working directory).
        MODEL_DIR: Directory containing the ``run`` executable.
        ground: Observations; must contain a ``'julian-day'`` column and one
            column per entry in ``out_names``.
        start_day: First julian day (inclusive) of the scoring window.
        end_day: Last julian day (inclusive) of the scoring window.
        loss_output: Entry of ``out_names`` whose score drives the loss.

    Returns:
        A hyperopt-style result dict. On success it holds ``'status'``
        (``STATUS_OK``), ``'nse'`` (the efficiency of ``loss_output``) and
        ``'loss'`` (``1 - nse``, so that minimising the loss maximises the
        fit). When the executable exits with a non-zero code, or the score is
        not finite, only ``'status'`` (``STATUS_FAIL``) is returned.

    Raises:
        FileNotFoundError: The executable reported success but the expected
            timestep output file is missing.
    """
    # Work on a private copy so the caller's table (shared across trials via
    # functools.partial) never accumulates sampled values.
    params = params.copy()
    row = POP_NUM - 1
    for name in in_names:
        params.at[row, name] = X[name]

    # Species, region and site determine the name of the output file.
    species = params.at[row, 'i_sp']
    region = params.at[row, 'i_region']
    site = params.at[row, 'i_site']

    with TemporaryDirectory() as tmp:
        param_file = os.path.join(tmp, "params.csv")
        params.to_csv(param_file, index=False)

        # The model's stdout/stderr are discarded.
        proc = subprocess.run(
            [
                "./run",
                param_file,
                os.path.abspath(CONFIG_FILE),
                str(POP_NUM),
                tmp
            ],
            cwd=MODEL_DIR,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )

        # A crashed run is reported as a failed trial instead of falling
        # through to the file check and aborting the whole search.
        if proc.returncode != 0:
            return {'status': STATUS_FAIL}

        output_file = os.path.join(
            tmp, f"timesteps_output_{species}_{region}_{site}.csv"
        )
        if not os.path.exists(output_file):
            raise FileNotFoundError(
                f"Expected output file not found: {output_file}"
            )

        # Must be read before the temporary directory is removed.
        output = pd.read_csv(output_file)[out_names].to_numpy(dtype=float)  # T x Y_D

    in_window = ground['julian-day'].between(start_day, end_day)

    errors = {}
    for idx, output_name in enumerate(out_names):
        # Observations inside the window, without missing values.
        col_ground = ground.loc[in_window, output_name].dropna()

        # Pick the model rows carrying the same index labels as the kept
        # observations.
        # NOTE(review): assumes row i of `ground` corresponds to timestep i of
        # the model output - confirm against the data files.
        pred_values = pd.Series(output[:, idx]).loc[col_ground.index].to_numpy()
        ground_values = col_ground.to_numpy(dtype=float)

        # nse(predictions, targets): observations are the targets.
        errors[output_name] = nse(pred_values, ground_values)

    score = errors[loss_output]

    # A NaN/inf score cannot be ranked by the optimiser; treat it as a failure.
    if not np.isfinite(score):
        return {'status': STATUS_FAIL}

    # NSE is "higher is better" while the optimiser minimises 'loss'.
    return {'status': STATUS_OK, 'nse': score, 'loss': 1 - score}

In [None]:
# Model parameters (columns of the parameter table) that the search overwrites.
in_names = [
    'i_fieldCapFrac',
    'i_fieldCapPercInit',
    'i_leafAreaIndex',
    'i_rootBeta',
    'i_kmaxTree',
]

# Model output column(s) compared against the observations.
# An earlier, now disabled, selection listed "P-PD" and "P-MD"
# (with "GW", "K-plant" and "E-MD" switched off).
out_names = ["leaftemp"]

# Baseline parameter table; one row per population.
params = pd.read_csv('../DBG/parameters.csv')

# 1-based population number selecting the parameter row and the site data.
POP_NUM = 1

# Absolute paths, since the model executable runs from its own directory.
CONFIG_FILE = os.path.abspath('../DBG/configuration.csv')
MODEL_DIR = os.path.abspath('../garisom/02_program_code/')

In [None]:
# Load the observed hourly data for the population selected by POP_NUM.
# (For population 1 a leaf-temperature-only file, data/ccr_leaftemp.csv,
# appears as a disabled alternative in the original cell.)
ground = None
if POP_NUM not in (1, 2, 3, 4):
    raise Exception("Incorrect POP_NUM!")
ground = pd.read_csv(
    os.path.abspath(
        {
            1: "../data/ccr_hourly_data.csv",
            2: "../data/jla_hourly_data.csv",
            3: "../data/tsz_hourly_data.csv",
            4: "../data/nrv_hourly_data.csv",
        }[POP_NUM]
    )
)

In [None]:
# Freeze everything except the sampled parameter dict, so that optimisers can
# call `objective(sample)` with a single positional argument.
objective = partial(
    run_single_model,
    ground=ground,
    MODEL_DIR=MODEL_DIR,
    CONFIG_FILE=CONFIG_FILE,
    POP_NUM=POP_NUM,
    params=params,
    out_names=out_names,
    in_names=in_names,
)

In [None]:
def model(config):
    """Ray Tune trainable: run the model for ``config`` and report its loss as ``'score'``."""
    tune.report({'score': objective(config)['loss']})

In [None]:
def hyperopt_model(config):
    """Ray Tune trainable: run the model for ``config`` and report the whole result dict."""
    tune.report(objective(config))

In [None]:
# Ray Tune search space: one uniform range per tuned model parameter.
config = dict(
    i_leafAreaIndex=tune.uniform(0.2192, 7.0008),
    i_rootBeta=tune.uniform(0.9, 1),
    i_fieldCapFrac=tune.uniform(0, 1),
    i_fieldCapPercInit=tune.uniform(0, 100),
    i_kmaxTree=tune.uniform(14, 463),
)

# One concrete parameter set, wrapped in a list.
# Presumably the best point found so far, meant to seed a searcher - confirm.
current_best_param = [
    dict(
        i_leafAreaIndex=3.61,
        i_rootBeta=0.9996,
        i_fieldCapFrac=0.06,
        i_fieldCapPercInit=81,
        i_kmaxTree=410,
    )
]

In [None]:
# Record of every evaluation performed by the search.
trials = Trials()

# hyperopt search space: a uniform prior per parameter, labelled by its name.
space = {
    name: hp.uniform(name, low, high)
    for name, low, high in (
        ('i_leafAreaIndex', 0.2192, 7.0008),
        ('i_rootBeta', 0.9, 1),
        ('i_fieldCapFrac', 0, 1),
        ('i_fieldCapPercInit', 0, 100),
        ('i_kmaxTree', 14, 463),
    )
}

# Tree-structured Parzen Estimator suggestions.
tpe_algo = tpe.suggest

# Minimise the objective's 'loss' over 1000 evaluations with a fixed seed.
best = fmin(
    fn=objective,
    space=space,
    algo=tpe_algo,
    trials=trials,
    max_evals=1000,
    rstate=np.random.default_rng(42),
)

best

In [None]:
# ray.shutdown()
# ray.init(ignore_reinit_error=True, num_cpus=30)

In [None]:
# # search = HyperOptSearch(metric="score", mode='min', points_to_evaluate=current_best_param)
# # search = BayesOptSearch(metric="score", mode='min', points_to_evaluate=current_best_param)
# search = OptunaSearch(
#     metric=['P-PD', 'P-MD'],
#     mode=['max', 'max'],
#     points_to_evaluate=current_best_param
# )

In [None]:
# tuner = tune.Tuner(
#     hyperopt_model,
#     tune_config=tune.TuneConfig(
#         search_alg=search,
#         num_samples=500
#     ),
#     run_config=tune.RunConfig(
#         name="garisom_hyperparam_search",
#         storage_path=os.getcwd()
#     ),
#     param_space=config
# )

In [None]:
# results = tuner.fit()

In [None]:
# display(results.get_best_result("P-PD", "max"))
# display(results.get_best_result("P-MD", "max").config)