In [None]:
from xgboost import XGBRegressor
import random

import sys

sys.path.append("../..")

from src.data_loaders import get_wake_datasets
from src.evaluation import evaluate_model
import src.plotting as plotting

In [None]:
# --- Data location ----------------------------------------------------------
FACTORS_FOLDER = "discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01"
DATA_FOLDER = f"../../data/{FACTORS_FOLDER}/"

# --- Training-set selection -------------------------------------------------
# Per-variable reduction factor handed to the dataset loader; each entry also
# contributes a "<var><factor>" tag to the model name.
INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR = {"ti": 4, "ct": 4}
reduction_tags = (
    f"{var}{factor}" for var, factor in INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR.items()
)
train_reduc_factor_string = "training_factors=" + "-".join(reduction_tags)
# Alternative, currently disabled: select the training set by value ranges.
# INPUT_VAR_TO_TRAIN_RANGES = {'ti': [(0.15, 0.4)], 'ct': [(0.3, 0.7)]}
# train_range_string = "training_ranges=" + '-'.join([f"{var}{r[0]}-{r[1]}" for var, ranges in INPUT_VAR_TO_TRAIN_RANGES.items() for r in ranges])

# --- Input-feature switches -------------------------------------------------
CONSIDER_WS = False
COORDS_AS_INPUT = True  # univariate setting

# --- Identifiers used to label this run -------------------------------------
# The "_consider_ws" suffix is appended only when CONSIDER_WS is enabled.
MODEL_NAME = f"univariate_XGB_{train_reduc_factor_string}" + (
    "_consider_ws" if CONSIDER_WS else ""
)
MODEL_DESCRIPTION = f"{MODEL_NAME}_{FACTORS_FOLDER}"
print(MODEL_DESCRIPTION)

In [None]:
# Load the three dataset splits from DATA_FOLDER. The loader's return value is
# unpacked in (train, validation, test) order.
# Disabled alternative kept for reference (presumably a percentage-based
# split -- confirm against get_wake_datasets):
#   train_perc=0.6, validation_perc=0.2, test_perc=0.2
wake_splits = get_wake_datasets(
    DATA_FOLDER,
    consider_ws=CONSIDER_WS,
    coords_as_input=COORDS_AS_INPUT,
    input_var_to_train_reduction_factor=INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR,
)
train_dataset, valid_dataset, test_dataset = wake_splits

# Quick sanity check on the training split.
train_x = train_dataset.inputs
train_y = train_dataset.outputs
print("Train shapes: ", train_x.shape, train_y.shape)

# Number of cells reported by the training dataset.
grid_size = train_dataset.num_cells
print(f"{grid_size=}")

In [None]:
# Pull the inputs/outputs out of every split, print their shapes and record
# the number of samples (first dimension of the inputs) per split.
train_x = train_dataset.inputs
train_y = train_dataset.outputs
print("Train shapes: ", train_x.shape, train_y.shape)
train_size = train_x.shape[0]

test_x = test_dataset.inputs
test_y = test_dataset.outputs
print("Test shapes: ", test_x.shape, test_y.shape)
test_size = test_x.shape[0]

valid_x = valid_dataset.inputs
valid_y = valid_dataset.outputs
print("Valid shapes: ", valid_x.shape, valid_y.shape)
valid_size = valid_x.shape[0]

# Share of all samples held by each split, printed in train / test / valid order.
size = train_size + test_size + valid_size
print(*(split_size / size for split_size in (train_size, test_size, valid_size)))

# Training

In [None]:
# Fit an XGBoost regressor on the training split. The constructor is called
# without arguments, so every hyperparameter is left at the library default.
model = XGBRegressor()
model.fit(train_x, train_y)
# NOTE(review): the two disabled lines below reference `export_text`, which is
# not imported in this notebook's import cell -- looks like a leftover from a
# different model variant; confirm and delete.
# tree_text = export_text(model)
# print(tree_text)

In [None]:
# Print the importance score the fitted model reports for every input feature,
# labelled with the name the training dataset assigns to that feature index.
importances = model.feature_importances_
featurenames = map(
    train_dataset.featurenum_to_featurename, range(len(importances))
)
for featurename, importance in zip(featurenames, importances):
    print(f"{featurename} feature importance={importance}")

# Evaluation and testing

In [None]:
# Score the fitted model on the training and the test split with identical
# settings. `save_results` is forwarded to evaluate_model unchanged; what gets
# written, and where, is decided inside that helper.
save_results = True
shared_eval_kwargs = dict(
    model_description=MODEL_DESCRIPTION,
    save_results=save_results,
)

# evaluation on trainset
evaluate_model(model, data=(train_x, train_y), data_type="train", **shared_eval_kwargs)

print("\n")

# evaluation on testset
evaluate_model(model, data=(test_x, test_y), data_type="test", **shared_eval_kwargs)

In [None]:
# Plot ground-truth vs. predicted wake maps for a random selection of test
# fields.
#
# The selection is drawn from a dedicated, seeded random generator so that a
# "Restart Kernel -> Run All" always shows the same fields; this also leaves
# the global `random` state untouched.
PLOT_SEED = 0  # change to look at a different (but still reproducible) selection
NUM_FIELDS_TO_PLOT = 10

# The test dataset is indexed per cell; consecutive groups of `num_cells`
# entries are treated as one field.
num_cells = test_dataset.num_cells
num_fields = len(test_dataset) // num_cells

field_indices = list(range(num_fields))
random.Random(PLOT_SEED).shuffle(field_indices)

# Slicing tolerates test sets with fewer than NUM_FIELDS_TO_PLOT fields.
for field_idx in field_indices[:NUM_FIELDS_TO_PLOT]:
    ti, ct, ws, wake_field, predicted_wake_field = (
        test_dataset.get_parameters_for_plotting_univariate(model, field_idx)
    )
    plotting.plot_maps(
        test_dataset.X_grid,
        test_dataset.Y_grid,
        wake_field,
        predicted_wake_field,
        ti,
        ct,
        ws,
        error_to_plot="absolute",
    )