# Model Tuning

In [None]:
import os
import pandas as pd
from addmo.util.definitions import results_dir_model_tuning, results_dir_data_tuning_auto, results_dir_data_tuning_fixed
from addmo.util.load_save_utils import root_dir
from addmo.util.experiment_logger import LocalLogger
from addmo.util.experiment_logger import WandbLogger
from addmo.util.experiment_logger import ExperimentLogger
from addmo.s3_model_tuning.models.keras_models import SciKerasSequential
from addmo.s3_model_tuning.config.model_tuning_config import ModelTuningExperimentConfig
from addmo.s3_model_tuning.config.model_tuning_config import ModelTunerConfig
from addmo.s3_model_tuning.model_tuner import ModelTuner
from addmo.util.load_save import load_data
from addmo.util.load_save import load_config_from_json
from addmo.util.data_handling import split_target_features
from addmo.s5_insights.model_plots.scatter_plot import scatter
from addmo.util.plotting_utils import save_pdf


Please define the missing TODOs in the section below according to the docstrings.


In [None]:
"""
Inputs for the model tuning run carried out by the cells below.

Parameters:
    user_input : str, optional
        If 'y', the contents of the target results directory will be overwritten.
        If 'd', the directory contents will be deleted. Default is 'y'.
    config_exp : ModelTuningExperimentConfig
        Experiment configuration. The cells below read abs_path_to_data,
        start_train_val, stop_train_val and name_of_target from it.
    config_tuner : ModelTunerConfig
        Tuner configuration handed to ModelTuner.
"""
user_input = 'y'
# TODO: assign a ModelTuningExperimentConfig instance before running the cells below.
config_exp = None
# TODO: assign a ModelTunerConfig instance before running the cells below.
config_tuner = None


Configure the logger


In [None]:
# Local logging: switched on, so results are written to the model tuning
# results directory resolved from the experiment config and user_input.
LocalLogger.active = True
if LocalLogger.active:
    LocalLogger.directory = results_dir_model_tuning(config_exp, user_input)

# WandbLogger: the project name is set, but the logger is switched off here, so
# its directory is only resolved if `active` is changed to True.
WandbLogger.project = "addmo-test_model_tuning"
WandbLogger.active = False
if WandbLogger.active:
    WandbLogger.directory = results_dir_model_tuning(config_exp, user_input)


Initialize logging


In [None]:
# Start the experiment on the ExperimentLogger, handing it the experiment configuration.
ExperimentLogger.start_experiment(config=config_exp)


Create the model tuner


In [None]:
# Build the tuner from the tuner configuration; it is used below to tune the
# models and to pick the best one.
model_tuner = ModelTuner(config=config_tuner)


Load the system data


In [None]:
# Read the data set from the path stored in the experiment configuration.
xy_tuned = load_data(config_exp.abs_path_to_data)


Select training and validation period


In [None]:
# Restrict the data to the configured training/validation period. The slice is
# applied only when both a start and a stop are set; otherwise all data is used.
xy_tuned_train_val = (
    xy_tuned.loc[config_exp.start_train_val:config_exp.stop_train_val]
    if config_exp.start_train_val and config_exp.stop_train_val
    else xy_tuned
)

# Separate the target column (named in the config) from the feature columns.
x_train_val, y_train_val = split_target_features(
    config_exp.name_of_target, xy_tuned_train_val
)


Log the first and last five rows of the training and validation data


In [None]:
# Log a preview made of the first and last five rows instead of the whole data set.
# NOTE(review): assuming a pandas DataFrame, head(5) and tail(5) overlap when there
# are fewer than ten rows, so the preview would then contain repeated rows.
ExperimentLogger.log({"xy_tuned_train_val": pd.concat([xy_tuned_train_val.head(5), xy_tuned_train_val.tail(5)])})


Tune the models


In [None]:
# Tune all models on the training/validation features and target; the result is
# passed to the best-model lookup in the next cell.
model_dict = model_tuner.tune_all_models(x_train_val, y_train_val)


Get the best model


In [None]:
# Ask the tuner for the name of the best entry in model_dict, then fetch that model.
best_model_name = model_tuner.get_best_model_name(model_dict)
best_model = model_tuner.get_model(model_dict, best_model_name)
# Predict on the same features the models were tuned on; these predictions feed
# the fit scatter plot created further below.
y_pred = best_model.predict(x_train_val)


Log the best model


In [None]:
# Choose the artifact type from the model class: SciKerasSequential models are
# logged as 'keras', every other model as 'joblib'.
art_type = 'keras' if isinstance(best_model, SciKerasSequential) else 'joblib'
name = 'best_model'
ExperimentLogger.log_artifact(best_model, name, art_type)

# Strip only the trailing file extension. Splitting the absolute path on the
# first "." would cut it short whenever a directory name contains a dot.
saved_data_name = os.path.splitext(config_exp.abs_path_to_data)[0]
ExperimentLogger.log_artifact(xy_tuned, saved_data_name, "system_data")

# Scatter plot of the target values against the predictions, saved as a PDF in
# the local logger directory and then displayed.
plt = scatter(y_train_val, y_pred, config_exp.name_of_target, best_model.fit_error)
save_pdf(plt, os.path.join(LocalLogger.directory, 'model_fit_scatter'))
plt.show()


# Signal that every cell of the notebook has run.
print("Finished")
