## Generate Figures 2 and 3 from the database which contains all our models

In [1]:

from utils_aa import *


# Both stored studies live in the same SQLite database file.
_STORAGE_URL = "sqlite:///alejandro.db"


def _open_study(name):
    """Attach to the study called *name* in the shared SQLite storage.

    ``load_if_exists=True`` makes Optuna reuse a study that is already
    stored under that name instead of failing; if no such study exists a
    new one is created.
    """
    return optuna.create_study(
        study_name=name,
        storage=_STORAGE_URL,
        sampler=optuna.samplers.NSGAIISampler(),
        load_if_exists=True,
    )


study = _open_study("study_real")
study_sampling = _open_study("study_real_sampling")

# Plots for the figures named in the notebook heading (Figures 2 and 3).
make_pareto_plot(study_sampling)

# The ranking plots combine the sampling study with the parameters that
# get_median_dict derives from the other stored study.
median_params = get_median_dict(study)
make_ranking_plots(
    study_sampling,
    median_params,
    data="data/data.csv",
    CUTOFF_AUC=0.9,
    CUTOFF_PHE=0.0,
)

[I 2023-05-06 19:32:03,414] Using an existing study with name 'study_real' instead of creating a new one.
[I 2023-05-06 19:32:03,433] Using an existing study with name 'study_real_sampling' instead of creating a new one.


## Recreate and re-run models 

In [None]:


from utils_aa import *

# Experiment configuration used by the cells below. The keys read in this
# notebook are "feature_engineering" -> "removed_features", "metrics" and
# "directions".
#
# The file is opened as UTF-8 explicitly: YAML is UTF-8 by default, whereas
# open() without an encoding falls back to the platform locale and can
# mis-decode non-ASCII text on some systems.
#
# NOTE(review): FullLoader can construct more than plain YAML types. If
# params.yml only holds plain scalars, lists and mappings, yaml.safe_load
# would be the safer loader -- confirm before switching.
with open("params.yml", "r", encoding="utf-8") as f:
    ext_params = yaml.load(f, Loader=yaml.FullLoader)
    
def objective(trial, data, tuned_params=None, finetunning: bool = False) -> tuple:
    """Multi-objective Optuna objective: build one model and score it.

    The function runs in one of two modes, selected by ``finetunning``:

    * ``finetunning=True``: the model hyperparameters are sampled from
      ``trial`` and the seed is drawn at random.
    * ``finetunning=False``: ``tuned_params`` is used unchanged and only
      the seed is sampled from ``trial``.

    Args:
        trial: Optuna trial used to sample the search space.
        data: Forwarded unchanged to ``generate_model``; the callers in
            this notebook pass the path of a CSV file.
        tuned_params: Parameters handed to ``generate_model`` as
            ``xg_params`` when ``finetunning`` is false; ignored otherwise.
        finetunning: ``True`` to search hyperparameters, ``False`` to
            search seeds with fixed hyperparameters.

    Returns:
        A 2-tuple: the value of ``get_AUC_on_test_data()`` followed by the
        entry of ``get_feature_metrics()`` addressed by the two keys stored
        in ``ext_params["metrics"]``.
    """
    if finetunning:
        seed = random.randint(1, 10_000)
        # The seed is not part of the search space in this mode, so it
        # would be lost once the trial ends. Store it on the trial so that
        # any individual trial can be re-run with the same seed.
        trial.set_user_attr("seed", seed)

        params = {
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "max_depth": trial.suggest_int("max_depth", 2, 6),
            "eta": trial.suggest_float("eta", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.5, 0.9),
            "lambda": trial.suggest_float("lambda", 0, 1),
            "alpha": trial.suggest_float("alpha", 0, 1),
            "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0, 2),
        }
    else:
        # NOTE(review): with the default tuned_params=None, xg_params=None
        # reaches generate_model -- confirm that it accepts that.
        seed = trial.suggest_int("seed", 1, 10_000)
        params = tuned_params

    model_instance = generate_model(
        "HOMA-IR alterado",
        data,
        removed_features=ext_params["feature_engineering"]["removed_features"],
        xg_params=params,
        kfold_splits=5,
        seed=seed,
    )

    # ext_params["metrics"] holds the two keys that select a single value
    # from the nested result of get_feature_metrics().
    outer_key, inner_key = ext_params["metrics"][0], ext_params["metrics"][1]

    return (
        model_instance.get_AUC_on_test_data(),
        model_instance.get_feature_metrics()[outer_key][inner_key],
    )
    
    
    
# Stage 1: multi-objective search over the model hyperparameters. The
# optimisation directions come from params.yml.
def _finetuning_objective(trial):
    """Score one trial whose hyperparameters are sampled from the trial."""
    return objective(trial, "data/data.csv", finetunning=True)


hyperparameters_fine_tuning = optuna.create_study(
    directions=ext_params["directions"],
    sampler=optuna.samplers.NSGAIISampler(),
)

# n_jobs=-1 leaves the number of parallel workers to Optuna.
hyperparameters_fine_tuning.optimize(
    _finetuning_objective,
    n_trials=5000,
    n_jobs=-1,
)


In [None]:
# Stage 2: keep the hyperparameters fixed and let each trial sample only
# the seed (objective(..., finetunning=False)).
models_sampling = optuna.create_study(
    directions=ext_params["directions"],
    sampler=optuna.samplers.NSGAIISampler(),
)

# Nothing in this cell modifies the fine-tuning study, so the parameters
# derived from it are computed once here instead of on each of the 5000
# trials, and the same value is reused for the ranking plots below.
# NOTE(review): every trial now receives the same object -- assumes
# generate_model does not mutate xg_params; confirm.
median_params = get_median_dict(hyperparameters_fine_tuning)

models_sampling.optimize(
    lambda trial: objective(
        trial,
        "data/data.csv",
        tuned_params=median_params,
        finetunning=False,
    ),
    n_trials=5000,
    n_jobs=-1,
)

make_pareto_plot(models_sampling)
make_ranking_plots(
    models_sampling,
    median_params,
    data="data/data.csv",
    CUTOFF_AUC=0.9,
    CUTOFF_PHE=0.0,
)
