## Generate Figures 2 and 3 from the database that contains all of the models

In [3]:
%%time
from utils_aa import *


# Load the two previously populated studies from the SQLite database.
# `load_study` is used instead of `create_study(..., load_if_exists=True)` so
# that a wrong study name or database path raises immediately rather than
# silently creating an empty study that only fails later inside the plotting
# helpers.
study = optuna.load_study(
    study_name="study_real",
    storage="sqlite:///alejandro.db",
    sampler=optuna.samplers.NSGAIISampler(),
)

study_sampling = optuna.load_study(
    study_name="study_real_sampling",
    storage="sqlite:///alejandro.db",
    sampler=optuna.samplers.NSGAIISampler(),
)

# Render the figures from the stored trials.
make_pareto_plot(study_sampling)

# The second argument is whatever `get_median_dict` derives from `study`
# (presumably the median hyper-parameters of the tuning study -- TODO confirm
# against utils_aa).
make_ranking_plots(
    study_sampling,
    get_median_dict(study),
    data="data/data.csv",
    CUTOFF_AUC=0.9,
    CUTOFF_PHE=0.0,
)

[32m[I 2023-05-28 15:10:40,788][0m Using an existing study with name 'study_real' instead of creating a new one.[0m
[32m[I 2023-05-28 15:10:40,815][0m Using an existing study with name 'study_real_sampling' instead of creating a new one.[0m


CPU times: user 8.65 s, sys: 5.12 s, total: 13.8 s
Wall time: 7.15 s


## Alternative: Recreate and re-run models locally. 

In [2]:

%%time
from utils_aa import *

# Read the external configuration used by the cells below; this notebook reads
# the keys "feature_engineering" -> "removed_features", "metrics" and
# "directions" from it.
# NOTE(review): `yaml.load` with `FullLoader` can construct more than plain
# data types; if params.yml only holds plain mappings/lists/scalars,
# `yaml.safe_load` would be the safer choice -- confirm before switching.
with open("params.yml", "r") as f:
    ext_params = yaml.load(f, Loader=yaml.FullLoader)
    
def objective(trial, data, tuned_params=None, finetunning: bool = False) -> tuple:
    """Train one model for an Optuna trial and return its two objective values.

    The function has two modes, selected by ``finetunning``:

    * ``finetunning=True``: the XGBoost hyper-parameters (``max_depth``,
      ``eta``, ``subsample``, ``lambda``, ``alpha``, ``scale_pos_weight``) are
      suggested by ``trial`` and the seed is drawn with ``random.randint``.
      Because that seed is not an Optuna parameter, it is stored on the trial
      as the user attribute ``"seed"`` so the run can be reproduced.
    * ``finetunning=False``: ``tuned_params`` is passed through unchanged as
      the XGBoost parameters and only the seed is suggested by ``trial``.

    Args:
        trial: Optuna trial used for ``suggest_*`` calls.
        data: Forwarded as the second positional argument of
            ``generate_model`` (callers in this notebook pass a CSV path).
        tuned_params: XGBoost parameters used when ``finetunning`` is False.
            NOTE(review): when left as ``None`` in that mode, ``None`` is
            forwarded as ``xg_params``; whether ``generate_model`` accepts
            that is not visible here.
        finetunning: Whether to search the hyper-parameters (True) or only
            the seed (False).

    Returns:
        A 2-tuple ``(auc, feature_metric)`` where ``auc`` comes from
        ``model_instance.get_AUC_on_test_data()`` and ``feature_metric`` is
        ``get_feature_metrics()[m0][m1]`` with ``m0, m1`` taken from the first
        two entries of ``ext_params['metrics']``.
    """
    if finetunning:
        seed = random.randint(1, 10_000)
        # Keep the otherwise-untracked seed alongside the trial's results.
        trial.set_user_attr("seed", seed)

        params = {
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "max_depth": trial.suggest_int("max_depth", 2, 6),
            "eta": trial.suggest_float("eta", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.5, 0.9),
            "lambda": trial.suggest_float("lambda", 0, 1),
            "alpha": trial.suggest_float("alpha", 0, 1),
            "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0, 2),
        }
    else:
        seed = trial.suggest_int("seed", 1, 10_000)
        params = tuned_params

    model_instance = generate_model(
        "HOMA-IR alterado",
        data,
        removed_features=ext_params["feature_engineering"]["removed_features"],
        xg_params=params,
        kfold_splits=5,
        seed=seed,
    )

    # Order matters: it must line up with the `directions` of the study.
    return (
        model_instance.get_AUC_on_test_data(),
        model_instance.get_feature_metrics()[ext_params['metrics'][0]][ext_params['metrics'][1]],
    )
    
    
    
# Stage 1: multi-objective search over the XGBoost hyper-parameters.
# The study has no storage argument, so it lives in memory only.
hyperparameters_fine_tuning = optuna.create_study(
    directions=ext_params["directions"],
    sampler=optuna.samplers.NSGAIISampler(),
)

# n_jobs=-1 lets Optuna pick the number of parallel workers.
hyperparameters_fine_tuning.optimize(
    lambda trial: objective(trial, "data/data.csv", finetunning=True),
    n_trials=5000,
    n_jobs=-1,
)


[32m[I 2023-05-28 15:08:39,279][0m A new study created in memory with name: no-name-d1d2a561-341f-4d50-a0cf-402e19f4d944[0m
[32m[I 2023-05-28 15:08:40,867][0m Trial 6 finished with values: [0.7692307692307692, 0.0] and parameters: {'max_depth': 2, 'eta': 0.2696207289818098, 'subsample': 0.6657685314791127, 'lambda': 0.3135868768831108, 'alpha': 0.18465005871870666, 'scale_pos_weight': 0.2881265447265067}. [0m
[32m[I 2023-05-28 15:08:40,915][0m Trial 0 finished with values: [0.8269230769230769, 0.0] and parameters: {'max_depth': 6, 'eta': 0.27798723696149, 'subsample': 0.689683364034369, 'lambda': 0.19299268499659938, 'alpha': 0.8273727907619026, 'scale_pos_weight': 0.5240419230854463}. [0m
[32m[I 2023-05-28 15:08:41,009][0m Trial 13 finished with values: [0.6923076923076923, 0.0] and parameters: {'max_depth': 2, 'eta': 0.22716659232252673, 'subsample': 0.667450830525906, 'lambda': 0.3512406191974735, 'alpha': 0.27157909828361804, 'scale_pos_weight': 0.9608222325011924}. [0m

KeyboardInterrupt: 

In [None]:
# Stage 2 study: same objective directions and sampler as the tuning study,
# also kept in memory only.
models_sampling = optuna.create_study(
    directions=ext_params["directions"],
    sampler=optuna.samplers.NSGAIISampler(),
)




In [None]:

# Stage 2: the XGBoost parameters are fixed to what `get_median_dict` derives
# from the tuning study; only the seed is varied per trial.
# NOTE(review): `get_median_dict(...)` is re-evaluated on every trial because
# it sits inside the lambda; if it is pure, it could be computed once.
models_sampling.optimize(
    lambda trial: objective(
        trial,
        "data/data.csv",
        tuned_params=get_median_dict(hyperparameters_fine_tuning),
        finetunning=False,
    ),
    n_trials=1000,
    n_jobs=-1,
)



In [None]:


# Same figures as the first section, but built from the locally re-run studies.
make_pareto_plot(models_sampling)

make_ranking_plots(
    models_sampling,
    get_median_dict(hyperparameters_fine_tuning),
    data="data/data.csv",
    CUTOFF_AUC=0.9,
    CUTOFF_PHE=0.0,
)
