### Generate Figures 2 and 3 from the database which contains all of the models

In [1]:
%%time
from utils_aa import *


# Both studies live in the same SQLite file. With load_if_exists=True an
# already-stored study is re-opened under its name instead of raising on the
# duplicate; `study` is bound to 'study_real' and `study_sampling` to
# 'study_real_sampling', created/loaded in that order.
study, study_sampling = (
    optuna.create_study(
        study_name=stored_name,
        storage="sqlite:///alejandro.db",
        sampler=optuna.samplers.NSGAIISampler(),
        load_if_exists=True,
    )
    for stored_name in ('study_real', 'study_real_sampling')
)

# Pareto plot of the sampling study.
make_pareto_plot(study_sampling)

# Ranking plots of the sampling study, using the median parameter dict
# derived from `study`.
make_ranking_plots(
    study_sampling,
    get_median_dict(study),
    data="data/data.csv",
    CUTOFF_AUC=0.9,
    CUTOFF_PHE=0.0,
)

[32m[I 2023-05-28 15:11:43,627][0m Using an existing study with name 'study_real' instead of creating a new one.[0m
[32m[I 2023-05-28 15:11:43,644][0m Using an existing study with name 'study_real_sampling' instead of creating a new one.[0m


CPU times: user 9.14 s, sys: 5.67 s, total: 14.8 s
Wall time: 8.66 s


## Alternative: Recreate and re-run models locally. 

In [2]:

%%time
from utils_aa import *

# Load the external configuration read by the cells below
# (ext_params["feature_engineering"]["removed_features"],
# ext_params["directions"] and ext_params["metrics"]).
# YAML is UTF-8 by specification, so decode explicitly instead of relying on
# the platform's locale encoding (which would garble non-ASCII entries on
# systems whose default is not UTF-8).
# NOTE(review): FullLoader resolves more tags than SafeLoader; if params.yml
# only holds plain scalars/lists/mappings, consider yaml.safe_load.
with open("params.yml", "r", encoding="utf-8") as f:
    ext_params = yaml.load(f, Loader=yaml.FullLoader)
    
def objective(trial, data, tuned_params=None, finetunning: bool = False) -> "tuple[float, float]":
    """Optuna objective: build one model and score it on two criteria.

    The function runs in one of two modes, selected by ``finetunning``:

    * ``finetunning=True``: the model parameters (``max_depth``, ``eta``,
      ``subsample``, ``lambda``, ``alpha``, ``scale_pos_weight``) are sampled
      from ``trial``; the seed is drawn with ``random.randint`` so that it is
      not part of the search space.
    * ``finetunning=False``: ``tuned_params`` is forwarded unchanged and the
      only sampled quantity is the seed (``trial.suggest_int("seed", ...)``).

    Args:
        trial: Optuna trial used to sample parameters.
        data: Forwarded as-is to ``generate_model``.
        tuned_params: Parameters passed to ``generate_model`` as ``xg_params``
            when ``finetunning`` is False; ignored otherwise.
        finetunning: Selects the mode described above.

    Returns:
        A 2-tuple: ``model.get_AUC_on_test_data()`` and the entry of
        ``model.get_feature_metrics()`` selected by ``ext_params['metrics'][0]``
        and ``ext_params['metrics'][1]``.
    """
    if finetunning:
        seed = random.randint(1, 10_000)
        # The seed is drawn outside the search space, so Optuna would not
        # otherwise store it; record it on the trial so any single result can
        # be reproduced later.
        trial.set_user_attr("seed", seed)

        params = {
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "max_depth": trial.suggest_int("max_depth", 2, 6),
            "eta": trial.suggest_float("eta", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.5, 0.9),
            "lambda": trial.suggest_float("lambda", 0, 1),
            "alpha": trial.suggest_float("alpha", 0, 1),
            "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0, 2),
        }
    else:
        # Fixed parameters: only the seed varies from trial to trial.
        seed = trial.suggest_int("seed", 1, 10_000)
        params = tuned_params

    model_instance = generate_model(
        "HOMA-IR alterado",
        data,
        removed_features=ext_params["feature_engineering"]["removed_features"],
        xg_params=params,
        kfold_splits=5,
        seed=seed,
    )

    # ext_params['metrics'] holds the two keys that select the second objective
    # inside the nested result of get_feature_metrics().
    outer_key, inner_key = ext_params['metrics'][0], ext_params['metrics'][1]
    return (
        model_instance.get_AUC_on_test_data(),
        model_instance.get_feature_metrics()[outer_key][inner_key],
    )
    
    
    
# New in-memory multi-objective study; the optimisation directions come from
# params.yml (ext_params["directions"]).
hyperparameters_fine_tuning = optuna.create_study(
    directions=ext_params["directions"],
    sampler=optuna.samplers.NSGAIISampler(),
)

# Fine-tuning run: every trial samples its own model parameters
# (finetunning=True). n_jobs=-1 lets Optuna pick the number of parallel jobs.
hyperparameters_fine_tuning.optimize(
    lambda trial: objective(trial, "data/data.csv", finetunning=True),
    n_trials=5_000,
    n_jobs=-1,
)


[32m[I 2023-05-28 15:11:51,046][0m A new study created in memory with name: no-name-80400474-b5d6-4d42-94f8-4f592bc3b3ab[0m
[32m[I 2023-05-28 15:11:52,285][0m Trial 1 finished with values: [0.8846153846153846, 0.0] and parameters: {'max_depth': 3, 'eta': 0.2564093367264163, 'subsample': 0.6842330143749339, 'lambda': 0.0422897874771061, 'alpha': 0.5262983423617751, 'scale_pos_weight': 1.0825499730901695}. [0m
[32m[I 2023-05-28 15:11:52,286][0m Trial 10 finished with values: [0.5, 0.0] and parameters: {'max_depth': 4, 'eta': 0.2965400969392987, 'subsample': 0.7842180062122874, 'lambda': 0.5210434569133615, 'alpha': 0.934686584768328, 'scale_pos_weight': 0.12006701003453157}. [0m
[32m[I 2023-05-28 15:11:52,514][0m Trial 7 finished with values: [0.5, 0.0] and parameters: {'max_depth': 4, 'eta': 0.13889986714606786, 'subsample': 0.8844939467758292, 'lambda': 0.4956241782650328, 'alpha': 0.594252716137535, 'scale_pos_weight': 0.16500134783342646}. [0m
[32m[I 2023-05-28 15:11:52,

CPU times: user 29min 40s, sys: 28min 58s, total: 58min 39s
Wall time: 14min 44s


In [3]:
%%time
# New in-memory multi-objective study for the seed-sampling run; the
# optimisation directions come from params.yml (ext_params["directions"]).
models_sampling = optuna.create_study(
    directions=ext_params["directions"],
    sampler=optuna.samplers.NSGAIISampler(),
)

# No trials are added to hyperparameters_fine_tuning in this cell, so its
# median parameters are loop-invariant: compute them once instead of once per
# trial, and reuse the very same values for the plots below.
median_params = get_median_dict(hyperparameters_fine_tuning)

# Seed-sampling run: parameters are fixed (finetunning=False) and only the
# seed varies between trials.
models_sampling.optimize(
    lambda trial: objective(
        trial,
        "data/data.csv",
        # Hand each trial its own shallow copy so parallel trials never share
        # one mutable object (assumes get_median_dict returns a mapping, as
        # its name suggests).
        tuned_params=dict(median_params),
        finetunning=False,
    ),
    n_trials=1000,
    n_jobs=-1,
)

make_pareto_plot(models_sampling)
make_ranking_plots(
    models_sampling,
    median_params,
    data="data/data.csv",
    CUTOFF_AUC=.9,
    CUTOFF_PHE=.0,
)

[32m[I 2023-05-28 15:26:35,391][0m A new study created in memory with name: no-name-9aabcc63-c0b3-466e-9e19-af0195c1f3db[0m
[32m[I 2023-05-28 15:26:43,353][0m Trial 1 finished with values: [0.7692307692307693, 0.0] and parameters: {'seed': 8213}. [0m
[32m[I 2023-05-28 15:26:43,563][0m Trial 2 finished with values: [0.75, 0.4479905366897583] and parameters: {'seed': 2709}. [0m
[32m[I 2023-05-28 15:26:43,706][0m Trial 0 finished with values: [0.7307692307692307, 0.0] and parameters: {'seed': 4606}. [0m
[32m[I 2023-05-28 15:26:43,744][0m Trial 6 finished with values: [0.7307692307692307, 0.0] and parameters: {'seed': 4897}. [0m
[32m[I 2023-05-28 15:26:43,974][0m Trial 8 finished with values: [0.9615384615384616, 0.0] and parameters: {'seed': 8368}. [0m
[32m[I 2023-05-28 15:26:43,975][0m Trial 5 finished with values: [0.8611111111111112, 0.0] and parameters: {'seed': 4455}. [0m
[32m[I 2023-05-28 15:26:44,458][0m Trial 3 finished with values: [0.6538461538461539, 0.0]

CPU times: user 11min 43s, sys: 5min 15s, total: 16min 58s
Wall time: 8min 49s
