In [64]:
using JLD2
using MLJ

include("../src/reduction.jl")
include("../src/ensembles.jl")

# Restore the saved datasets and the cross-validation indices from JLD2 files.
# Each `@JLD2.load` call names the file, followed by the variable(s) to read from it.
@JLD2.load "variables/train_dataset.jld2" train_inputs train_targets
@JLD2.load "variables/test_dataset.jld2" test_inputs test_targets
# NOTE(review): this extension is spelled `.jdl2`, unlike the two `.jld2` files above.
# The recorded cell output lists :cross_val_index, so the load appears to have
# succeeded with this name — confirm the on-disk filename before renaming anything.
@JLD2.load "variables/cross_val_index.jdl2" cross_val_index


1-element Vector{Symbol}:
 :cross_val_index

### Cargamos los modelos

In [65]:
# Load each model through `MLJ.@load`, naming the providing package with `pkg=` and
# passing `verbosity=0`; the value returned by each call is bound to a top-level name.
# The recorded output of this cell shows the last call evaluating to
# `LightGBM.MLJInterface.LGBMClassifier`.
# NOTE(review): none of these names are referenced in the visible cells; presumably
# they are consumed by the code included from ../src/ensembles.jl — confirm.
BaggingClassifier = MLJ.@load BaggingClassifier pkg=MLJScikitLearnInterface verbosity=0
AdaBoost = MLJ.@load AdaBoostStumpClassifier pkg=DecisionTree verbosity=0
EvoTreeClassifier = MLJ.@load EvoTreeClassifier pkg=EvoTrees verbosity=0
kNNClassifier = MLJ.@load KNNClassifier pkg=NearestNeighborModels verbosity=0
XGBoostClassifier = MLJ.@load XGBoostClassifier pkg=XGBoost verbosity=0
LGBMClassifier = MLJ.@load LGBMClassifier pkg=LightGBM verbosity=0
# CatBoostClassifier = MLJ.@load CatBoostClassifier pkg=CatBoost verbosity=0 # Disabled: does not work because of a packaging problem

LightGBM.MLJInterface.LGBMClassifier

### Entrenamos las diferentes configuraciones

In [66]:
# Configurations to evaluate. Every Dict names its ensemble kind under "modelType"
# and carries the remaining settings for that kind alongside it.
configs = [
    Dict("modelType" => :AdaB, "n_iter" => 100),
    Dict("modelType" => :BaggingC, "n_estimators" => 10),
    Dict("modelType" => :BaggingC, "n_estimators" => 50),
    Dict("modelType" => :EvoT, "n_estimators" => 50),
    Dict("modelType" => :EvoT, "n_estimators" => 100),
]
Emodels = [:AdaB, :BaggingC, :EvoT]
# One growable list of (scores, config) tuples per ensemble kind.
metrics = Dict(kind => Any[] for kind in Emodels);
for cfg in configs
    kind = cfg["modelType"]
    # The full Dict (including "modelType") is handed to the trainer unchanged.
    scores = TrainCrossValEnsembles(
        kind, cfg, (train_inputs, train_targets), cross_val_index
    )
    push!(metrics[kind], (scores, cfg))
end;


### Selección de los mejores de cada tipo

In [73]:
# For every ensemble kind, keep the (scores, config) entry ranked highest by the
# first component of its first score pair, then print all entries of that kind.
# NOTE(review): the printed results suggest each pair is (mean, std) — confirm
# against TrainCrossValEnsembles.
bestconfigs = Dict();
for model in Emodels
    entries = metrics[model]
    # Descending sort puts the highest-scoring entry in front.
    ranked = sort(entries; by=entry -> entry[1][1][1], rev=true)
    bestconfigs[model] = first(ranked)
    println(entries)
end;


Any[(((0.4312213279950857, 0.10981907569094637), (0.6552604079954993, 0.22144527989003301), (0.565383532164093, 0.11811587238914315), (0.5734476667677151, 0.0438603536803528)), Dict{String, Any}("modelType" => :AdaB, "n_iter" => 100))]
Any[(((0.44141925259288584, 0.11232664602194947), (0.624662757438719, 0.10120688072576423), (0.48115931185961075, 0.07900653578081827), (0.5423503292784996, 0.0828105047290537)), Dict{String, Any}("modelType" => :BaggingC, "n_estimators" => 10)), (((0.4430826345372031, 0.10746200118023104), (0.6506269233153407, 0.08286744152401068), (0.49000438332579355, 0.06845699991696672), (0.557225072684479, 0.0652903171162202)), Dict{String, Any}("modelType" => :BaggingC, "n_estimators" => 50))]
Any[(((0.42109172280226276, 0.18234460906324007), (0.6385598691987382, 0.08676276910590759), (0.4696319751501689, 0.06448331200835494), (0.5374374712780343, 0.05302476510982498)), Dict{String, Any}("modelType" => :EvoT, "n_estimators" => 50)), (((0.4228187662941957, 0.176231

### Entrenamiento en conjunto de test de las mejores combinaciones

In [69]:
# Run TrainEnsembles with each selected configuration on the (train, test) pair
# and print the configuration immediately followed by the returned result.
# NOTE(review): only the first two entries of Emodels are processed, so the third
# kind is skipped here — confirm whether that is intentional.
for model in Emodels[1:2]
    # bestconfigs stores (scores, config); the configuration is the second element.
    config = last(bestconfigs[model])
    outcome = TrainEnsembles(
        model, config, ((train_inputs, train_targets), (test_inputs, test_targets))
    )
    println(config, outcome)
end;

Dict{String, Any}("modelType" => :AdaB, "n_iter" => 100)(0.37855787476280833, 1.0, 0.7727272727272727, 0.8717948717948718)
Dict{String, Any}("modelType" => :BaggingC, "n_estimators" => 50)(0.6698292220113852, 0.9893617021276596, 0.6914498141263941, 0.8140043763676149)


In [74]:
# Run TrainEnsembles for :XGB and then :LGBM, each with a fresh empty Dict as its
# configuration, on the (train, test) pair, and print each returned result.
for model in (:XGB, :LGBM)
    println(
        TrainEnsembles(
            model, Dict(), ((train_inputs, train_targets), (test_inputs, test_targets))
        ),
    )
end
# :CatB stays disabled, matching the commented-out CatBoost model load:
# println(TrainEnsembles(:CatB, Dict(), ((train_inputs, train_targets), (test_inputs, test_targets))))


(0.5208728652751423, 1.0, 0.49740932642487046, 0.6643598615916955)
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 140297
[LightGBM] [Info] Number of data points in the train set: 9245, number of used features: 561
[LightGBM] [Info] Start training from score -1.688174
[LightGBM] [Info] Start training from score -1.763498
[LightGBM] [Info] Start training from score -1.669049
[LightGBM] [Info] Start training from score -1.781322
[LightGBM] [Info] Start training from score -1.895499
[LightGBM] [Info] Start training from score -1.990593
(0.5218216318785579, 1.0, 0.493573264781491, 0.6609294320137694)
