In [None]:
using Random
using CSV
using MLJ
using MLJDecisionTreeInterface
using DecisionTree
using DelimitedFiles
using DataFrames
import DataFramesMeta as DFM

In [None]:
# Read the comma-delimited feature table. With `header=true`, `readdlm` returns the
# data cells and the header cells separately.
features_csv = "../../data/features.csv"
data, header = readdlm(features_csv, ','; header=true)

# Build the DataFrame (one column per header entry) and drop the listed columns in a
# single step. `select!` mutates its argument and returns it, so `df` is the trimmed
# table and is still what the cell displays.
df = select!(
    DataFrame(data, vec(header)),
    Not([
        :CountHandWood,
        :CountHandBrick,
        :CountHandPasture,
        :CountHandStone,
        :CountHandGrain,
        :HasMostPoints,
        :CountVictoryPoint,
    ]),
)

In [None]:
# Coerce the target column to the `Multiclass{2}` scientific type.
coerce!(df, :WonGame => Multiclass{2})
# NOTE(review): this `@transform` names an existing column and assigns nothing new;
# it looks like a no-op apart from rebinding `df` — confirm whether it is needed.
df = DFM.@transform(df, :WonGame)

# NOTE(review): the fraction passed to `partition` is the share of rows that goes to
# the FIRST output, so `df_train` receives 10% of the rows and `df_test` the other 90%.
# Confirm this is intended and not an inverted 0.9.
df_train, df_test = partition(df, 0.1; rng=123)

# Separate the target column (first output) from the remaining columns (second output).
y, X = unpack(df_train, colname -> colname == :WonGame);
y_test, X_test = unpack(df_test, colname -> colname == :WonGame);

In [None]:
"""
    get_features_labels(df, label=:WonGame)

Split `df` into a label vector and a feature matrix.

Return `(labels, features)`: `labels` is the `label` column of `df` (not copied), and
`features` is a `Matrix` built from every other column, in the order they appear in `df`.
"""
function get_features_labels(df, label=:WonGame)
    target = String(label)
    feature_columns = filter(!=(target), names(df))
    return df[!, label], Matrix(df[!, feature_columns])
end

# Label vector / feature matrix pairs for the training and test splits.
labels, features = get_features_labels(df_train)
l_test, f_test = get_features_labels(df_test)

#Matrix(df)
#labels = convert(Matrix, y)

In [None]:
# Train a random forest on the training labels/features with DecisionTree.jl defaults.
# The forest is seeded with the same seed as the train/test split so that rerunning the
# notebook grows the same trees; without `rng` the global RNG is used and the forest
# (and everything derived from it) changes on every run.
# Positional overrides tried earlier, to be placed after `features`: 2, 10, 0.5, 6
model = build_forest(labels, features; rng=123)

In [None]:
# Apply the trained forest to the test feature matrix.
y_pred = apply_forest(model, f_test)
# Same forest and inputs, probability variant; the label list [0.0, 1.0] is passed
# explicitly. NOTE(review): presumably the output columns follow this label order —
# confirm against the DecisionTree.jl documentation.
p_pred = apply_forest_proba(model, f_test, [0.0, 1.0])

In [None]:
# Print (feature name, impurity importance) pairs, most important first.
# The forest was trained on every column except the label (see `get_features_labels`),
# so the label has to be removed from the name list as well. Zipping against
# `names(df_train)` directly pairs every column after "WonGame" with the importance of
# its neighbour.
for c in sort(
    collect(zip(setdiff(names(df_train), ["WonGame"]), impurity_importance(model)));
    by=last,
    rev=true,
)
    println(c)
end

In [None]:
# Call MLJ's `models` with the search string "boost" and display the result.
# NOTE(review): presumably used to find the available boosting models — confirm.
models("boost")

In [None]:


# `@load` yields a model type; passing it to `Base.invokelatest` calls that type with
# no arguments, giving a default-configured model instance.
gboost = (@load GradientBoostingClassifier pkg=MLJScikitLearnInterface verbosity=0) |> Base.invokelatest

# Same pattern for several model types at once: load each, then instantiate each.
loaded_models = map(
    Base.invokelatest,
    [
        (@load RandomForestClassifier pkg=DecisionTree verbosity=0),
        (@load GradientBoostingClassifier pkg=MLJScikitLearnInterface verbosity=0),
    ],
)
#thresholded_models = BinaryThresholdPredictor.(loaded_models, threshold=0.5)
#r = range(thresholded_models, :threshold, lower=0.1, upper=0.9)

In [None]:
# Load the MLJ random-forest classifier type and instantiate it via
# `Base.invokelatest`. The forest gets an integer seed so that every fit during tuning
# is reproducible; the split is already seeded with the same value.
Tree = @load RandomForestClassifier pkg=DecisionTree verbosity=0
tree = Base.invokelatest(Tree; rng=123)

# Wrap the forest in a threshold predictor so the decision threshold can be tuned
# together with the forest's own hyperparameters (addressed as `model.<field>`).
thresholded_tree = BinaryThresholdPredictor(tree, threshold=0.5)
ranges = [
    range(thresholded_tree, :threshold, lower=0.1, upper=0.9),
    range(thresholded_tree, :(model.min_purity_increase), lower=0.0, upper=0.9),
    range(thresholded_tree, :(model.min_samples_leaf), lower=4, upper=10),
    range(thresholded_tree, :(model.min_samples_split), lower=2, upper=8),
    #range(thresholded_tree, :(model.partial_sampling), lower=0.5, upper=0.9),
    range(thresholded_tree, :(model.n_trees), lower=5, upper=20)
]

# Random search over the ranges above: 100 candidates, each scored by 6-fold
# cross-validation with the Matthews correlation. The search is seeded so the same
# candidates are drawn on every run; an unseeded `RandomSearch()` uses the global RNG.
tuned_tree = TunedModel(
    thresholded_tree,
    tuning=RandomSearch(rng=123),
    resampling=CV(nfolds=6),
    range = ranges,
    measure = MatthewsCorrelation(),
    n=100
)
#mach = machine(tree, X, y) |> MLJ.fit!

In [None]:
# Set the level order of the training target to 0.0, then 1.0, before fitting.
levels!(y, [0.0, 1.0])

# Bind the tuning wrapper to the training data and run the search.
tuned_mach = MLJ.fit!(machine(tuned_tree, X, y))

# Show the tuning report.
report(tuned_mach)


In [None]:
# Best model found by the search. As the cell's final expression, the assignment also
# displays the model, so no separate display line is needed.
optimized_tree = report(tuned_mach).best_model

In [None]:
# Held-out evaluation: train on the TRAINING split and predict on the test split.
# Fitting on (X_test, y_test) and then predicting X_test scores the model on the data
# it was trained on, which makes the test metric meaningless.
#
# `optimized_tree` is the threshold wrapper; its `model` field is the forest with the
# tuned hyperparameters. Fitting the forest itself is what gives per-class probabilities
# from `predict`. `predict_proba` comes from DecisionTree.jl's ScikitLearn-style API and
# has no method for MLJ machines. `predict` is qualified because MLJ and DecisionTree
# both export it (same reason `MLJ.fit!` is qualified).
mach = machine(optimized_tree.model, X, y) |> MLJ.fit!
y_hat = MLJ.predict(mach, X_test)

In [None]:
# Scratch check: try converting the first prediction to an Int64.
# NOTE(review): this only succeeds if `y_hat[1]` is convertible to an integer (e.g. a
# plain class label rather than a distribution) — confirm the element type of `y_hat`.
convert(Int64, y_hat[1])
# Earlier experiments, kept for reference:
#v = coerce([0.0, 1.0], Multiclass)
#pdf(y_hat, levels(v))

In [None]:
using Plots

# Plot the fitted tuning machine (module-qualified call to the same `plot` function).
Plots.plot(tuned_mach)

In [None]:
#MLJ.feature_importances(tuned_mach.model.model.model, tuned_mach.fitresult , report(tuned_mach))
#impurity_importance(tuned_mach.model.model.model)
# Take the first entry of the tuned machine's fitted parameters and show its `model` field.
first(fitted_params(tuned_mach)).model

In [None]:
# Score the predictions against the held-out labels with the Matthews correlation.
# `mode.(y_hat)` takes the mode of each element of `y_hat`.
# NOTE(review): presumably `y_hat` holds one distribution per row here — confirm.
m = MatthewsCorrelation()
m(mode.(y_hat), y_test)

In [None]:
# Reach through three nested `model` fields of the tuned machine.
# NOTE(review): given how `tuned_tree` was built, this is presumably
# TunedModel -> threshold wrapper -> random forest — confirm.
mlj_m = tuned_mach.model.model.model

In [None]:
# Attempt to convert the MLJ model object into DecisionTree.jl's own
# `RandomForestClassifier` type.
# NOTE(review): no `convert` method between these two types is defined in this
# notebook — confirm one exists, otherwise this call raises a MethodError.
convert(DecisionTree.RandomForestClassifier, mlj_m)

In [None]:
# Persist the tuned machine to "model20250407.jls" (path is relative, so the file is
# written to the current working directory).
MLJ.save("model20250407.jls", tuned_mach)

In [None]:
# Display the predictions computed for `X_test`.
y_hat