# Bayesian Personalized Ranking Trees
* Creates a model for pairwise classification

In [None]:
using LightGBM
import NBInclude: @nbinclude
@nbinclude("BPRBase.ipynb");

## LightGBM Datasets

In [None]:
# TODO move to a shared LGBM package

"""
    augment_dataset(ds, y, w)

Set the `"label"` field of the LightGBM dataset `ds` to `y` and its `"weight"` field
to `w`, then return `ds`.
"""
function augment_dataset(ds, y, w)
    # Labels are written before weights.
    for (field, values) in (("label", y), ("weight", w))
        LightGBM.LGBM_DatasetSetField(ds, field, values)
    end
    return ds
end

"""
    create_train_dataset(X, y, w, estimator)

Create a LightGBM dataset from the matrix `X`, using the stringified parameters of
`estimator`, and attach the labels `y` and weights `w` to it.
"""
function create_train_dataset(X, y, w, estimator)
    params = LightGBM.stringifyparams(estimator)
    # The trailing `false` is passed through to LightGBM unchanged.
    # NOTE(review): presumably the row-major flag — confirm against LightGBM.jl.
    dataset = LightGBM.LGBM_DatasetCreateFromMat(X, params, false)
    return augment_dataset(dataset, y, w)
end

"""
    create_test_dataset(X, y, w, estimator, train_ds)

Create a LightGBM dataset from the matrix `X`, passing `train_ds` along to the
dataset constructor, and attach the labels `y` and weights `w` to it.
"""
function create_test_dataset(X, y, w, estimator, train_ds)
    params = LightGBM.stringifyparams(estimator)
    # NOTE(review): `train_ds` is presumably used as the reference dataset so both
    # datasets share one feature binning — confirm against LightGBM.jl.
    dataset = LightGBM.LGBM_DatasetCreateFromMat(X, params, train_ds, false)
    return augment_dataset(dataset, y, w)
end;

In [None]:
"""
    get_pairwise_dataset(split, user_features, training; batch_size = 1024, epochs = 10000)

Draw `epochs` batches with `get_batch` and stack them into a single dataset.

For every batch, the adjoints of `batch[1][1]` (features) and `batch[1][2]` (labels)
are collected and concatenated vertically across batches.

# Arguments
- `split`, `user_features`: forwarded unchanged to `get_batch`.
- `training`: forwarded to `get_batch` as its `training` keyword.

# Keywords
- `batch_size`: batch size forwarded to `get_batch`.
- `epochs`: number of batches to draw; must be at least 1.

# Returns
A tuple `(X, y, w)`: the stacked features, the flattened labels, and a weight vector
shaped like `y` with every entry set to 1.

# Throws
- `ArgumentError` if `epochs < 1`, since no dataset can be built from zero batches.
"""
function get_pairwise_dataset(
    split,
    user_features,
    training;
    batch_size = 1024,
    epochs = 10000,
)
    epochs >= 1 || throw(ArgumentError("epochs must be at least 1, got $epochs"))
    @info "getting pairwise dataset"
    Xs = []
    ys = []
    @showprogress for _ = 1:epochs
        batch = get_batch(split, user_features, batch_size, training = training)
        push!(Xs, batch[1][1]')
        push!(ys, batch[1][2]')
    end
    # `map(identity, ...)` narrows the element type of the accumulators so that
    # `reduce(vcat, ...)` can use Base's single-pass concatenation instead of
    # splatting thousands of arrays into one `vcat` call.
    X = reduce(vcat, map(identity, Xs))
    y = vec(reduce(vcat, map(identity, ys)))
    # Uniform weights: same shape and element type as the labels, all ones.
    w = fill!(similar(y), 1)
    X, y, w
end;

## Save Model

In [None]:
"""
    train_alpha(outdir)

Fit a binary LightGBM classifier on pairwise data and hand the fitted model, together
with the list of alphas it was built from, to `write_params` along with `outdir`.
"""
function train_alpha(outdir)
    set_logging_outdir(outdir)

    ensemble_alphas = [
        "Explicit",
        "LinearExplicit",
        "LinearImplicit",
        "ErrorExplicit",
        "ErrorImplicit",
    ]
    all_alphas = vcat(
        ensemble_alphas,
        explicit_raw_alphas,
        implicit_raw_alphas,
        nondirectional_raw_alphas,
    )

    train_split, test_split, user_features = get_data(all_alphas)
    model = LGBMClassification(;
        objective = "binary",
        num_iterations = 1000,
        learning_rate = 0.01,
        early_stopping_round = 10,
        feature_fraction = 0.8,
        bagging_fraction = 0.9,
        bagging_freq = 1,
        num_leaves = 1000,
        num_class = 1,
        metric = ["binary_logloss", "auc"],
    )

    # Each call yields an (X, y, w) tuple; the training split is sampled first.
    # NOTE(review): both splits are sampled with `training = false` — confirm this
    # is intended for the training split as well.
    train_data = get_pairwise_dataset(train_split, user_features, false)
    test_data = get_pairwise_dataset(test_split, user_features, false)

    train_ds = create_train_dataset(train_data..., model)
    test_ds = create_test_dataset(test_data..., model, train_ds)
    fit!(model, train_ds, test_ds)

    return write_params(Dict("model" => model, "alphas" => all_alphas), outdir)
end;

In [None]:
# Run the full training pipeline, passing "BPRT" as the output directory argument.
train_alpha("BPRT")

In [None]:
# Logged metrics, presumably from a previous run of the training cell above:
# Iteration: 1000, test_1's binary_logloss: 0.11233399764707044
# Iteration: 1000, test_1's auc: 0.992262893711801