# Bayesian Personalized Ranking using Trees
* Used as the ranking model of the recommender system
* This is trained to learn the partial ordering implied by each user's watches
* This is trained on pairs of items, both of which have been watched by a user
* The impression metadata determines which one, if any, is liked more

In [None]:
using LightGBM
import NBInclude: @nbinclude
@nbinclude("BPR.ipynb")
@nbinclude("EnsembleInputs.ipynb");

## LightGBM Datasets

In [None]:
# TODO move to a shared LGBM package

"""
    augment_dataset(ds, y, w)

Set the `"label"` field of the LightGBM dataset handle `ds` to `y` and its `"weight"`
field to `w`, then return `ds` so the call can be chained.
"""
function augment_dataset(ds, y, w)
    # Labels are set before weights, one field per call.
    for (field, values) in (("label", y), ("weight", w))
        LightGBM.LGBM_DatasetSetField(ds, field, values)
    end
    return ds
end

"""
    create_train_dataset(X, y, w, estimator)

Build a LightGBM dataset from the feature matrix `X` using the stringified parameters of
`estimator`, then attach labels `y` and weights `w` via `augment_dataset`.

Returns the resulting dataset handle.
"""
function create_train_dataset(X, y, w, estimator)
    params = LightGBM.stringifyparams(estimator)
    # NOTE(review): the trailing `false` is presumably LightGBM.jl's row-major flag —
    # confirm against the LGBM_DatasetCreateFromMat signature.
    dataset = LightGBM.LGBM_DatasetCreateFromMat(X, params, false)
    return augment_dataset(dataset, y, w)
end

"""
    create_test_dataset(X, y, w, estimator, train_ds)

Build a LightGBM dataset from the feature matrix `X`, passing `train_ds` as the reference
dataset, then attach labels `y` and weights `w` via `augment_dataset`.

Returns the resulting dataset handle.
"""
function create_test_dataset(X, y, w, estimator, train_ds)
    params = LightGBM.stringifyparams(estimator)
    # NOTE(review): the trailing `false` is presumably LightGBM.jl's row-major flag —
    # confirm against the LGBM_DatasetCreateFromMat signature.
    dataset = LightGBM.LGBM_DatasetCreateFromMat(X, params, train_ds, false)
    return augment_dataset(dataset, y, w)
end;

In [None]:
"""
    get_pairwise_dataset(split, user_features, training; batch_size = 1024, epochs = 10000)

Draw `epochs` batches with `get_batch(split, user_features, batch_size)` and stack them
into a single dataset.

For every batch, the adjoints of `cpu(batch[1][1])` and `cpu(batch[1][2])` are collected
as the feature and label pieces respectively; the pieces are then concatenated
vertically in epoch order.

# Arguments
- `split`, `user_features`: forwarded unchanged to `get_batch`.
- `training`: accepted for interface compatibility; it is not read by this function.

# Keywords
- `batch_size`: forwarded to `get_batch`.
- `epochs`: number of batches to draw; must be at least 1.

# Returns
A tuple `(X, y, w)` where `X` is the stacked features, `y` is the flattened stacked
labels, and `w` is a vector shaped like `y` filled with ones.

# Throws
- `ArgumentError` if `epochs < 1`.
"""
function get_pairwise_dataset(
    split,
    user_features,
    training;
    batch_size = 1024,
    epochs = 10000,
)
    epochs >= 1 || throw(ArgumentError("epochs must be at least 1, got $epochs"))
    @info "getting pairwise dataset"
    # One slot per iteration instead of one buffer per `Threads.threadid()`: thread ids
    # are not stable for a task and may exceed `Threads.nthreads()`, and a thread that
    # receives no iterations would leave an empty buffer that breaks the final vcat.
    Xs = Vector{Any}(undef, epochs)
    ys = Vector{Any}(undef, epochs)
    @tprogress Threads.@threads for i = 1:epochs
        batch = get_batch(split, user_features, batch_size)
        Xs[i] = cpu(batch[1][1])'
        ys[i] = cpu(batch[1][2])'
    end
    # `map(identity, ...)` narrows the element type so `reduce(vcat, ...)` can use its
    # single-allocation method instead of splatting thousands of arguments.
    X = reduce(vcat, map(identity, Xs))
    y = vec(reduce(vcat, map(identity, ys)))
    # Uniform sample weights with the same element type and length as the labels.
    w = fill!(similar(y), 1)
    return X, y, w
end;

## Train and Save Model

In [None]:
"""
    train_alpha(outdir, allow_ptw)

Fit a LightGBM binary classifier on pairwise datasets and persist it.

The function directs logging to `outdir`, assembles the list of input alphas (extended
with `ptw_raw_alphas` and `"NonlinearPtw"` when `allow_ptw` is true), loads the splits
through `get_data`, samples a train and a test pairwise dataset, fits the estimator with
the test dataset passed alongside the training one, and finally calls `write_params`
with the fitted model and the alpha list.

Returns whatever `write_params` returns.
"""
function train_alpha(outdir, allow_ptw)
    set_logging_outdir(outdir)

    # Named alphas first, followed by the raw alpha collections defined elsewhere.
    alphas = vcat(
        "LinearExplicit",
        "LinearImplicit",
        "Explicit",
        "NonlinearImplicit",
        explicit_raw_alphas,
        implicit_raw_alphas,
        nondirectional_raw_alphas,
    )
    if allow_ptw
        append!(alphas, ptw_raw_alphas)
        push!(alphas, "NonlinearPtw")
    end

    # NOTE(review): the meaning of the trailing `false` passed to `get_data` is not
    # visible here — confirm against its definition.
    train_split, test_split, features = get_data(alphas, allow_ptw, false)

    model = LGBMClassification(;
        objective = "binary",
        num_class = 1,
        metric = ["auc", "binary_logloss"],
        num_iterations = 100,
        learning_rate = 0.01,
        early_stopping_round = 10,
        num_leaves = 1000,
        feature_fraction = 0.8,
        bagging_fraction = 0.9,
        bagging_freq = 1,
    )

    X_train, y_train, w_train = get_pairwise_dataset(train_split, features, false)
    X_test, y_test, w_test = get_pairwise_dataset(test_split, features, false)

    # The test dataset is built with the training dataset as its reference.
    train_ds = create_train_dataset(X_train, y_train, w_train, model)
    test_ds = create_test_dataset(X_test, y_test, w_test, model, train_ds)
    fit!(model, train_ds, test_ds)

    saved = Dict("model" => model, "alphas" => alphas)
    return write_params(saved, outdir)
end;