# Bayesian Personalized Ranking
* Creates a model for pairwise classification

In [2]:
using Flux

import CUDA
import NLopt
import Random
import NBInclude: @nbinclude
import Setfield: @set
@nbinclude("BPRBase.ipynb")
@nbinclude("EnsembleInputs.ipynb");

## Hyperparameters

In [3]:
# Settings for one training run; built with keyword arguments via `@with_kw`.
@with_kw struct Hyperparams
    # forwarded to `get_data` together with `features`
    allow_ptw::Bool
    # batch size handed to `generate_batches`; also divides `num_users()` to size an epoch
    batch_size::Int
    # feature names; sizes the model input in `build_model` and selects data in `get_data`
    features::Vector{String}
    # third argument of the `ADAMW` optimizer; overwritten by `create_hyperparams`
    l2penalty::Float32
    # first argument of the `ADAMW` optimizer; overwritten by `create_hyperparams`
    learning_rate::Float32
    # passed to `Random.seed!` before the model is built
    seed::UInt64
end

"""
    to_dict(x::Hyperparams)

Return a dictionary mapping every field name of `x` (as a `String`) to that field's value.
"""
function to_dict(x::Hyperparams)
    names = fieldnames(Hyperparams)
    return Dict(string(name) => getfield(x, name) for name in names)
end

"""
    string(x::Hyperparams)

Render `x` as a multi-line report: a `Hyperparameters:` header followed by one
`name => value` line per field, with the names right-padded to a common width.
"""
function Base.string(x::Hyperparams)
    names = fieldnames(Hyperparams)
    width = maximum(length(string(name)) for name in names)
    return sprint() do io
        println(io, "Hyperparameters:")
        for name in names
            println(io, rpad(string(name), width), " => ", getfield(x, name))
        end
    end
end;

## Models

In [4]:
"""
    build_model(features::Vector{String})

Build the scoring network: a 256-unit `relu` hidden layer followed by a single linear
output. The input width is twice the number of features.
"""
function build_model(features::Vector{String})
    # two inputs per feature (presumably one per item of the compared pair; confirm
    # against `get_batch`)
    input_width = 2 * length(features)
    hidden = Dense(input_width => 256, relu)
    output = Dense(256 => 1)
    return Chain(hidden, output)
end;

## Loss Functions

In [5]:
"""
    model_loss(m, x, y)

Binary cross-entropy between the logits `m(x)` and the targets `y`.
"""
model_loss(m, x, y) = Flux.logitbinarycrossentropy(m(x), y)

"""
    split_loss(m, iters::Int, batches::Channel)

Average `model_loss` of `m` over `iters` batches taken from `batches`, showing a
progress bar while evaluating. Only the first element of each batch is used.
"""
function split_loss(m, iters::Int, batches::Channel)
    total = 0.0
    @showprogress for _ = 1:iters
        batch = take!(batches)
        total += model_loss(m, batch[1]...)
    end
    return total / iters
end;

## Training

In [6]:
"""
    generate_batches(user_priorities, user_features, batch_size::Int, c::Channel)

Producer loop: repeatedly build a batch with `get_batch` and `put!` it on `c`.

The loop ends when `c` is closed; the `InvalidStateException` raised by `put!` on a
closed channel is treated as the shutdown signal. Any other exception is logged and
rethrown, so a failing `get_batch` terminates this producer instead of being retried
forever in a silent busy loop.
"""
function generate_batches(user_priorities, user_features, batch_size::Int, c::Channel)
    while true
        try
            put!(c, get_batch(user_priorities, user_features, batch_size))
        catch e
            # closing the channel is how the consumer tells producers to stop
            e isa InvalidStateException && break
            # a failed spawned task is otherwise invisible, so log before propagating
            @error "batch generation failed" exception = (e, catch_backtrace())
            rethrow()
        end
    end
end;

In [7]:
"""
    train_model(hyp; max_checkpoints = 100, epochs_per_checkpoint = 10, patience = 0,
                verbose = true)

Train a model with the hyperparameters `hyp` and return `(best_model, best_loss)`:
a CPU copy of the model at the checkpoint with the lowest test loss, and that loss.

# Keywords
- `max_checkpoints`: passed to `early_stopper` as `max_iters`.
- `epochs_per_checkpoint`: number of epochs trained between two test-loss evaluations.
- `patience`: passed to `early_stopper`.
- `verbose`: when `true`, progress and the loss of every checkpoint are logged.

Batches are produced by background tasks that feed two channels; both channels are
closed when the function exits, whether it returns normally or throws.
"""
function train_model(
    hyp;
    max_checkpoints = 100,
    epochs_per_checkpoint = 10,
    patience = 0,
    verbose = true,
)
    if verbose
        @info "Getting data"
    end
    opt = ADAMW(hyp.learning_rate, (0.9, 0.999), hyp.l2penalty)
    Random.seed!(hyp.seed)
    m = build_model(hyp.features) |> device
    # `cpu` may hand back the very same parameter arrays when the model already lives on
    # the CPU; deepcopy so that later training steps cannot overwrite the snapshot
    best_model = deepcopy(m |> cpu)
    ps = Flux.params(m)
    stopper = early_stopper(max_iters = max_checkpoints, patience = patience)
    training, test, user_features = get_data(hyp.features, hyp.allow_ptw)
    batchloss(x, y) = model_loss(m, x, y)
    epoch_size = round(Int, num_users() / hyp.batch_size)

    if verbose
        @info "Setting up batches"
    end
    training_batches = Channel(48)
    test_batches = Channel(48)
    losses = Float64[]
    loss = Inf
    try
        # leave one thread for the training loop, but always start at least one producer pair
        for _ = 1:max(Threads.nthreads() - 1, 1)
            Threads.@spawn generate_batches(
                training,
                user_features,
                hyp.batch_size,
                training_batches,
            )
            Threads.@spawn generate_batches(
                test,
                user_features,
                hyp.batch_size,
                test_batches,
            )
        end

        if verbose
            @info "Training..."
        end
        while !stop!(stopper, loss)
            for _ = 1:epochs_per_checkpoint
                for _ = 1:epoch_size
                    Flux.train!(batchloss, ps, take!(training_batches), opt)
                end
            end

            loss = split_loss(m, epoch_size, test_batches)
            push!(losses, loss)
            if loss == minimum(losses)
                best_model = deepcopy(m |> cpu)
            end
            if verbose
                @info "loss $loss"
            end
        end
    finally
        # closing the channels is what stops the producer tasks; do it on errors too so
        # they are not left blocked forever
        close(training_batches)
        close(test_batches)
    end

    return best_model, minimum(losses)
end;

## Hyperparameter Tuning

In [8]:
"""
    create_hyperparams(hyp, λ)

Return a copy of `hyp` whose `learning_rate` is `10^(λ[1] - 3)` and whose `l2penalty`
is `10^(λ[2] - 5)`; `λ = [0, 0]` therefore yields `1e-3` and `1e-5`.
"""
function create_hyperparams(hyp, λ)
    learning_rate = 10^(λ[1] - 3)
    tuned = @set hyp.learning_rate = learning_rate
    l2penalty = 10^(λ[2] - 5)
    tuned = @set tuned.l2penalty = l2penalty
    return tuned
end;

In [9]:
"""
    optimize_hyperparams(hyp; max_evals)

Run a Nelder-Mead search (NLopt `:LN_NELDERMEAD`) over the two offsets `λ` consumed by
`create_hyperparams`, starting from zeros with an initial step of 1, and return the best
`λ` found. Every evaluation trains a full model with `train_model` and uses its
validation loss as the objective; at most `max_evals` evaluations are performed.
"""
function optimize_hyperparams(hyp; max_evals)
    # `grad` is part of the NLopt callback signature but unused by this derivative-free method
    function nlopt_loss(point, grad)
        # NLopt passes Float64 values because it calls a C library
        λ = convert.(Float32, point)
        _, loss = train_model(create_hyperparams(hyp, λ))
        @info "$λ $loss"
        loss
    end
    opt = NLopt.Opt(:LN_NELDERMEAD, 2)
    opt.initial_step = 1
    opt.maxeval = max_evals
    opt.min_objective = nlopt_loss
    minf, λ, ret = NLopt.optimize(opt, zeros(Float32, 2))
    numevals = opt.numevals

    # nothing is saved here, so the message reports only the search result
    @info (
        "found minimum $minf at point $λ after $numevals function calls " *
        "(ended because $ret)"
    )
    λ
end;

## Save Model

In [10]:
"""
    train_alpha(hyp, outdir; tune_hyperparams = true)

Train the final model for `hyp` and write it to `outdir`.

Logging is redirected with `set_logging_outdir(outdir)`. When `tune_hyperparams` is
`true` the offsets `λ` come from `optimize_hyperparams` (10 evaluations); otherwise
`λ` is all zeros. The model is then trained with up to 25 single-epoch checkpoints and
a patience of 10, and the model, `λ` and the resolved hyperparameters are passed to
`write_params`.
"""
function train_alpha(hyp, outdir; tune_hyperparams = true)
    set_logging_outdir(outdir)

    λ = if tune_hyperparams
        @info "Optimizing hyperparameters..."
        optimize_hyperparams(hyp; max_evals = 10)
    else
        zeros(2)
    end
    tuned = create_hyperparams(hyp, λ)

    @info "Training model..."
    m, validation_loss = train_model(
        tuned;
        max_checkpoints = 25,
        epochs_per_checkpoint = 1,
        patience = 10,
    )
    @info "Trained model loss: $validation_loss"

    @info "Writing alpha..."
    payload = Dict("m" => m, "λ" => λ, "hyp" => tuned)
    write_params(payload, outdir)
    @info "Wrote alpha!"
end;

In [11]:
# input features of the base model: three named alphas plus the first explicit and the
# first implicit raw alpha
const alphas = vcat(
    "Explicit",
    "LinearImplicit",
    "NonlinearImplicit",
    explicit_raw_alphas[1],
    implicit_raw_alphas[1],
);

In [11]:
# train the base model on `alphas` without hyperparameter tuning; `l2penalty` and
# `learning_rate` are NaN placeholders that `train_alpha` replaces via `create_hyperparams`
let hyp = Hyperparams(
        allow_ptw = true,
        batch_size = 1024,
        features = alphas,
        l2penalty = NaN,
        learning_rate = NaN,
        seed = 20220609,
    )
    train_alpha(hyp, "BPR.neural"; tune_hyperparams = false)
end

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 12:48:54 Training model...
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 12:48:54 Getting data
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 12:48:56 getting user features
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 12:48:58 getting test explicit alphas
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.87 μs/it)[39mm
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 12:49:02 getting test implicit alphas
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.73 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 12:49:02 getting test negative alphas
[32mProgress: 100%|███████████████████████████| Time: 0:00:25 ( 1.47 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:01:49 ( 6.25 μs/it)[39m
[38;5;6m[1m[ [

In [12]:
# input features of the ptw model: everything in `alphas` plus the two Ptw alphas and
# the first raw ptw alpha
const ptw_alphas = vcat(
    alphas,
    vcat("LinearPtw", "NonlinearPtw", ptw_raw_alphas[1]),
);

In [13]:
# train the ptw model on `ptw_alphas` without hyperparameter tuning; `l2penalty` and
# `learning_rate` are NaN placeholders that `train_alpha` replaces via `create_hyperparams`
# NOTE(review): `allow_ptw = false` here while the "BPR.neural" run uses `true` — confirm
# this pairing with the Ptw feature set is intended
let hyp = Hyperparams(
        allow_ptw = false,
        batch_size = 1024,
        features = ptw_alphas,
        l2penalty = NaN,
        learning_rate = NaN,
        seed = 20220609,
    )
    train_alpha(hyp, "BPR.neural.ptw"; tune_hyperparams = false)
end

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 22:10:12 Training model...
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 22:10:12 Getting data
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 22:10:14 getting user features
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 22:10:16 getting test explicit alphas
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.89 μs/it)[39mm
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 22:10:20 getting test implicit alphas
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.77 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220827 22:10:21 getting test negative alphas
[32mProgress: 100%|███████████████████████████| Time: 0:00:28 ( 1.63 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:02:15 ( 7.74 μs/it)[39m
[32mProgress: 10