# Bayesian Personalized Ranking
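* Trains a model for pairwise classification: given the features of two items for the same user, it predicts which of the two the user ranks higher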

In [1]:
using Flux

import CUDA
import SparseArrays: sparse
import NBInclude: @nbinclude
import NLopt
import Random
import Setfield: @set
@nbinclude("../Alpha.ipynb")
@nbinclude("EnsembleInputs.ipynb")
@nbinclude("SuppressImplicit.ipynb");

In [2]:
# support both gpu and cpu training

"""
    device(x)

Move `x` onto the training device by forwarding to `gpu`.

Every transfer in this notebook goes through this one function, so the rest of the code
does not need to know whether a GPU is present. NOTE(review): `gpu` is expected to fall
back to the CPU (with a log notice) when no GPU is accessible.
"""
device(x) = gpu(x)

# Without a functional CUDA setup, let BLAS use as many threads as Julia was started with.
CUDA.functional() || LinearAlgebra.BLAS.set_num_threads(Threads.nthreads());

## Hyperparameters

In [3]:
# Settings for one BPR training run.
@with_kw struct Hyperparams
    # number of sampled item pairs in each minibatch
    batch_size::Int
    # names of the alphas whose predictions are used as model inputs
    features::Vector{String}
    # passed as the third argument of the ADAMW optimizer
    l2penalty::Float32
    # passed as the first argument of the ADAMW optimizer
    learning_rate::Float32
    # seeds the global RNG right before the model is built
    seed::UInt64
end

"""
    to_dict(x::Hyperparams)

Return a `Dict` that maps every field name of `x` (as a `String`) to that field's value.
"""
function to_dict(x::Hyperparams)
    names = fieldnames(Hyperparams)
    return Dict(string(name) => getfield(x, name) for name in names)
end

"""
    string(x::Hyperparams)

Render `x` as a multi-line listing: a header line followed by one `name => value` line per
field, with the names right-padded to a common width so the arrows line up.
"""
function Base.string(x::Hyperparams)
    names = collect(fieldnames(Hyperparams))
    width = maximum(length(string(name)) for name in names)
    return sprint() do io
        print(io, "Hyperparameters:\n")
        for name in names
            print(io, rpad(string(name), width), " => ", getfield(x, name), "\n")
        end
    end
end;

## Models

In [4]:
"""
    build_model(features)

Build the scoring network: a 256-unit ReLU hidden layer followed by a single output unit.

The input width is twice `length(features)` because each training example is the
concatenation of the feature vectors of two items.
"""
function build_model(features)
    hidden_units = 256
    input_width = 2 * length(features)
    return Chain(Dense(input_width => hidden_units, relu), Dense(hidden_units => 1))
end;

## Data Preprocessing

In [5]:
"""
    add_priorities!(user_priorities, split, priority)

Record a priority for every (user, item) row of the raw split named `split`.

`priority` is called with the row's rating and its result is stored in
`user_priorities[user][item]`, overwriting any entry already present for that pair.
"""
function add_priorities!(user_priorities, split, priority)
    rows = get_raw_split(split)
    @showprogress for idx = 1:length(rows.rating)
        user, item = rows.user[idx], rows.item[idx]
        user_priorities[user][item] = priority(rows.rating[idx])
    end
end

"""
    get_user_priorities()

Build `user => item => (level, rating)` for every user id in `1:num_users()`.

The test splits are applied in order, later ones overwriting earlier ones:
explicit rows get `(1, rating)`, implicit rows `(1, NaN32)` and negative rows `(0, NaN32)`.
"""
function get_user_priorities()
    ItemPriorities = Dict{Int32,Tuple{Int32,Float32}}
    user_priorities = Dict{Int32,ItemPriorities}()
    @showprogress for user = 1:num_users()
        user_priorities[user] = ItemPriorities()
    end
    split_priorities = (
        ("explicit_test", r -> (1, r)),
        ("implicit_test", r -> (1, NaN32)),
        ("negative_test", r -> (0, NaN32)),
    )
    for (split, priority) in split_priorities
        add_priorities!(user_priorities, split, priority)
    end
    return user_priorities
end

"""
    training_test_split(user_priorities; p = 0.9)

Partition `user_priorities` by user id into `(training, test)`.

Users whose id is strictly below `num_users() * p` go to `training`; all others go to
`test`. The per-user dictionaries are shared with the input, not copied.
"""
function training_test_split(user_priorities; p = 0.9)
    UserPriorities = Dict{Int32,Dict{Int32,Tuple{Int32,Float32}}}
    training = UserPriorities()
    test = UserPriorities()
    cutoff = num_users() * p
    @showprogress for user in keys(user_priorities)
        bucket = user < cutoff ? training : test
        bucket[user] = user_priorities[user]
    end
    return training, test
end;

In [6]:
"""
    add_features!(user_features, alphas, split)

Store a feature vector for every (user, item) row of the raw split named `split`.

The vector holds, in the order of `alphas`, the `rating` value each alpha reports for that
row. Alphas whose name contains "Implicit" are read with the third `read_alpha` argument
set to `true`.
"""
function add_features!(user_features, alphas, split)
    predictions = map(alphas) do alpha
        read_alpha(alpha, split, occursin("Implicit", alpha)).rating
    end
    rows = get_raw_split(split)
    @showprogress for idx = 1:length(rows.rating)
        user_features[rows.user[idx]][rows.item[idx]] = getindex.(predictions, idx)
    end
end

"""
    get_user_features(features)

Build `user => item => feature vector` for every user id in `1:num_users()`, filled from
the explicit, implicit and negative test splits (in that order).
"""
function get_user_features(features)
    ItemFeatures = Dict{Int32,Vector{Float32}}
    user_features = Dict{Int32,ItemFeatures}()
    @showprogress for user = 1:num_users()
        user_features[user] = ItemFeatures()
    end
    for split in ("explicit_test", "implicit_test", "negative_test")
        add_features!(user_features, features, split)
    end
    return user_features
end;

## Batching

In [7]:
"""
    compare(x, y)

Three-way comparison of two numbers.

Returns `NaN` when either argument is `NaN`, otherwise `0` if they are equal, `1` if
`x > y` and `-1` if `x < y`.
"""
function compare(x, y)
    (isnan(x) || isnan(y)) && return NaN
    x == y && return 0
    return x > y ? 1 : -1
end

"""
    compare(x::Tuple, y::Tuple)

Lexicographic three-way comparison of two tuples.

All positions are compared element-wise first; the result of the first position that is
not a tie is returned (this can be `NaN`). Returns `0` when every position ties.
"""
function compare(x::Tuple, y::Tuple)
    outcomes = compare.(x, y)
    decisive = findfirst(r -> r != 0, outcomes)
    return decisive === nothing ? 0 : outcomes[decisive]
end

"""
    sample(user_priorities, user_features)

Draw one training example `(x, y)` by rejection sampling.

A random user is picked; users with fewer than two items are skipped. Two of the user's
items are then drawn independently and their priority tuples compared. Pairs that tie or
are incomparable (`NaN`) are rejected and the draw starts over. `x` is the first item's
feature vector followed by the second item's; `y` is a one-element `Float32` vector
holding `1` when the first item has the higher priority and `0` otherwise.
"""
function sample(user_priorities, user_features)
    while true
        user, rated = rand(user_priorities, 1)[1]
        length(rated) > 1 || continue
        first_pick, second_pick = rand(rated, 2)
        outcome = compare(first_pick[2], second_pick[2])
        (isnan(outcome) || outcome == 0) && continue
        item_features = user_features[user]
        x = vcat(item_features[first_pick[1]], item_features[second_pick[1]])
        label = Float32((outcome + 1) / 2)
        return x, [label]
    end
end

"""
    get_batch(user_priorities, user_features, batch_size)

Draw `batch_size` examples with `sample` and stack them into one minibatch.

Returns a one-element vector holding the `(inputs, labels)` tuple, both moved to the
training device with `device`.
"""
function get_batch(user_priorities, user_features, batch_size)
    # a comprehension yields concretely typed vectors, unlike `[]` which is a Vector{Any}
    samples = [sample(user_priorities, user_features) for _ = 1:batch_size]
    Xs = first.(samples)
    ys = last.(samples)
    return [(Flux.batch(Xs) |> device, Flux.batch(ys) |> device)]
end;

## Loss Functions

In [8]:
"""
    model_loss(m, x, y)

Logit binary cross-entropy between the raw outputs `m(x)` and the targets `y`.
"""
model_loss(m, x, y) = Flux.logitbinarycrossentropy(m(x), y)

"""
    split_loss(m, iters, batches::Channel)

Average `model_loss` of `m` over `iters` batches taken from `batches`.

Each channel item is indexable; its first element is splatted into `model_loss` after `m`.
"""
function split_loss(m, iters, batches::Channel)
    total = foldl(1:iters; init = 0.0) do acc, _
        acc + model_loss(m, first(take!(batches))...)
    end
    return total / iters
end;

## Training

In [9]:
# Load everything training needs: the training and test priority maps plus the per-user
# feature vectors for `features`. Memoized, so repeated calls with the same `features`
# reuse the first result.
@memoize function get_data(features)
    priorities = get_user_priorities()
    training, test = training_test_split(priorities)
    return training, test, get_user_features(features)
end;

In [10]:
"""
    generate_batches(user_priorities, user_features, batch_size, c::Channel)

Producer loop: keep building batches with `get_batch` and `put!` them on `c` until the
channel is closed.

Closing `c` is the normal shutdown signal. Any other exception is logged and rethrown, so
a failing producer is visible instead of silently retrying forever.
"""
function generate_batches(user_priorities, user_features, batch_size, c::Channel)
    while isopen(c)
        try
            put!(c, get_batch(user_priorities, user_features, batch_size))
        catch e
            # `put!` on a closed channel raises InvalidStateException: clean shutdown
            e isa InvalidStateException && break
            @error "batch generation failed" exception = (e, catch_backtrace())
            rethrow()
        end
    end
end;

In [11]:
"""
    train_model(hyp; max_checkpoints = 100, epochs_per_checkpoint = 10, patience = 0,
                verbose = true)

Train a model with the given hyperparameters and return `(best_model, best_loss)`, where
`best_loss` is the lowest validation loss seen at any checkpoint and `best_model` is a CPU
snapshot of the model at that checkpoint.

# Keywords
- `max_checkpoints`: forwarded to `early_stopper` as `max_iters`.
- `epochs_per_checkpoint`: epochs trained between two validation passes. One epoch is
  `round(num_users() / hyp.batch_size)` batches.
- `patience`: forwarded to `early_stopper`.
- `verbose`: log the validation loss after every checkpoint.
"""
function train_model(
    hyp;
    max_checkpoints = 100,
    epochs_per_checkpoint = 10,
    patience = 0,
    verbose = true,
)
    opt = ADAMW(hyp.learning_rate, (0.9, 0.999), hyp.l2penalty)
    Random.seed!(hyp.seed)
    m = build_model(hyp.features) |> device
    # deepcopy: when the model already lives on the CPU, `cpu` can return the same
    # parameter arrays, which training then mutates in place
    best_model = deepcopy(m |> cpu)
    ps = Flux.params(m)
    stopper = early_stopper(max_iters = max_checkpoints, patience = patience)
    training, test, user_features = get_data(hyp.features)
    batchloss(x, y) = model_loss(m, x, y)
    epoch_size = round(Int, num_users() / hyp.batch_size)

    training_batches = Channel(64)
    test_batches = Channel(64)
    losses = Float64[]
    try
        # integer count of producer pairs; at least one
        num_workers = max(Threads.nthreads() ÷ 2 - 1, 1)
        for _ = 1:num_workers
            Threads.@spawn generate_batches(
                training,
                user_features,
                hyp.batch_size,
                training_batches,
            )
            Threads.@spawn generate_batches(
                test,
                user_features,
                hyp.batch_size,
                test_batches,
            )
        end

        loss = Inf
        while (!stop!(stopper, loss))
            for _ = 1:epochs_per_checkpoint
                for _ = 1:epoch_size
                    Flux.train!(batchloss, ps, take!(training_batches), opt)
                end
            end

            loss = split_loss(m, epoch_size, test_batches)
            push!(losses, loss)
            if loss == minimum(losses)
                best_model = deepcopy(m |> cpu)
            end
            if verbose
                @info "loss $loss"
            end
        end
    finally
        # always release the producers, even when training throws
        close(training_batches)
        close(test_batches)
    end
    return best_model, minimum(losses)
end;

## Hyperparameter Tuning

In [12]:
"""
    create_hyperparams(hyp, λ)

Return a copy of `hyp` with the tunable fields set from the log-scale vector `λ`:
`learning_rate = 10^(λ[1] - 3)` and `l2penalty = 10^(λ[2] - 5)`.
"""
function create_hyperparams(hyp, λ)
    learning_rate = 10^(λ[1] - 3)
    l2penalty = 10^(λ[2] - 5)
    tuned = @set hyp.learning_rate = learning_rate
    tuned = @set tuned.l2penalty = l2penalty
    return tuned
end;

In [13]:
"""
    optimize_hyperparams(hyp; max_evals)

Search the two-dimensional log-scale vector `λ` (see `create_hyperparams`) with NLopt's
Nelder-Mead algorithm, starting from zeros, and return the best `λ` found.

The objective is the validation loss returned by `train_model`; at most `max_evals`
objective evaluations are made. No model is saved here.
"""
function optimize_hyperparams(hyp; max_evals)
    function nlopt_loss(λ, grad)
        # nlopt internally converts to float64 because it calls a c library
        λ = convert.(Float32, λ)
        _, loss = train_model(create_hyperparams(hyp, λ))
        @info "$λ $loss"
        return loss
    end
    opt = NLopt.Opt(:LN_NELDERMEAD, 2)
    opt.initial_step = 1
    opt.maxeval = max_evals
    opt.min_objective = nlopt_loss
    minf, λ, ret = NLopt.optimize(opt, zeros(Float32, 2))
    numevals = opt.numevals

    @info (
        "found minimum $minf at point $λ after $numevals function calls " *
        "(ended because $ret)"
    )
    return λ
end;

## Save Model

In [14]:
"""
    train_alpha(hyp, outdir; tune_hyperparams = true)

Train the final model and write it to `outdir`.

With `tune_hyperparams` the log-scale vector `λ` comes from `optimize_hyperparams`
(10 evaluations); otherwise `λ` is all zeros. Either way the learning rate and L2 penalty
of `hyp` are replaced through `create_hyperparams` before training. The model, `λ` and the
final hyperparameters are saved with `write_params`.
"""
function train_alpha(hyp, outdir; tune_hyperparams = true)
    set_logging_outdir(outdir)

    λ = if tune_hyperparams
        @info "Optimizing hyperparameters..."
        optimize_hyperparams(hyp; max_evals = 10)
    else
        zeros(2)
    end
    hyp = create_hyperparams(hyp, λ)

    @info "Training model..."
    m, validation_loss = train_model(
        hyp;
        max_checkpoints = 1000,
        epochs_per_checkpoint = 1,
        patience = 10,
    )
    @info "Trained model loss: $validation_loss"

    @info "Writing alpha..."
    saved = Dict("m" => m, "λ" => λ, "hyp" => hyp)
    write_params(saved, outdir)
    @info "Wrote alpha!"
end;

In [15]:
# Candidate feature sets: each entry is the name of an alpha whose predictions feed the model.

# Reduced set; this is the one passed to the training run in the next cell.
restriced_alphas = [
    "Explicit",
    "LinearImplicit",
    "ErrorExplicit",
    "ErrorImplicit",
    "ExplicitUserItemBiases",
    "NeuralImplicitUserItemBiases",
]
# Ensemble-level alphas only.
ensemble_alphas = [
    "Explicit",
    "LinearExplicit",
    "LinearImplicit",
    "ErrorExplicit",
    "ErrorImplicit",
]
# Ensemble alphas followed by every raw explicit and raw implicit alpha.
all_alphas = vcat(ensemble_alphas, explicit_raw_alphas, implicit_raw_alphas);

In [16]:
# Train and save the "BPR" alpha on the restricted feature set.
# l2penalty and learning_rate are NaN placeholders: train_alpha always replaces them via
# create_hyperparams, and with tune_hyperparams = false it does so from λ = zeros(2).
train_alpha(
    Hyperparams(
        batch_size = 1024,
        features = restriced_alphas,
        l2penalty = NaN,
        learning_rate = NaN,
        seed = 20220609,
    ),
    "BPR";
    tune_hyperparams = false,
)

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220704 21:25:41 Training model...
[38;5;6m[1m┌ [22m[39m[38;5;6m[1mInfo: [22m[39m20220704 21:25:41 The GPU function is being called but the GPU is not accessible. 
[38;5;6m[1m└ [22m[39mDefaulting back to the CPU. (No action is required if you want to run on the CPU).
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:19[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:12[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:01:03[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:04[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:32[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:22[39m
[32mProg

In [17]:
# 0.1206148589547739