# Ranking
* This is trained to learn the partial ordering implied by each user's watches
* Items that are watched are preferred to items that have not been watched
* If two items have been watched, then the impression metadata determines
  which one, if any, is liked more
* It uses the position-aware maximum likelihood estimation loss
* The inputs to this model are features generated by other models

In [1]:
import NBInclude: @nbinclude
@nbinclude("BPR.Base.ipynb");

In [2]:
"""
    get_features(alphas::Vector{String}, split::String, content::String)

Read the ratings of every alpha in `alphas` for the given `split` and `content`
and return them as a dense `Float16` matrix with one row per alpha and one column
per entry of `get_raw_split(split, content).user`.
"""
function get_features(alphas::Vector{String}, split::String, content::String)
    @info "getting $split $content alphas"
    num_rows = length(get_raw_split(split, content).user)
    num_alphas = length(alphas)
    columns = Matrix{Float16}(undef, num_rows, num_alphas)
    # Every iteration writes to its own column, so the threads never touch the
    # same memory.
    @tprogress Threads.@threads for j = 1:num_alphas
        ratings = read_raw_alpha(alphas[j], split, content).rating
        columns[:, j] = convert.(Float16, ratings)
    end
    # Materialize the transpose so callers receive a plain dense matrix.
    return collect(columns')
end;

"""
    normalize(x::AbstractArray; dims = 1)

Standardize `x` to zero mean and unit (population) standard deviation along `dims`.

The statistics are computed in `Float32`, and the standardized values are converted
back to the element type of `x`. Slices whose standard deviation is exactly zero
(constant features) are divided by one instead of zero, so they become zeros
rather than `NaN`.

# Returns
A tuple `(normalized, stats)`, where `stats` is a `Dict` with the keys `"μ"` (the
means) and `"σ"` (the standard deviations actually used for scaling, i.e. with
zeros replaced by one).
"""
function normalize(x::AbstractArray; dims = 1)
    T = eltype(x)
    x32 = convert.(Float32, x)
    μ = mean(x32, dims = dims)
    raw_σ = std(x32, dims = dims, mean = μ, corrected = false)
    # A constant slice has σ == 0 and its centered values are all 0; dividing
    # 0 by 0 would fill the slice with NaN, so scale such slices by one instead.
    σ = map(s -> iszero(s) ? one(s) : s, raw_σ)
    return convert.(T, (x32 .- μ) ./ σ), Dict("μ" => μ, "σ" => σ)
end

"""
    get_user_features()

Build a sparse `num_items()` × `num_users()` matrix from the "training"/"implicit"
split, with items as row indices, users as column indices, and the ratings
stored as `Float16`.
"""
function get_user_features()
    watches = get_split("training", "implicit")
    ratings = convert.(Float16, watches.rating)
    return sparse(watches.item, watches.user, ratings, num_items(), num_users())
end

"""
    get_features(alphas::Vector{String}, allow_ptw::Bool)

Assemble the ranking inputs from the "test" split of every content type in
`all_contents`; the "ptw" content is left out unless `allow_ptw` is `true`.

# Returns
The tuple `(user_features, query_features, priorities, user_to_indexes,
index_to_item, preprocessing_data)`. `query_features` is normalized along
`dims = 2`, and `preprocessing_data` is the statistics dictionary returned by
`normalize`.
"""
function get_features(alphas::Vector{String}, allow_ptw::Bool)
    contents = allow_ptw ? all_contents : filter(!=("ptw"), all_contents)

    # Call `loader("test", content)` for each content type and combine the
    # pieces with `agg` (side by side unless stated otherwise).
    function gather(loader; agg = hcat)
        return reduce(agg, loader("test", content) for content in contents)
    end

    user_features = get_user_features()
    raw_query_features = gather() do split, content
        get_features(alphas, split, content)
    end
    query_features, preprocessing_data = normalize(raw_query_features; dims = 2)
    priorities = gather(get_priorities)
    test_splits = [("test", content) for content in contents]
    user_to_indexes = get_user_to_indexes(test_splits)
    index_to_item = gather(; agg = vcat) do split, content
        get_raw_split(split, content).item
    end

    return (
        user_features,
        query_features,
        priorities,
        user_to_indexes,
        index_to_item,
        preprocessing_data,
    )
end

"""
    get_embedding(u, a, q, user_features, query_features)

Return the inputs for a single example as a 3-tuple: a copy of column `u` of
`user_features`, the one-element vector `[a]`, and a copy of column `q` of
`query_features`.
"""
function get_embedding(
    u::Integer,
    a::Integer,
    q::Integer,
    user_features::AbstractMatrix,
    query_features::AbstractMatrix,
)
    user_column = user_features[:, u]
    query_column = query_features[:, q]
    return (user_column, [a], query_column)
end;

In [3]:
"""
    build_model(hyp::Hyperparams)

Construct the ranking network: a `Join` front end followed by three `Dense`
layers (`relu`, `relu`, then a single linear output unit).

The first `Dense` layer after the `Join` expects `length(hyp.alphas) + 64`
inputs, i.e. two 32-wide branch outputs plus one value per alpha.
"""
function build_model(hyp::Hyperparams)
    embedding_size = 32
    # NOTE(review): `Join` is defined outside this cell; presumably it applies
    # each branch to the matching input and combines the outputs with `vcat`.
    # The branches are built in the same order as before so that random
    # initialization is unchanged.
    branches = Join(
        vcat,
        Dense(num_items() => embedding_size, bias = false),
        Embedding(num_items() => embedding_size),
        identity,
    )
    joined_size = length(hyp.alphas) + 2 * embedding_size
    return Chain(
        branches,
        Dense(joined_size => 64, relu),
        Dense(64 => 32, relu),
        Dense(32 => 1),
    )
end;

In [4]:
# Features fed to the ranker: the named alphas listed here followed by every
# entry of the raw alpha collections.
alphas = vcat(
    [
        "LinearExplicit",
        "LinearImplicit",
        "LinearPtw",
        "Explicit",
        "NonlinearImplicit",
        "NonlinearPtw",
    ],
    explicit_raw_alphas,
    implicit_raw_alphas,
    ptw_raw_alphas,
    nondirectional_raw_alphas,
);
# `input_size`, `l2penalty` and `learning_rate` are given placeholder values
# (-1 / NaN) here; `create_hyperparams` is expected to replace them.
hyp = Hyperparams(
    alphas = alphas,
    allow_ptw = false,
    list_size = 2,
    batch_size = 1024,
    seed = 20220609,
    input_size = -1,
    l2penalty = NaN,
    learning_rate = NaN,
)
hyp = create_hyperparams(hyp, zeros(Float32, 2))

Hyperparams
  allow_ptw: Bool false
  alphas: Array{String}((19,))
  batch_size: Int32 1024
  input_size: Int32 22724
  l2penalty: Float32 1.0f-5
  learning_rate: Float32 0.0003f0
  list_size: Int32 2
  seed: UInt64 0x0000000001348ac1


In [None]:
# Train the model with the hyperparameters in `hyp`. The second argument is
# presumably the name the trained alpha is registered under — confirm against
# the definition of `train_alpha`.
train_alpha(hyp, "BPR.Neural.Test")

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20221121 11:56:32 Training model...
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20221121 11:56:32 Initializing model
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20221121 11:56:33 Getting data
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20221121 11:57:19 getting test explicit alphas
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20221121 11:57:22 getting test implicit alphas
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20221121 11:57:24 getting test negative alphas
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20221121 11:58:17 getting test explicit priorities
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 3.10 μs/it)[39mit)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20221121 11:58:26 getting test implicit priorities
[32mProgress: 100%|███████████████████████████| Time: 0:00:01 ( 1.84 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m

In [None]:
# 0.18465150250650614 using new mle loss formulation ( -> 64 -> 32 -> 1)
# 0.1770052581855019 using input normalization
# going wider by 4x didn't help
# going deeper by 2 layers didn't help
# 0.18148092587706433 using 50% dropout makes things worse
# 0.05950891730785771 by scaling the loss function down (should be a no-op)