# Bayesian Personalized Ranking
* Creates a model for pairwise classification: given the features of two of a user's items, it predicts which of the two has the higher priority

In [None]:
using Flux

import CUDA
import SparseArrays: sparse
import NBInclude: @nbinclude
@nbinclude("../Alpha.ipynb")
@nbinclude("EnsembleInputs.ipynb")
@nbinclude("SuppressImplicit.ipynb");

In [None]:
# support both gpu and cpu training

"""
    device(x)

Pass `x` through `gpu` and return the result. Every batch produced in this
notebook is routed through this single function before being handed to the model.
"""
device(x) = gpu(x)

# CPU fallback: when CUDA is not functional, give BLAS as many threads as Julia has.
CUDA.functional() || LinearAlgebra.BLAS.set_num_threads(Threads.nthreads());

## Data Preprocessing

In [None]:
"""
    add_priorities!(user_priorities, split, priority)

For every row of the raw split named `split`, store `priority(rating)` under
`user_priorities[user][item]`.

# Arguments
- `user_priorities`: nested mapping `user => item => priority`; it must already
  contain an inner mapping for every user that appears in the split.
- `split`: name of the split, forwarded to `get_raw_split`.
- `priority`: function mapping a single rating to the value to store.

An existing entry for the same `(user, item)` pair is overwritten.
"""
function add_priorities!(user_priorities, split, priority)
    a = get_raw_split(split)
    users, items, ratings = a.user, a.item, a.rating
    @showprogress for row = 1:length(ratings)
        user_priorities[users[row]][items[row]] = priority(ratings[row])
    end
end

"""
    get_user_priorities()

Build the nested mapping `user => item => (tier, rating)` from the three test splits.

Each user in `1:num_users()` starts with an empty inner mapping. The splits are
then applied in this order, so a later split overwrites an earlier entry for the
same `(user, item)` pair:

- `"explicit_test"`: `(1, rating)`
- `"implicit_test"`: `(1, NaN32)`
- `"negative_test"`: `(0, NaN32)`
"""
function get_user_priorities()
    ItemPriorities = Dict{Int32,Tuple{Int32,Float32}}
    user_priorities = Dict{Int32,ItemPriorities}()
    @showprogress for u = 1:num_users()
        user_priorities[u] = ItemPriorities()
    end
    # (split name, rating -> priority) pairs, applied in order.
    sources = (
        ("explicit_test", r -> (1, r)),
        ("implicit_test", r -> (1, NaN32)),
        ("negative_test", r -> (0, NaN32)),
    )
    for (split, priority) in sources
        add_priorities!(user_priorities, split, priority)
    end
    return user_priorities
end

"""
    training_test_split(user_priorities; p = 0.9)

Partition `user_priorities` by user id into a `(training, test)` pair of dictionaries.

Users whose id is strictly below `num_users() * p` go to `training`; all others
go to `test`. The inner per-user mappings are shared with the input, not copied.
"""
function training_test_split(user_priorities; p = 0.9)
    SplitDict = Dict{Int32,Dict{Int32,Tuple{Int32,Float32}}}
    training, test = SplitDict(), SplitDict()
    cutoff = num_users() * p
    @showprogress for k in keys(user_priorities)
        bucket = k < cutoff ? training : test
        bucket[k] = user_priorities[k]
    end
    return training, test
end;

In [None]:
"""
    add_features!(user_features, alphas, split)

For every row `i` of the raw split named `split`, store the vector
`[a[i] for a in as]` under `user_features[user][item]`, where `as` holds the
`rating` column returned by `read_alpha` for each entry of `alphas`.

# Arguments
- `user_features`: nested mapping `user => item => feature vector`; it must
  already contain an inner mapping for every user that appears in the split.
- `alphas`: collection of alpha names. A name containing `"Implicit"` but not
  `"Error"` is read with the `implicit` flag set to `true`.
- `split`: name of the split, forwarded to `read_alpha` and `get_raw_split`.

An existing entry for the same `(user, item)` pair is overwritten.
"""
function add_features!(user_features, alphas, split)
    @info "getting $split alphas"
    as = []
    @showprogress for a in alphas
        implicit = occursin("Implicit", a) && !occursin("Error", a)
        push!(as, read_alpha(a, split, implicit).rating)
    end
    df = get_raw_split(split)
    # `identity.(as)` narrows the element type of the collected columns, and the
    # helper call acts as a function barrier, so the per-row comprehension is
    # compiled for the concrete column type instead of building a `Vector{Any}`
    # through dynamic dispatch on every row.
    _store_features!(user_features, df.user, df.item, length(df.rating), identity.(as))
end

# Write the feature vector of each of the `n` rows into `user_features[user][item]`.
function _store_features!(user_features, users, items, n, as)
    @showprogress for i = 1:n
        user_features[users[i]][items[i]] = [a[i] for a in as]
    end
end

"""
    get_user_features(features)

Build the nested mapping `user => item => Vector{Float32}` of feature vectors.

Each user in `1:num_users()` starts with an empty inner mapping, which is then
filled by `add_features!` for the `"explicit_test"`, `"implicit_test"` and
`"negative_test"` splits, in that order. `features` is forwarded to
`add_features!` as its list of alphas.
"""
function get_user_features(features)
    @info "getting user features"
    ItemFeatures = Dict{Int32,Vector{Float32}}
    user_features = Dict{Int32,ItemFeatures}()
    @showprogress for u = 1:num_users()
        user_features[u] = ItemFeatures()
    end
    for split in ("explicit_test", "implicit_test", "negative_test")
        add_features!(user_features, features, split)
    end
    return user_features
end;

In [None]:
# Assemble everything the batching code needs: the training and test priority
# dictionaries (split with the default `p`) and the per-user feature vectors
# built from `features`. Wrapped in `@memoize`.
@memoize function get_data(features)
    priorities = get_user_priorities()
    training, test = training_test_split(priorities)
    return training, test, get_user_features(features)
end;

## Batching

In [None]:
"""
    compare(x, y)

Three-way comparison that is aware of `NaN`.

Returns `NaN` if either argument is `NaN`, `0` if `x == y`, `1` if `x > y`,
and `-1` otherwise.
"""
function compare(x, y)
    (isnan(x) || isnan(y)) && return NaN
    x == y && return 0
    return x > y ? 1 : -1
end

"""
    compare(x::Tuple, y::Tuple)

Compare two tuples position by position with the scalar `compare`.

Returns the first element-wise result that is not equal to `0`, which may be
`NaN`, or `0` when every position compares equal.
"""
function compare(x::Tuple, y::Tuple)
    results = compare.(x, y)
    # `NaN != 0` is true, so a NaN result is picked up here just like 1 or -1.
    decisive = findfirst(r -> r != 0, results)
    return isnothing(decisive) ? 0 : results[decisive]
end

"""
    sample(user_priorities, user_features; training=false)

Draw one random pairwise training example.

A random user is picked, then two of that user's items are drawn with `rand`.
The draw is repeated until the two priorities compare as strictly ordered,
meaning `compare` returns neither `NaN` nor `0`.

Returns `(x, y)`, where `x` is the concatenation of the two items' feature
vectors and `y` is a one-element vector holding `1f0` if the first item has the
higher priority and `0f0` otherwise.

The `training` keyword is accepted but not used by this function. The loop has
no exit other than a successful draw.
"""
function sample(user_priorities, user_features; training=false)
    while true
        u, items = rand(user_priorities, 1)[1]
        length(items) > 1 || continue
        first_pick, second_pick = rand(items, 2)
        outcome = compare(first_pick[2], second_pick[2])
        # Skip pairs that are incomparable (NaN) or tied (0).
        (isnan(outcome) || outcome == 0) && continue
        item_features = user_features[u]
        x = vcat(item_features[first_pick[1]], item_features[second_pick[1]])
        # Map the comparison result -1 / 1 onto the label 0 / 1.
        label = Float32((outcome + 1) / 2)
        return x, [label]
    end
end

"""
    get_batch(user_priorities, user_features, batch_size; training=false)

Draw `batch_size` examples with `sample` and stack them into one batch.

Returns a one-element vector containing the tuple `(X, y)`, where `X` and `y`
are the results of `Flux.batch` on the sampled inputs and labels, each passed
through `device`. `training` is forwarded to `sample`.
"""
function get_batch(user_priorities, user_features, batch_size; training=false)
    # A comprehension lets the element type be derived from what `sample`
    # returns, instead of accumulating into untyped `Any` vectors.
    samples = [
        sample(user_priorities, user_features; training=training) for _ = 1:batch_size
    ]
    Xs = first.(samples)
    ys = last.(samples)
    return [(Flux.batch(Xs) |> device, Flux.batch(ys) |> device)]
end;