In [1]:
# Identifier for this notebook's model.
# NOTE(review): both globals are assigned before Alpha.ipynb is included, so they are
# presumably read by that notebook — confirm there.
name = "CombineImplicitSignals"
# Empty list: no residual alphas are configured here.
residual_alphas = [];

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

## Compute priors

In [3]:
# Load the training split once; get_base_popularity_prior reads this global.
training = get_split("training");

In [4]:
"""
    get_base_uniform_prior()

Return a vector with one entry per item, every entry equal to `1 / num_items()`.
"""
function get_base_uniform_prior()
    item_count = num_items()
    uniform_mass = 1 / item_count
    return fill(uniform_mass, item_count)
end

"""
    get_uniform_prior(split)

Return a vector with one entry per row of `split.item`, every entry equal to
`1.0f0 / num_items()`.
"""
function get_uniform_prior(split)
    uniform_mass = 1.0f0 / num_items()
    row_count = length(split.item)
    return fill(uniform_mass, row_count)
end;

In [5]:
"""
    get_base_popularity_prior()

Return the empirical item distribution of the global `training` split.

The result is a `maximum(training.item) × 1` `Float32` matrix whose entry `i` is the
fraction of rows in `training.item` equal to `i`; the entries sum to one.

Throws whatever `maximum` throws when `training.item` is empty.
"""
function get_base_popularity_prior()
    items = training.item
    num_slots = maximum(items)

    # Give every chunk of the input its own histogram column. Indexing a shared buffer
    # by `Threads.threadid()` is unsafe: tasks may migrate between threads or share
    # one, so two tasks can end up updating the same column concurrently.
    num_chunks = min(Threads.nthreads(), length(items))
    chunk_size = cld(length(items), num_chunks)
    chunks = collect(Iterators.partition(items, chunk_size))

    # Integer counts stay exact regardless of how often an item occurs.
    counts = zeros(Int, num_slots, length(chunks))
    Threads.@threads for c in eachindex(chunks)
        for i in chunks[c]
            counts[i, c] += 1
        end
    end

    # Reduce over chunks; `dims = 2` keeps the N×1 matrix shape callers already get.
    popularity_prior = Float32.(sum(counts, dims = 2))
    return popularity_prior ./ sum(popularity_prior)
end

"""
    get_popularity_prior(split)

Return a `Float32` vector with one entry per row of `split.item`: the value that
`get_base_popularity_prior()` assigns to that row's item.
"""
function get_popularity_prior(split)
    popularity_prior = get_base_popularity_prior()

    # Size the output from `split.item`, the field that is actually indexed below
    # (and the same field `get_uniform_prior` uses), rather than from `split.rating`.
    split_prior = zeros(Float32, length(split.item))
    # Each iteration writes a distinct slot, so the threaded loop needs no locking.
    # NOTE(review): an item id larger than `maximum(training.item)` is out of bounds
    # for `popularity_prior` — confirm every split only contains ids seen in training.
    @tprogress Threads.@threads for j = 1:length(split_prior)
        split_prior[j] = popularity_prior[split.item[j]]
    end
    return split_prior
end;

## Load alphas

In [6]:
# Names of the alphas to blend; the per-family lists are concatenated into one vector.
alphas = vcat(
    ["GNN.$K.Implicit" for K in 1:1],
    ["GNN2.$K.Implicit" for K in 1:1],
    ["GNN3.$K.Implicit" for K in 1:1],
    ["GNN.Implicit.$K" for K in 1:1],
)

4-element Vector{String}:
 "GNN.1.Implicit"
 "GNN2.1.Implicit"
 "GNN3.1.Implicit"
 "GNN.Implicit.1"

In [7]:
"""
    get_indep(split, alphas)

Build the `Float32` feature matrix for the split named `split`.

The matrix has one row per entry of `get_split(split).user` and `length(alphas) + 2`
columns: the `rating` column of each alpha in `alphas` (in order), followed by the
uniform prior and then the popularity prior for the split.
"""
function get_indep(split, alphas)
    # Load the split a single time and reuse it for the row count and both priors.
    split_df = get_split(split)
    num_rows = length(split_df.user)
    num_alphas = length(alphas)

    # Allocate the final width up front so the prior columns are written in place
    # instead of copying the whole matrix through `hcat`.
    X = zeros(Float32, num_rows, num_alphas + 2)
    @showprogress for j = 1:num_alphas
        X[:, j] = get_alpha(alphas[j], split).rating
    end
    X[:, num_alphas + 1] = get_uniform_prior(split_df)
    X[:, num_alphas + 2] = get_popularity_prior(split_df)
    return X
end;

In [8]:
# Validation feature matrix: one column per alpha, then the uniform and popularity
# priors. `loss` below reads this global.
X = get_indep("validation", alphas);

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:02 ( 0.21 μs/it)[39m/it)[39m


In [9]:
# Log the loss of every column of X on its own (alphas first, then the two priors).
for column in axes(X, 2)
    @debug sparse_crossentropy(X[:, column])
end

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220404 05:33:01 6.6619294933991435
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220404 05:33:01 6.706953164943381
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220404 05:33:01 6.696576401161358
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220404 05:33:01 6.470029678062337
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220404 05:33:01 9.849664724534657
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220404 05:33:02 7.532273907683687


## Train a linear model

In [10]:
"""
    loss(coefs)

Objective for fitting the blend weights.

`coefs` is squared element-wise and normalised to sum to one, so the effective weights
are non-negative. The columns of the global matrix `X` are combined with those weights
and the result is scored with `sparse_crossentropy`.
"""
function loss(coefs)
    weights = coefs .^ 2
    total = sum(weights)
    if total == 0
        weights .+= 1 # prevent division by zero
        total = sum(weights)
    end
    weights = weights ./ total

    # A matrix-vector product yields the same weighted row sums as
    # `sum(X .* weights', dims = 2)` without materialising a rows × signals temporary
    # on every evaluation; it is also the form used for the test-set evaluation.
    return sparse_crossentropy(X * weights)
end;

In [11]:
# Minimise `loss` with L-BFGS constrained to the unit sphere, starting from equal
# coefficients and using forward-mode automatic differentiation for the gradient.
res = optimize(
    loss,
    ones(size(X, 2)),
    LBFGS(manifold = Optim.Sphere()),
    Optim.Options(show_trace = true, extended_trace = true);
    autodiff = :forward,
)

Iter     Function value   Gradient norm 
     0     6.796663e+00     2.920057e-01
 * Current step size: 1.0
 * time: 2.318207025527954
 * g(x): [-0.07783809191044475, -0.06191537066790139, -0.07316193316528184, -0.18250992506222413, 0.2920057342568874, 0.1034195865489645]
 * x: [0.4082482904638631, 0.4082482904638631, 0.4082482904638631, 0.4082482904638631, 0.4082482904638631, 0.4082482904638631]
     1     6.583098e+00     4.679164e-01
 * Current step size: 2.748015933651294
 * time: 10.122090101242065
 * g(x): [-0.005175209615991256, 0.02459293552403628, 0.005883047367364072, -0.22622338544305437, -0.4679164229048152, 0.05464890882418531]
 * x: [0.43021137297871875, 0.3999544825573392, 0.4213255788616091, 0.6291123106787653, -0.27257830416953904, 0.08577943627390971]
     2     6.501047e+00     9.689169e-02
 * Current step size: 0.9808484526340042
 * time: 13.5592520236969
 * g(x): [0.04978385680471064, 0.06652586517869213, 0.05721128603454044, -0.09689168963738311, 0.053152741564227

 * Status: success

 * Candidate solution
    Final objective value:     6.468716e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 8.43e-08 ≰ 0.0e+00
    |x - x'|/|x'|          = 8.71e-08 ≰ 0.0e+00
    |f(x) - f(x')|         = 8.88e-16 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.37e-16 ≰ 0.0e+00
    |g(x)|                 = 2.67e-10 ≤ 1.0e-08

 * Work counters
    Seconds run:   87  (vs limit Inf)
    Iterations:    16
    f(x) calls:    49
    ∇f(x) calls:   50


In [12]:
# Display the optimisation summary.
res

 * Status: success

 * Candidate solution
    Final objective value:     6.468716e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 8.43e-08 ≰ 0.0e+00
    |x - x'|/|x'|          = 8.71e-08 ≰ 0.0e+00
    |f(x) - f(x')|         = 8.88e-16 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.37e-16 ≰ 0.0e+00
    |g(x)|                 = 2.67e-10 ≤ 1.0e-08

 * Work counters
    Seconds run:   87  (vs limit Inf)
    Iterations:    16
    f(x) calls:    49
    ∇f(x) calls:   50


## Evaluate on the test set

In [13]:
# Turn the optimiser's solution into blend weights the same way `loss` does:
# square each coefficient, then rescale so the weights sum to one.
β = abs2.(Optim.minimizer(res))
β = β ./ sum(β)

6-element Vector{Float64}:
 0.053780123794900936
 1.4149359354649425e-19
 0.01021680419467455
 0.9359956412357462
 7.430774678355392e-6
 4.60363406089922e-20

In [14]:
# Test-set feature matrix, built with the same columns as the validation matrix.
X_test = get_indep("test", alphas);

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:02 ( 0.30 μs/it)[39m


In [15]:
# Loss of the blended prediction (feature columns weighted by β) on the test split.
sparse_crossentropy(X_test * β)

6.4687146803286275

In [16]:
# SOTA: 6.4687146803286275

In [17]:
# Persist the fitted blend: the weights, the alpha names they apply to, and the two
# item-level priors that back the last two feature columns.
write_params(
    Dict(
        "β" => β,
        "alphas" => alphas,
        # Order matches the feature columns: uniform prior first, then popularity.
        "priors" => [get_base_uniform_prior(), get_base_popularity_prior()],
    ),
);

[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.42 μs/it)[39m
