In [1]:
# Identifier of this notebook's model.
# NOTE(review): presumably read by helpers from Alpha.ipynb (e.g. `write_params`) — confirm.
name = "CombineImplicitSignals"
# Empty list: no residual alphas are configured in this notebook.
# NOTE(review): presumably consumed by Alpha.ipynb — confirm.
residual_alphas = [];

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

## Compute priors

In [3]:
# Training split, loaded once at top level; `get_base_popularity_prior` reads it as a global.
training = get_split("training");

In [4]:
"""
    get_base_uniform_prior()

Return a vector of length `num_items()` in which every entry equals `1 / num_items()`.
"""
function get_base_uniform_prior()
    item_count = num_items()
    return fill(1 / item_count, item_count)
end

"""
    get_uniform_prior(split)

Return a vector with one entry per element of `split.item`, each entry equal to
`1.0f0 / num_items()`.
"""
function get_uniform_prior(split)
    weight = 1.0f0 / num_items()
    n_rows = length(split.item)
    return fill(weight, n_rows)
end;

In [5]:
"""
    get_base_popularity_prior()

Return the relative frequency of each item id in the global `training` split.

The result is a `maximum(training.item) × 1` `Float32` matrix whose entry `i` is
the number of occurrences of item `i` in `training.item` divided by the total
number of occurrences, so the entries sum to one.
"""
function get_base_popularity_prior()
    items = training.item
    # Split the rows into one contiguous chunk per available thread. Every chunk
    # owns a private count column, so no two loop iterations write to the same
    # column. Indexing the accumulator by `Threads.threadid()` is avoided on
    # purpose: the id is not guaranteed to stay within `1:Threads.nthreads()`
    # nor to stay fixed for a task.
    chunk_len = max(1, cld(length(items), Threads.nthreads()))
    chunks = collect(Iterators.partition(eachindex(items), chunk_len))
    counts = zeros(Float32, maximum(items), length(chunks))
    @tprogress Threads.@threads for c in eachindex(chunks)
        for j in chunks[c]
            counts[items[j], c] += 1
        end
    end
    # Merge the per-chunk counts (keeps the n×1 matrix shape), then normalize.
    popularity_prior = sum(counts, dims = 2)
    return popularity_prior ./ sum(popularity_prior)
end

"""
    get_popularity_prior(split)

Return a `Float32` vector of length `length(split.rating)` whose entry `j` is the
base popularity prior (see `get_base_popularity_prior`) of item `split.item[j]`.
"""
function get_popularity_prior(split)
    base_prior = get_base_popularity_prior()
    row_items = split.item

    out = zeros(Float32, length(split.rating))
    @tprogress Threads.@threads for row in eachindex(out)
        out[row] = base_prior[row_items[row]]
    end
    return out
end;

## Load alphas

In [6]:
# Names of the alphas whose predictions are combined; with `K = 1:1` this is
# only "GNN.1.Implicit". The commented-out rows are disabled alternative inputs.
alphas = [["GNN.$K.Implicit" for K = 1:1]
#   ["GNN.Resid.$K.Implicit" for K = 1:1]
# ["Implicit"]
]

1-element Vector{String}:
 "GNN.1.Implicit"

In [7]:
"""
    get_indep(split, alphas)

Build the feature matrix for the split named `split`.

Column `j` (for `j` in `1:length(alphas)`) holds `get_alpha(alphas[j], split).rating`.
Two further columns are appended: the uniform prior and then the popularity prior
of the same split. The matrix has one row per entry of the split's `user` column.
"""
function get_indep(split, alphas)
    # Load the split once and reuse it for both the row count and the priors.
    split_df = get_split(split)
    X = zeros(Float32, length(split_df.user), length(alphas))
    @showprogress for j in eachindex(alphas)
        X[:, j] = get_alpha(alphas[j], split).rating
    end
    return hcat(X, get_uniform_prior(split_df), get_popularity_prior(split_df))
end;

In [8]:
# Features for the validation split; `loss` reads this global `X` when the weights are fitted.
X = get_indep("validation", alphas);

[32mProgress: 100%|███████████████████████████| Time: 0:00:03 ( 0.38 μs/it)[39m/it)[39m


In [9]:
# Log the loss obtained by each column of `X` on its own.
for col in axes(X, 2)
    @debug sparse_crossentropy(X[:, col])
end

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220319 23:47:08 6.6619294933991435
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220319 23:47:08 9.849664724534657
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220319 23:47:09 7.532273907683687


## Train a linear model

In [10]:
"""
    loss(coefs)

Objective for fitting the mixture weights.

The entries of `coefs` are squared and normalized to sum to one, the columns of the
global `X` are combined with those weights, and `sparse_crossentropy` of the
combined column is returned.
"""
function loss(coefs)
    weights = coefs .^ 2
    # prevent division by zero
    sum(weights) == 0 && (weights .+= 1)
    weights = weights ./ sum(weights)

    blended = sum(X .* weights', dims = 2)
    return sparse_crossentropy(blended)
end;

In [11]:
# Minimize `loss` with L-BFGS on the unit sphere, starting from equal
# coefficients (one per column of `X`) and using forward-mode autodiff.
res = optimize(
    loss,
    fill(1.0, size(X, 2)),
    LBFGS(manifold = Optim.Sphere()),
    Optim.Options(show_trace = true, extended_trace = true);
    autodiff = :forward,
)

Iter     Function value   Gradient norm 
     0     7.238512e+00     6.178381e-01
 * Current step size: 1.0
 * time: 1.3144700527191162
 * g(x): [-0.617838087568771, 0.5630877287636165, 0.05475035880515442]
 * x: [0.5773502691896258, 0.5773502691896258, 0.5773502691896258]
     1     6.782562e+00     6.019688e-01
 * Current step size: 2.5036021214034596
 * time: 5.301657199859619
 * g(x): [-0.2537442105176637, -0.6019687626751249, 0.08612503810816974]
 * x: [0.9141965501241675, -0.35824554235961, 0.18948561718647491]
     2     6.665540e+00     9.775219e-02
 * Current step size: 0.70899524006742
 * time: 6.673318147659302
 * g(x): [-0.008524687031078825, 0.09775219155606696, 0.0229239717756108]
 * x: [0.9952775625252133, 0.07136434707260042, 0.06580048252688774]
     3     6.662084e+00     1.035943e-02
 * Current step size: 0.9080220763330605
 * time: 8.045809030532837
 * g(x): [-0.0004995483394382731, -0.004536463538997861, 0.010359430681352803]
 * x: [0.9989637764211222, -0.007489693

 * Status: success

 * Candidate solution
    Final objective value:     6.661917e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 3.23e-06 ≰ 0.0e+00
    |x - x'|/|x'|          = 3.23e-06 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.33e-12 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.99e-13 ≰ 0.0e+00
    |g(x)|                 = 1.82e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   20  (vs limit Inf)
    Iterations:    9
    f(x) calls:    25
    ∇f(x) calls:   26


In [12]:
# Display the optimization summary again.
res

 * Status: success

 * Candidate solution
    Final objective value:     6.661917e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 3.23e-06 ≰ 0.0e+00
    |x - x'|/|x'|          = 3.23e-06 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.33e-12 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.99e-13 ≰ 0.0e+00
    |g(x)|                 = 1.82e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   20  (vs limit Inf)
    Iterations:    9
    f(x) calls:    25
    ∇f(x) calls:   26


## Evaluate on the test set

In [13]:
# Turn the optimizer's solution into mixture weights: square each coordinate and
# normalize so the weights sum to one, mirroring the squaring and normalization in `loss`.
β = let squared = Optim.minimizer(res) .^ 2
    squared / sum(squared)
end

3-element Vector{Float64}:
 0.9999023884411934
 2.195953015315322e-5
 7.565202865347872e-5

In [14]:
# Features for the test split, built the same way as the validation features `X`.
X_test = get_indep("test", alphas);

[32mProgress: 100%|███████████████████████████| Time: 0:00:02 ( 0.22 μs/it)[39m


In [15]:
# Loss of the fitted mixture `X_test * β` on the test split.
sparse_crossentropy(X_test * β)

6.661984783967663

In [16]:
# SOTA: 6.6619409316212055

In [17]:
# Persist the fitted weights together with the alpha names and the two base
# priors. The priors are listed in the same order as the prior columns appended
# by `get_indep` (uniform first, then popularity), matching the last two entries of β.
# NOTE(review): `write_params` is not defined in this notebook; where and how it
# stores the dictionary is not visible here.
write_params(
    Dict(
        "β" => β,
        "alphas" => alphas,
        "priors" => [get_base_uniform_prior(), get_base_popularity_prior()],
    ),
);

[32mProgress: 100%|███████████████████████████| Time: 0:00:01 ( 0.19 μs/it)[39m
