# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{\Omega}(r_{ij} - u_i - a_j)^2 + \lambda_u \sum_i (u_i - \bar u) ^2 + \lambda_a \sum_j (a_j - \bar a)^2 $
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$

In [1]:
# Identifier for this model. How it is consumed is not visible in this notebook
# (presumably by helpers pulled in from Alpha.ipynb — verify).
name = "UserItemBiases";
# Passed to `get_residuals` when loading the data splits; left empty here.
# NOTE(review): untyped `[]` is a Vector{Any}; the expected element type is not visible here.
residual_alphas = [];

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [None]:
# Load the two data splits through `get_residuals`, passing the `residual_alphas` list.
# `get_residuals` is not defined in this notebook section (presumably it comes from
# Alpha.ipynb — verify). The results are read as globals by `validation_mse`, and their
# `user`, `item` and `rating` fields are accessed by the training/prediction code.
const training = get_residuals("training", residual_alphas)
const validation = get_residuals("validation", residual_alphas);

## Alternating Least Squares Algorithm
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) + \bar u \lambda_u}{|\Omega_i| + \lambda_u} = \dfrac{\rho_i + \bar u \lambda_u}{|\Omega_i| + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [3]:
"""
    get_residuals!(users, items, ratings, u, a, ρ, Ω)

Accumulate per-user residual sums and rating counts over the given rows.

For every row `k`, `ratings[k] - a[items[k]]` is added to `ρ[users[k]]` and
`Ω[users[k]]` is incremented by one. `ρ` and `Ω` are updated in place and are not
reset first, so the caller decides whether they start from zero.

`u` is accepted for signature symmetry but is not read.

Returns the tuple `(ρ, Ω)`.
"""
function get_residuals!(users, items, ratings, u, a, ρ, Ω)
    nrows = length(users)
    for k in 1:nrows
        user = users[k]
        item = items[k]
        rating = ratings[k]
        ρ[user] += rating - a[item]
        Ω[user] += 1
    end
    return ρ, Ω
end

# todo move to utils
"""
    thread_range(n, tid = Threads.threadid(), nt = Threads.nthreads())

Return chunk number `tid` (1-based) of `1:n` split into `nt` contiguous chunks.

The first `rem(n, nt)` chunks get one extra element, so chunk lengths differ by at most
one and the chunks for `tid = 1:nt` cover `1:n` exactly once. A chunk is empty when
there are fewer elements than chunks.

The defaults keep the original one-argument behaviour. Inside `Threads.@threads` loops
pass `tid` and `nt` explicitly: with the dynamic scheduler a loop iteration is not tied
to a particular thread id, so `Threads.threadid()` is not a reliable chunk index.
"""
function thread_range(n, tid = Threads.threadid(), nt = Threads.nthreads())
    d, r = divrem(n, nt)
    from = (tid - 1) * d + min(r, tid - 1) + 1
    to = from + d - 1 + (tid ≤ r ? 1 : 0)
    return from:to
end

"""
    update_users!(users, items, ratings, u, a, λ_u, ρ, Ω)

Overwrite `u` in place with one regularized least-squares update, holding `a` fixed:

    u[i] = (ρ_i + μ * λ_u) / (Ω_i + λ_u)

where `ρ_i` is the sum of `ratings - a[item]` over the rows belonging to `i`, `Ω_i` is
the number of such rows, and `μ` is `mean(u)` taken before the update. Calling it with
the `users`/`items` and `u`/`a` arguments swapped updates the other side.

# Arguments
- `users`, `items`, `ratings`: parallel vectors, one entry per observed rating.
- `u`: biases being updated (mutated).
- `a`: biases of the other side (read only).
- `λ_u`: regularization strength pulling `u` toward its mean.
- `ρ`, `Ω`: scratch matrices indexed by `(id, chunk)`; every column is reset and
  overwritten. Both must have the same number of columns.

# Throws
- `DimensionMismatch` if `ρ` and `Ω` have a different number of columns.

Returns `nothing`.
"""
function update_users!(users, items, ratings, u, a, λ_u, ρ, Ω)
    # One chunk of rows per scratch column. Chunks are keyed by the loop index, not by
    # `Threads.threadid()`: iterations of `@threads` are not guaranteed to run one per
    # thread id, which could skip chunks and leave stale data in unused columns.
    nchunks = size(ρ, 2)
    size(Ω, 2) == nchunks ||
        throw(DimensionMismatch("ρ and Ω must have the same number of columns"))
    nrows = length(ratings)
    Threads.@threads for t = 1:nchunks
        rows = thread_range(nrows, t, nchunks)
        ρ[:, t] .= 0
        Ω[:, t] .= 0
        @views get_residuals!(
            users[rows],
            items[rows],
            ratings[rows],
            u,
            a,
            ρ[:, t],
            Ω[:, t],
        )
    end
    # Fresh names rather than rebinding `ρ`/`Ω`: reassigning variables that the
    # `@threads` closure captured would force them to be boxed.
    ρ_total = sum(ρ, dims = 2)
    Ω_total = sum(Ω, dims = 2)

    μ = mean(u)
    Threads.@threads for i in eachindex(u)
        u[i] = (ρ_total[i] + μ * λ_u) / (Ω_total[i] + λ_u)
    end
    return nothing
end;

In [4]:
"""
    train_model(training, λ_u, λ_a, stop_criteria)

Fit user biases `u` and item biases `a` by alternating `update_users!` sweeps.

`training` must expose `user`, `item` and `rating` fields. Both bias vectors start at
zero, sized by the largest user and item id, with element types taken from `λ_u` and
`λ_a`. Before each pair of sweeps `stop!(stop_criteria, [u, a])` is evaluated, and
training ends as soon as it returns `true`.

Returns the tuple `(u, a)`.
"""
function train_model(training, λ_u, λ_a, stop_criteria)
    @debug "training model with parameters [$λ_u, $λ_a]"
    users = training.user
    items = training.item
    ratings = training.rating

    u = zeros(eltype(λ_u), maximum(users))
    a = zeros(eltype(λ_a), maximum(items))

    # Per-chunk scratch space for the residual sums (ρ) and rating counts (Ω).
    nt = Threads.nthreads()
    scratch(v) = zeros(eltype(v), length(v), nt)
    ρ_u, Ω_u = scratch(u), scratch(u)
    ρ_a, Ω_a = scratch(a), scratch(a)

    while true
        stop!(stop_criteria, [u, a]) && break
        # Users first, then items with the roles of the two sides swapped.
        update_users!(users, items, ratings, u, a, λ_u, ρ_u, Ω_u)
        update_users!(items, users, ratings, a, u, λ_a, ρ_a, Ω_a)
    end
    return u, a
end;

In [5]:
"""
    make_prediction(users, items, u, a)

Predict a rating for each `(users[i], items[i])` pair as user bias plus item bias.

An id larger than the corresponding bias vector has no learned bias, so the mean of
that bias vector is used instead.

# Arguments
- `users`, `items`: parallel vectors of user and item ids.
- `u`, `a`: learned user and item bias vectors, indexed by id.

Returns a vector of predictions with element type `eltype(u)` and length `length(users)`.
"""
function make_prediction(users, items, u, a)
    # Fallback biases for unseen ids, computed once instead of once per unseen row.
    u_mean = mean(u)
    a_mean = mean(a)
    r = zeros(eltype(u), length(users))
    for i in eachindex(r)
        user_bias = users[i] > length(u) ? u_mean : u[users[i]]
        item_bias = items[i] > length(a) ? a_mean : a[items[i]]
        r[i] = user_bias + item_bias
    end
    return r
end;

## Training

In [6]:
"""
    validation_mse(λ)

Objective for the hyperparameter search: train on the global `training` split with
regularization strengths `exp.(λ)` and return the mean squared error of the resulting
predictions on the global `validation` split.

`λ` holds the log of the user and item regularization strengths, in that order.
"""
function validation_mse(λ)
    # Searching in log space keeps both regularization strengths strictly positive.
    λ_u, λ_a = exp.(λ)
    stopper = convergence_stopper(1e-9)
    u, a = train_model(training, λ_u, λ_a, stopper)
    predictions = make_prediction(validation.user, validation.item, u, a)
    return mse(validation.rating, predictions)
end;

In [7]:
# Find the best regularization hyperparameters
# `validation_mse` exponentiates its argument, so the search runs over log-λ and the
# all-zeros start corresponds to λ_u = λ_a = 1.
res = optimize(
    validation_mse,
    fill(0.0, 2), # initial guess
    LBFGS(),
    autodiff = :forward,
    Optim.Options(show_trace = true, extended_trace = true),
);
# Map the minimizer back from log space to the actual regularization strengths.
λ = exp.(Optim.minimizer(res));

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:18:52 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0,0.0,1.0)]


Iter     Function value   Gradient norm 
     0     1.698360e+00     3.375168e-04
 * Current step size: 1.0
 * time: 0.026012897491455078
 * g(x): [-0.0003375167956379876, -1.602234195232483e-5]
 * x: [0.0, 0.0]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:01 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0003375737608404,1.0003375737608404,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0000160224703107,0.0,1.0000160224703107)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:05 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0016890087493926,1.0016890087493926,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0000801149187903,0.0,1.0000801149187903)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:08 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0084736194764294,1.0084736194764294,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0004006387830962,0.0,1.0004006387830962)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:12 training model wit

     1     1.698217e+00     1.233253e-04
 * Current step size: 1671.2383995296937
 * time: 28.419790029525757
 * g(x): [-0.0001233252628261006, -1.6127390726157897e-5]
 * x: [0.5640710293564212, 0.026777153121120816]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:27 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.437301470965261,2.437301470965261,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0887370702970596,0.0,1.0887370702970596)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:31 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.018596420380201,2.018596420380201,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0527739919872565,0.0,1.0527739919872565)]


     2     1.698206e+00     1.844736e-05
 * Current step size: 0.42326421720872914
 * time: 35.94985389709473
 * g(x): [-1.8447363460333914e-5, -1.6194140898360793e-5]
 * x: [0.7024024285030079, 0.051428577624254505]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:35 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0762383206309023,2.0762383206309023,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0817673301936994,0.0,1.0817673301936994)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:38 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.323742586012064,2.323742586012064,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.205948376771055,0.0,1.205948376771055)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:42 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0954450956526296,2.0954450956526296,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.091421848688169,0.0,1.091421848688169)]


     3     1.698205e+00     1.626064e-05
 * Current step size: 1.3270510716141308
 * time: 47.24509906768799
 * g(x): [1.4846781556103093e-5, -1.62606410529331e-5]
 * x: [0.7397659869672605, 0.08748129452524286]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:46 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.097568473444909,2.097568473444909,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.128288638170845,0.0,1.128288638170845)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:50 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.106083523257102,2.106083523257102,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.2886367132618652,0.0,1.2886367132618652)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:53 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.14918008821939,2.14918008821939,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.5042744767541434,0.0,2.5042744767541434)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:19:57 training model with paramete

     4     1.698194e+00     3.900000e-05
 * Current step size: 25.43718362498589
 * time: 66.9376790523529
 * g(x): [3.900000136452024e-5, -8.72595566463623e-6]
 * x: [0.7655292021510566, 0.9325226132852517]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:06 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.074257174686084,2.074257174686084,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(6.639136529328637,0.0,6.639136529328637)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:09 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.139637121123348,2.139637121123348,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.896005621034208,0.0,2.896005621034208)]


     5     1.698193e+00     3.425799e-05
 * Current step size: 0.1361950496676593
 * time: 74.64708209037781
 * g(x): [3.425798632893179e-5, -4.74307153868007e-6]
 * x: [0.7606362450866863, 1.0633324154864372]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:13 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.060894960500445,2.060894960500445,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.375529743158777,0.0,3.375529743158777)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:17 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.069943093928012,2.069943093928012,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.315641273921235,0.0,3.315641273921235)]


     6     1.698192e+00     3.448171e-06
 * Current step size: 0.8831664369041667
 * time: 82.39909505844116
 * g(x): [3.4481707265952683e-6, 7.664867759345259e-7]
 * x: [0.7275211160433642, 1.1986510510596218]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:21 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0606705959009566,2.0606705959009566,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.2395820899609213,0.0,3.2395820899609213)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:25 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0626862007407953,2.0626862007407953,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.2559944884155483,0.0,3.2559944884155483)]


     7     1.698192e+00     2.983219e-07
 * Current step size: 0.7822432571416449
 * time: 90.19951891899109
 * g(x): [2.9832185660829067e-7, -6.24512528299402e-8]
 * x: [0.7240091142988343, 1.1804977553917937]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:29 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0619781614547055,2.0619781614547055,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.260583139573367,0.0,3.260583139573367)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:33 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.061994927499684,2.061994927499684,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.2604743894816983,0.0,3.2604743894816983)]


     8     1.698192e+00     2.206522e-09
 * Current step size: 0.9763164904547647
 * time: 98.07614994049072
 * g(x): [-2.206521817579928e-9, -5.394583902064996e-10]
 * x: [0.7236739256012884, 1.1818727030377831]


In [8]:
# Report the tuned regularization strengths together with the value of
# `Optim.f_calls(res)` for the search above.
@info "The optimal [λ_u, λ_a] is $λ, found in " *
      repr(Optim.f_calls(res)) *
      " function calls"

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20211210 14:20:37 The optimal [λ_u, λ_a] is [2.061994927499684, 3.2604743894816983], found in 26 function calls


In [9]:
# Retrain once with the tuned λ, using a fresh stopper built with the same 1e-9 argument
# that `validation_mse` uses. `λ...` splats the two entries into the `λ_u` and `λ_a`
# parameters of `train_model`.
stop_criteria = convergence_stopper(1e-9)
u, a = train_model(training, λ..., stop_criteria);

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211210 14:20:37 training model with parameters [2.061994927499684, 3.2604743894816983]


## Inference

In [10]:
"""
    model(users, items)

Predict ratings for the given user/item id vectors with `make_prediction`, using the
notebook-level bias vectors `u` and `a` as they are bound at call time.
"""
function model(users, items)
    return make_prediction(users, items, u, a)
end;

In [11]:
# Pass the prediction function to `write_predictions` with `save_training = true`.
# `write_predictions` is not defined in this notebook section (presumably it comes from
# Alpha.ipynb — verify what it writes and where).
write_predictions(model, save_training = true);

In [12]:
# Hand the fitted biases and the tuned regularization strengths to `write_params`.
# `write_params` is not defined in this notebook section (presumably it comes from
# Alpha.ipynb — verify the storage format and location).
write_params(Dict("u" => u, "a" => a, "λ" => λ));