# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{(i,j) \in \Omega} w_{ij}(r_{ij} - u_i - a_j)^2 + \lambda_u \sum_i (u_i - \bar u)^2 + \lambda_a \sum_j (a_j - \bar a)^2$
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$
* $w_{ij}$ is the weight given to the rating $r_{ij}$ in the loss

In [1]:
# Model identifier.
# NOTE(review): not referenced in this notebook's own cells; presumably read by helpers
# included from Alpha.ipynb — confirm.
const name = "UserItemBiases"
# Passed to `get_residuals` and `write_predictions` below; empty for this model.
const residual_alphas = []
# Weighting scheme passed to `get_weights("validation", ...)` when scoring in `validation_mse`.
const validation_weight_scheme = "inverse"

"inverse"

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [3]:
# Load the training and validation splits through `get_residuals`, which is not defined in
# this notebook's visible cells. The cells below read `.user`, `.item` and `.rating` from both.
# NOTE(review): with an empty `residual_alphas` these are presumably the raw ratings — confirm.
const training = get_residuals("training", residual_alphas)
const validation = get_residuals("validation", residual_alphas);

[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 1.43 μs/it)[39m


## Alternating Least Squares Algorithm
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) w_{ij} + \bar u \lambda_u}{ \sum_{j \in \Omega_i} w_{ij} + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
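* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user
* The update for $a_j$ is symmetric: swap the roles of users and items (and of $u$, $\lambda_u$ with $a$, $\lambda_a$)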

In [4]:
"""
    get_residuals!(users, items, ratings, weights, a, ρ, Ω)

Accumulate, for every observation `k`, the weighted residual
`(ratings[k] - a[items[k]]) * weights[k]` into `ρ[users[k]]` and the weight `weights[k]`
into `Ω[users[k]]`.

`ρ` and `Ω` are added to, not reset, so the caller is responsible for zeroing them.
Returns the tuple `(ρ, Ω)`.
"""
function get_residuals!(users, items, ratings, weights, a, ρ, Ω)
    for (k, user) in enumerate(users)
        weight = weights[k]
        # residual of the rating after removing the other side's bias, scaled by its weight
        ρ[user] += (ratings[k] - a[items[k]]) * weight
        Ω[user] += weight
    end
    return ρ, Ω
end

"""
    thread_range(n, tid = Threads.threadid(), nt = Threads.nthreads())

Return the `tid`-th of `nt` contiguous, near-equal chunks of `1:n`.

The first `n % nt` chunks hold one extra element, so the chunks for `tid = 1:nt` are
disjoint and together cover `1:n` exactly. A chunk is empty when `n < tid`.

Pass `tid` explicitly from inside `Threads.@threads` loops: the default,
`Threads.threadid()`, is not tied to the loop iteration under dynamic scheduling.
"""
function thread_range(n, tid = Threads.threadid(), nt = Threads.nthreads())
    d, r = divrem(n, nt)
    from = (tid - 1) * d + min(r, tid - 1) + 1
    to = from + d - 1 + (tid ≤ r ? 1 : 0)
    return from:to
end

"""
    update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)

Overwrite `u` in place with one regularized weighted least-squares update, holding `a`
fixed:

    u[i] = (Σ_k w_k * (r_k - a[items[k]]) + μ * λ_u) / (Σ_k w_k + λ_u)

where the sums run over the observations `k` with `users[k] == i` and `μ` is the mean of
`u` *before* the update.

`ρ` and `Ω` are scratch matrices with `length(u)` rows. Each column is zeroed and then
filled from one contiguous chunk of the observations; the columns are summed afterwards.
Calling with `users`/`items` and `u`/`a` swapped updates the item side instead.

Returns `nothing`.
"""
function update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)
    nchunks = size(ρ, 2)
    size(Ω, 2) == nchunks ||
        throw(DimensionMismatch("ρ and Ω must have the same number of columns"))

    # Chunk and scratch column are selected by the loop index `t`, never by
    # `Threads.threadid()`: with the dynamic scheduler several iterations may run on the
    # same thread id, which would skip chunks and race on a shared scratch column.
    Threads.@threads for t = 1:nchunks
        range = thread_range(length(ratings), t, nchunks)
        ρ[:, t] .= 0
        Ω[:, t] .= 0
        @views get_residuals!(
            users[range],
            items[range],
            ratings[range],
            weights[range],
            a,
            ρ[:, t],
            Ω[:, t],
        )
    end

    # Reduce the per-chunk partial sums into one total per user.
    ρ_total = sum(ρ, dims = 2)
    Ω_total = sum(Ω, dims = 2)

    # The mean is taken once, before any entry of `u` is overwritten.
    μ = mean(u)
    Threads.@threads for i = 1:length(u)
        u[i] = (ρ_total[i] + μ * λ_u) / (Ω_total[i] + λ_u)
    end
    return nothing
end;

In [5]:
"""
    train_model(training, λ_u, λ_a, λ_w, stop_criteria)

Fit the user biases `u` and item biases `a` by alternating `update_users!` calls, first on
the user side and then (with users and items swapped) on the item side, until
`stop!(stop_criteria, [u, a])` returns `true`.

`training` must expose `.user`, `.item` and `.rating`. Observation weights come from
`get_weights("training", log(λ_w))`. Both bias vectors start at zero and are sized by the
largest user / item id in `training`.

Returns the tuple `(u, a)`.
"""
function train_model(training, λ_u, λ_a, λ_w, stop_criteria)
    @info "training model with parameters [$λ_u, $λ_a, $λ_w]"
    users = training.user
    items = training.item
    ratings = training.rating
    weights = get_weights("training", log(λ_w))

    u = zeros(eltype(λ_u), maximum(users))
    a = zeros(eltype(λ_a), maximum(items))

    # Scratch accumulators for `update_users!`: one column per thread.
    nt = Threads.nthreads()
    scratch(biases) = zeros(eltype(biases), length(biases), nt)
    ρ_u, Ω_u = scratch(u), scratch(u)
    ρ_a, Ω_a = scratch(a), scratch(a)

    while true
        stop!(stop_criteria, [u, a]) && break
        update_users!(users, items, ratings, weights, u, a, λ_u, ρ_u, Ω_u)
        # same routine with the roles of users and items exchanged
        update_users!(items, users, ratings, weights, a, u, λ_a, ρ_a, Ω_a)
    end
    return u, a
end;

In [6]:
"""
    make_prediction(users, items, u, a)

Return a vector whose `k`-th entry is the user bias for `users[k]` plus the item bias for
`items[k]`.

An id larger than `length(u)` (respectively `length(a)`) has no fitted bias and falls back
to `mean(u)` (respectively `mean(a)`). The result has element type `eltype(u)` and length
`length(users)`.
"""
function make_prediction(users, items, u, a)
    r = zeros(eltype(u), length(users))
    # Fallback biases are computed once here; recomputing the mean for every unseen id
    # inside the loop would cost a full pass over `u` / `a` per prediction.
    u_mean = mean(u)
    a_mean = mean(a)
    for i in eachindex(r)
        r[i] += users[i] > length(u) ? u_mean : u[users[i]]
        r[i] += items[i] > length(a) ? a_mean : a[items[i]]
    end
    return r
end;

## Training

In [7]:
"""
    validation_mse(λ)

Objective for the hyperparameter search: train on the global `training` set with
hyperparameters `exp.(λ)` and return the weighted mean squared error of the resulting
predictions on the global `validation` set.

`λ` holds the hyperparameters in log-space, in the order expected by `train_model`
(`λ_u, λ_a, λ_w`). Validation weights come from
`get_weights("validation", validation_weight_scheme)`.
"""
function validation_mse(λ)
    # exponentiate so the values handed to training are always positive
    positive_λ = map(exp, λ)
    stopper = convergence_stopper(1e-6, max_iters = 16)
    u, a = train_model(training, positive_λ..., stopper)
    predicted = make_prediction(validation.user, validation.item, u, a)
    return mse(
        validation.rating,
        predicted,
        get_weights("validation", validation_weight_scheme),
    )
end;

In [8]:
# Find the best regularization hyperparameters
# The search runs over log-hyperparameters (`validation_mse` applies `exp`), so the
# all-zeros starting point corresponds to λ_u = λ_a = λ_w = 1.
# Gradients for LBFGS are requested via forward-mode automatic differentiation.
res = optimize(
    validation_mse,
    fill(0.0f0, 3),
    LBFGS(),
    autodiff = :forward,
    Optim.Options(show_trace = true, extended_trace = true),
);
# Map the minimizer back from log-space to the actual hyperparameter values.
λ = exp.(Optim.minimizer(res));

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:06:55 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,1.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,1.0)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.47 μs/it)[39m


Iter     Function value   Gradient norm 
     0     1.820281e+00     1.945127e-02
 * Current step size: 1.0
 * time: 0.021981000900268555
 * g(x): Float32[-0.0050086486, -1.0955863f-6, 0.019451266]
 * x: Float32[0.0, 0.0, 0.0]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:07:22 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0050212,1.0050212,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000011,0.0,1.0000011,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9807367,0.0,0.0,0.9807367)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:07:43 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0253595,1.0253595,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000055,0.0,1.0000055,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9073234,0.0,0.0,0.9073234)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[

     2     1.814787e+00     5.672059e-03
 * Current step size: 0.15930836
 * time: 172.1668939590454
 * g(x): Float32[-0.00019501134, 1.0135125f-5, 0.005672059]
 * x: Float32[0.06837795, -4.0081133f-5, -0.33532402]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:10:12 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.057375,1.057375,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99974656,0.0,0.99974656,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.66611916,0.0,0.0,0.66611916)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:10:34 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0630815,1.0630815,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99983776,0.0,0.99983776,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6866397,0.0,0.0,0.6866397)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m


     3     1.814655e+00     1.279483e-03
 * Current step size: 0.57243454
 * time: 215.09497499465942
 * g(x): Float32[0.0012794825, 1.685819f-5, 0.0003836784]
 * x: Float32[0.061171822, -0.0001622274, -0.37594554]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:10:55 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.050458,1.050458,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9996906,0.0,0.9996906,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.68115366,0.0,0.0,0.68115366)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:11:17 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0014448,1.0014448,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99910194,0.0,0.99910194,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6596442,0.0,0.0,0.6596442)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:11:38 training model with parameters [Dual{Forward

     4     1.814618e+00     2.670913e-03
 * Current step size: 3.8349497
 * time: 279.3286979198456
 * g(x): Float32[0.0018198843, 2.3722505f-5, -0.0026709128]
 * x: Float32[0.015360989, -0.00072693464, -0.40670887]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:12:00 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9732835,0.9732835,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9987188,0.0,0.9987188,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.66059315,0.0,0.0,0.66059315)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:12:21 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8213195,0.8213195,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99650365,0.0,0.99650365,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6400235,0.0,0.0,0.6400235)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:12:43 training model with parameters [Dual{Forwa

     5     1.814508e+00     1.583220e-04
 * Current step size: 4.0365014
 * time: 343.7667679786682
 * g(x): Float32[1.4126287f-5, 3.2654247f-5, -0.00015832199]
 * x: Float32[-0.15595144, -0.0029676505, -0.43863067]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:13:04 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.85938036,0.85938036,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9963259,0.0,0.9963259,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64630777,0.0,0.0,0.64630777)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:13:26 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.87466663,0.87466663,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99348754,0.0,0.99348754,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6518931,0.0,0.0,0.6518931)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:13:47 training model with parameters [Dual{F

     6     1.814508e+00     3.189936e-05
 * Current step size: 1.0509797
 * time: 408.28110790252686
 * g(x): Float32[-3.0036474f-6, 3.189936f-5, 1.5668218f-5]
 * x: Float32[-0.15131892, -0.0037172306, -0.4363698]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:14:09 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8596825,0.8596825,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9958897,0.0,0.9958897,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6463945,0.0,0.0,0.6463945)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:14:30 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8601186,0.8601186,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9942915,0.0,0.9942915,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64645797,0.0,0.0,0.64645797)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:14:52 training model with parameters [Dual{Forward

     7     1.814507e+00     1.247400e-04
 * Current step size: 99.43661
 * time: 515.7348818778992
 * g(x): Float32[0.00011731776, 2.9382707f-5, -0.00012473996]
 * x: Float32[-0.13871118, -0.043644197, -0.433929]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:15:56 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8693689,0.8693689,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9230548,0.0,0.9230548,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6476793,0.0,0.0,0.6476793)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:16:17 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.864941,0.864941,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7979124,0.0,0.7979124,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64656484,0.0,0.0,0.64656484)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:16:39 training model with parameters [Dual{ForwardDi

     8     1.814493e+00     4.815689e-04
 * Current step size: 34.729866
 * time: 623.0582418441772
 * g(x): Float32[-1.49605485f-5, 3.4864681f-6, -0.00048156892]
 * x: Float32[-0.18304613, -1.3085908, -0.4488824]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:17:43 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8485954,0.8485954,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.2528292,0.0,0.2528292,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64315075,0.0,0.0,0.64315075)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:18:05 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.915139,0.915139,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.19381663,0.0,0.19381663,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6627543,0.0,0.0,0.6627543)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:18:26 training model with parameters [Dual{Forward

     9     1.814490e+00     5.291142e-05
 * Current step size: 1.2514611
 * time: 687.5168528556824
 * g(x): Float32[-1.0872686f-5, 2.5033128f-6, 5.291142f-5]
 * x: Float32[-0.15942684, -1.3917508, -0.4394886]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:18:48 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8525233,0.8525233,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.22073098,0.0,0.22073098,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64418674,0.0,0.0,0.64418674)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:19:09 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8520873,0.8520873,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.13709927,0.0,0.13709927,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6434708,0.0,0.0,0.6434708)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:19:31 training model with parameters [Dual{For

    10     1.814489e+00     1.893130e-04
 * Current step size: 6.0479097
 * time: 773.3292078971863
 * g(x): Float32[5.1028757f-5, 2.466934f-7, -0.00018931298]
 * x: Float32[-0.16020031, -2.111814, -0.44116998]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:20:14 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8518708,0.8518708,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.11420415,0.0,0.11420415,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64387995,0.0,0.0,0.64387995)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:20:35 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8514616,0.8514616,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.09057453,0.0,0.09057453,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64627194,0.0,0.0,0.64627194)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:20:57 training model with parameters [Dual{F

    11     1.814489e+00     1.294824e-05
 * Current step size: 1.2454377
 * time: 837.9163730144501
 * g(x): Float32[-3.9645433f-6, 1.9660862f-7, 1.2948236f-5]
 * x: Float32[-0.16034992, -2.1839917, -0.44001544]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:21:18 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.85188377,0.85188377,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.10547293,0.0,0.10547293,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440196,0.0,0.0,0.6440196)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:21:40 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8520362,0.8520362,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.08122456,0.0,0.08122456,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64399207,0.0,0.0,0.64399207)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:22:01 training model with parameters [Dual{F

    12     1.814489e+00     3.270234e-04
 * Current step size: 86.12421
 * time: 945.1410758495331
 * g(x): Float32[0.00010829754, 1.4069728f-9, -0.00032702336]
 * x: Float32[-0.1564983, -7.808695, -0.4409366]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:23:05 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8528792,0.8528792,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.0008979847,0.0,0.0008979847,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6439245,0.0,0.0,0.6439245)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:23:27 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8439234,0.8439234,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.021450493,0.0,0.021450493,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64589214,0.0,0.0,0.64589214)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:23:48 training model with parameters [Du

    13     1.814489e+00     1.126408e-05
 * Current step size: 1.2631932
 * time: 1009.5826728343964
 * g(x): Float32[3.6128788f-6, 7.663208f-9, -1.1264083f-5]
 * x: Float32[-0.15983194, -6.806556, -0.4399731]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:24:10 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8522434,0.8522434,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.00094663055,0.0,0.00094663055,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440759,0.0,0.0,0.6440759)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:24:31 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.85206914,0.85206914,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.00050710933,0.0,0.00050710933,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6441643,0.0,0.0,0.6441643)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:24:53 training model with paramete

    14     1.814489e+00     1.170827e-05
 * Current step size: 2.971413
 * time: 1073.9676308631897
 * g(x): Float32[-3.7363232f-6, 3.4650365f-9, 1.17082745f-5]
 * x: Float32[-0.15998389, -7.270232, -0.43987107]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:25:14 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8522275,0.8522275,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.000560882,0.0,0.000560882,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6441066,0.0,0.0,0.6441066)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:25:36 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8525076,0.8525076,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.00023661634,0.0,0.00023661634,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440553,0.0,0.0,0.6440553)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:25:57 training model with parameters [Du

    15     1.814489e+00     6.699579e-06
 * Current step size: 2.6536384
 * time: 1138.324518918991
 * g(x): Float32[2.6128098f-6, 1.3550716f-9, -6.6995794f-6]
 * x: Float32[-0.15976588, -7.8428016, -0.43992394]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:26:19 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8522702,0.8522702,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.0003174885,0.0,0.0003174885,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440872,0.0,0.0,0.6440872)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:26:40 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.85197777,0.85197777,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.0001358259,0.0,0.0001358259,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64409435,0.0,0.0,0.64409435)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:27:01 training model with parameters

    16     1.814489e+00     3.675404e-06
 * Current step size: 2.5021572
 * time: 1202.412027835846
 * g(x): Float32[-1.6368393f-6, 5.797353f-10, 3.6754036f-6]
 * x: Float32[-0.15998057, -8.373927, -0.439917]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:27:23 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.85222363,0.85222363,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.0001858231,0.0,0.0001858231,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64409477,0.0,0.0,0.64409477)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:27:44 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8524767,0.8524767,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(7.807297e-5,0.0,7.807297e-5,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64411426,0.0,0.0,0.64411426)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:28:05 training model with parameters

    17     1.814489e+00     1.754345e-06
 * Current step size: 2.6644194
 * time: 1266.6238560676575
 * g(x): Float32[1.0560404f-6, 2.5550173f-10, -1.7543447f-6]
 * x: Float32[-0.15978275, -8.951541, -0.43989685]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:28:27 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8522722,0.8522722,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.0001016012,0.0,0.0001016012,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64409405,0.0,0.0,0.64409405)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:28:48 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8520454,0.8520454,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(3.8451497e-5,0.0,3.8451497e-5,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440587,0.0,0.0,0.6440587)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:29:10 training model with parameters [

    18     1.814489e+00     1.530033e-06
 * Current step size: 2.8103993
 * time: 1331.0338668823242
 * g(x): Float32[-5.980161f-7, 1.0321363f-10, 1.5300328f-6]
 * x: Float32[-0.15996976, -9.634228, -0.4399354]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:29:31 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8521658,0.8521658,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(4.8567304e-5,0.0,4.8567304e-5,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64407116,0.0,0.0,0.64407116)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:29:53 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.85215104,0.85215104,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.4726133e-5,0.0,1.4726133e-5,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440437,0.0,0.0,0.6440437)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:30:14 training model with parameters

    19     1.814489e+00     2.001422e-06
 * Current step size: 1.9382879
 * time: 1395.4840078353882
 * g(x): Float32[1.7457256f-7, 4.9074248f-11, -2.001422f-6]
 * x: Float32[-0.1599782, -10.212481, -0.43995607]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:30:36 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8521888,0.8521888,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(3.4956905e-5,0.0,3.4956905e-5,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440717,0.0,0.0,0.6440717)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:30:57 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.85229456,0.85229456,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.8744842e-5,0.0,2.8744842e-5,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440995,0.0,0.0,0.6440995)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:31:19 training model with parameters [

    20     1.814489e+00     1.364076e-06
 * Current step size: 2.6075046
 * time: 1459.9649980068207
 * g(x): Float32[3.9563918f-7, 4.4214424f-11, -1.3640758f-6]
 * x: Float32[-0.15989731, -10.340025, -0.4399279]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:31:40 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8522434,0.8522434,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.437815e-5,0.0,2.437815e-5,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6440905,0.0,0.0,0.6440905)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:32:02 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8522918,0.8522918,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(7.897093e-6,0.0,7.897093e-6,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64412093,0.0,0.0,0.64412093)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:32:23 training model with parameters [Dual

    21     1.814489e+00     7.983481e-07
 * Current step size: 2.07719
 * time: 1524.253702878952
 * g(x): Float32[-1.5771829f-7, 2.2963187f-11, 7.983481f-7]
 * x: Float32[-0.15986782, -10.925373, -0.43990332]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:32:45 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8522527,0.8522527,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.4079447e-5,0.0,1.4079447e-5,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64409596,0.0,0.0,0.64409596)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:33:06 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.85223794,0.85223794,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(5.275278e-6,0.0,5.275278e-6,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64408505,0.0,0.0,0.64408505)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.44 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:33:27 training model with parameters

    22     1.814489e+00     8.385715e-07
 * Current step size: 4.291169
 * time: 1588.694303035736
 * g(x): Float32[2.8909088f-7, 7.647784f-12, -8.385715f-7]
 * x: Float32[-0.15988646, -11.978518, -0.43992147]


In [9]:
# Log the selected hyperparameters and the objective call count reported by
# `Optim.f_calls(res)`.
@info "The optimal [λ_u, λ_a, λ_w] is $λ, found in " *
      repr(Optim.f_calls(res)) *
      " function calls"

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:33:49 The optimal [λ_u, λ_a, λ_w] is Float32[0.85224056, 6.277633f-6, 0.644087], found in 75 function calls


In [10]:
# Empty the cache returned by `memoize_cache(get_weights)` before the final fit.
# NOTE(review): presumably this drops entries cached during the Dual-valued optimization
# runs — confirm.
empty!(memoize_cache(get_weights))
# Refit with the selected λ, using the same stopping rule as in `validation_mse`.
stop_criteria = convergence_stopper(1e-6, max_iters = 16)
u, a = train_model(training, λ..., stop_criteria);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:33:49 training model with parameters [0.85224056, 6.277633e-6, 0.644087]
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.46 μs/it)[39m


## Inference

In [11]:
# Prediction function for the fitted model: delegates to `make_prediction` with the
# global bias vectors `u` and `a` produced by the final `train_model` call.
function model(users, items)
    return make_prediction(users, items, u, a)
end;

In [12]:
# Hand the fitted `model` to `write_predictions` (not defined in this notebook's visible
# cells); the captured output shows it logging RMSE / MAE / R2 for the training and
# validation sets.
write_predictions(model; residual_alphas = residual_alphas);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:34:14 training set: RMSE 1.2844305 MAE 0.96078026 R2 0.46220618
[32mProgress: 100%|███████████████████████████| Time: 0:00:04 ( 0.45 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:34:27 training set weighted-loss: RMSE 1.2921827 MAE 0.96010107 R2 0.48657387
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:34:28 validation set: RMSE 1.3195703 MAE 0.9857642 R2 0.40952003
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220508 17:34:29 validation set weighted-loss: RMSE 1.3469635 MAE 1.0024703 R2 0.39467692


In [13]:
# Pass the fitted biases and the selected hyperparameters to `write_params`
# (presumably saves them for later use — confirm against its definition).
write_params(Dict("u" => u, "a" => a, "λ" => λ));