# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{\Omega}w_{ij}\text{loss}(r_{ij}, \tilde r_{ij}) + \lambda_u \sum_i (u_i - \bar u) ^2 + \lambda_a \sum_j (a_j - \bar a)^2 $
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$
* $w_{ij}$ is the weight for the prediction $r_{ij}$ and is modeled as a power-law in the number of items seen by $i$ and users that have seen $j$: $w_{ij} = |\{j' : (i, j') \in \Omega\}| ^ {\lambda_{wu}} |\{i' : (i', j) \in \Omega\}| ^ {\lambda_{wa}}$
* $\text{loss}$ is mean squared error

In [1]:
# Name of this model; it is passed to `write_alpha` and `write_params` further down.
const name = "ExplicitUserItemBiases"
# `false` selects the explicit (rating) variant; the flag is forwarded to `get_split`,
# `get_counts`, `residualized_loss` and `write_alpha`.
const implicit = false;

In [2]:
import NBInclude: @nbinclude
import SparseArrays: sparse
import Statistics: mean
@nbinclude("Alpha.ipynb");

In [3]:
# Load the two data splits used in this notebook. `get_split` is not defined in this
# file (presumably it comes from Alpha.ipynb). The code below reads `.user`, `.item` and
# `.rating` from `training`, and `.user` and `.item` from `validation`.
const training = get_split("training", implicit)
const validation = get_split("validation", implicit);

## Alternating Least Squares
* Given some hyperparameters $\lambda$, we can solve for $U$ and $A$ via Alternating Least Squares
* This is an iterative algorithm where we fix $A$, then solve for the $U$ that minimizes the loss function
* Then we fix $U$ and solve for the best $A$
* These two steps are repeated until the matrices $U$ and $A$ converge

## ALS for Explicit data
* If we fix $a$, then for each user $i$, $u_i$ is optimized when
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) w_{ij} + \bar u \lambda_u}{ \sum_{j \in \Omega_i} w_{ij} + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [4]:
# Predict a rating for every (user, item) pair as the user's bias plus the item's bias.
# `users[k]` indexes into `u` and `items[k]` indexes into `a`. The result has one entry
# per element of `users` and the element type of `u`.
function make_prediction(users, items, u, a)
    predictions = Vector{eltype(u)}(undef, length(users))
    Threads.@threads for k in eachindex(predictions)
        # Indices are trusted to be valid positions in `u` / `a`; no bounds checks here.
        @inbounds predictions[k] = u[users[k]] + a[items[k]]
    end
    return predictions
end;

In [5]:
# Accumulate, for every observation `k`, the weighted residual and the weight into the
# slot of the user it belongs to:
#     ρ[users[k]] += (ratings[k] - a[items[k]]) * weights[k]
#     Ω[users[k]] += weights[k]
# `ρ` and `Ω` are updated in place (they are NOT reset first) and returned as a tuple.
function get_residuals!(users, items, ratings, weights, a, ρ, Ω)
    @inbounds for k in eachindex(users)
        user = users[k]
        weight = weights[k]
        ρ[user] += (ratings[k] - a[items[k]]) * weight
        Ω[user] += weight
    end
    return ρ, Ω
end

# Closed-form update of the biases `u` while the opposite biases `a` are held fixed:
#     u[i] = (Σ_j w_ij * (r_ij - a[j]) + mean(u) * λ_u) / (Σ_j w_ij + λ_u)
# where the sums run over the observations whose `users` entry is `i`, and `mean(u)` is
# taken over the values of `u` before the update. Calling it with `users`/`items` and
# `u`/`a` swapped updates the item biases instead.
#
# `ρ` and `Ω` are caller-owned scratch matrices with one row per entry of `u` and one
# column per thread. They are zeroed here, so their previous contents do not matter.
# `u` is overwritten in place; nothing meaningful is returned.
function update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)
    # `:static` runs exactly one iteration on each thread, so `Threads.threadid()` is a
    # stable and unique buffer column for the whole iteration. The default scheduler on
    # Julia ≥ 1.8 gives no such guarantee (two iterations may share a thread).
    Threads.@threads :static for t = 1:Threads.nthreads()
        # NOTE(review): `thread_range` is defined elsewhere; presumably it returns the
        # slice of `1:length(ratings)` owned by the calling thread — confirm.
        range = thread_range(length(ratings))
        tid = Threads.threadid()
        ρ[:, tid] .= 0
        Ω[:, tid] .= 0
        @views get_residuals!(
            users[range],
            items[range],
            ratings[range],
            weights[range],
            a,
            ρ[:, tid],
            Ω[:, tid],
        )
    end

    # Reduce the per-thread partial sums. Fresh names are used on purpose: rebinding
    # `ρ` / `Ω`, which the threaded closure above captures, would make Julia box them and
    # lose type information in the loop below.
    ρ_total = sum(ρ, dims = 2)
    Ω_total = sum(Ω, dims = 2)
    μ = mean(u)
    Threads.@threads for i = 1:length(u)
        @inbounds u[i] = (ρ_total[i] + μ * λ_u) / (Ω_total[i] + λ_u)
    end
end;

In [6]:
# Fit user biases `u` and item biases `a` by alternating the two closed-form updates
# until `stop!(stop_criteria, [u, a])` returns true.
# `λ` unpacks to (λ_u, λ_a, λ_wu, λ_wa): the first two are the regularization strengths
# passed to `update_users!`, the last two parameterize the per-observation weights.
# Returns the tuple `(u, a)`.
function train_model(training, stop_criteria, λ)
    @info "training model with parameters $λ"
    λ_u, λ_a, λ_wu, λ_wa = λ
    users = training.user
    items = training.item
    ratings = training.rating

    # NOTE(review): the counts are always read from the "training" split through
    # `get_counts`, independent of the `training` argument — presumably the same data.
    user_factor = expdecay(get_counts("training", implicit), log(λ_wu))
    item_factor = expdecay(get_counts("training", implicit; by_item = true), log(λ_wa))
    weights = user_factor .* item_factor

    # The biases take the element type of the hyperparameters, so whatever number type
    # `λ` carries flows through training.
    u = zeros(eltype(λ_u), num_users())
    a = zeros(eltype(λ_a), num_items())

    # One scratch column per thread for the partial sums built inside `update_users!`.
    scratch(biases) = zeros(eltype(biases), length(biases), Threads.nthreads())
    ρ_u, Ω_u = scratch(u), scratch(u)
    ρ_a, Ω_a = scratch(a), scratch(a)

    while !stop!(stop_criteria, [u, a])
        # Users first with items fixed, then the same routine with the roles swapped.
        update_users!(users, items, ratings, weights, u, a, λ_u, ρ_u, Ω_u)
        update_users!(items, users, ratings, weights, a, u, λ_a, ρ_a, Ω_a)
    end
    return u, a
end;

## Training

In [7]:
# Objective for the hyperparameter search: train on the training split with the given
# log-space hyperparameters and return the loss of the predictions on the validation
# split, as computed by `residualized_loss`.
function validation_mse(λ)
    hyperparams = exp.(λ) # exponentiate so every hyperparameter is strictly positive
    stopper = convergence_stopper(1e-6, max_iters = 16)
    user_bias, item_bias = train_model(training, stopper, hyperparams)
    predictions = make_prediction(validation.user, validation.item, user_bias, item_bias)
    return residualized_loss([], implicit, predictions)
end;

In [8]:
# Find the best regularization hyperparameters
res = Optim.optimize(
    validation_mse,
    fill(0.0f0, 4),
    Optim.BFGS(),
    autodiff = :forward,
    Optim.Options(show_trace = true, extended_trace = true),
);
λ = exp.(Optim.minimizer(res));

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:07:11 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,1.0,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,1.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,0.0,1.0)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 (83.53 ns/it)[39mit)[39m


Iter     Function value   Gradient norm 
     0     1.813422e+00     5.013248e-02
 * Current step size: 1.0
 * time: 0.009202003479003906
 * g(x): Float32[-0.004593451, -3.0179515f-6, 0.018213123, 0.050132476]
 * ~inv(H): Float32[1.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0; 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 1.0]
 * x: Float32[0.0, 0.0, 0.0, 0.0]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:07:32 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.004604,1.004604,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.000003,0.0,1.000003,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9819517,0.0,0.0,0.9819517,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9511034,0.0,0.0,0.0,0.9511034)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:07:45 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.023233,1.023233,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000151,0.0,1.0000151,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9129578,0.0,0.0,0.9129578,0.0), Dual{ForwardDiff.Ta

     1     1.811670e+00     1.526936e-02
 * Current step size: 1.0274389
 * time: 39.213566064834595
 * g(x): Float32[-0.0010373178, -3.792534f-6, 0.01526936, -0.0015094323]
 * ~inv(H): Float32[1.0038893 7.92986f-6 -0.03469486 -0.020281833; 7.92986f-6 1.0 -4.4105018f-5 -7.198241f-5; -0.034694858 -4.4105018f-5 1.2139859 0.29076782; -0.020281829 -7.198241f-5 0.29076782 0.97943676]
 * x: Float32[0.00471949, 3.1007608f-6, -0.01871287, -0.051508054]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:08:11 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0062796,1.0062796,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000075,0.0,1.0000075,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.96382385,0.0,0.0,0.96382385,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9469675,0.0,0.0,0.0,0.9469675)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:08:24 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0124995,1.0124995,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000249,0.0,1.0000249,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8963877,0.0,0.0,0.8963877,0.0), Dual{Forwar

     2     1.810921e+00     4.371707e-02
 * Current step size: 4.7984567
 * time: 77.22456908226013
 * g(x): Float32[0.0025886283, -4.252421f-6, 0.0076354197, -0.04371707]
 * ~inv(H): Float32[1.057679 0.00022056622 -0.86408544 0.07201086; 0.00022056622 1.0000007 -0.003103163 7.300267f-5; -0.86408544 -0.003103163 13.286741 -0.41576955; 0.07201086 7.300266f-5 -0.41576952 0.42045444]
 * x: Float32[0.012111545, 2.4048724f-5, -0.10572752, -0.065819375]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:08:49 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0193033,1.0193033,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000546,0.0,1.0000546,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8000214,0.0,0.0,0.8000214,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.95652324,0.0,0.0,0.0,0.95652324)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:09:02 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0482799,1.0482799,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0001769,0.0,1.0001769,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.50023663,0.0,0.0,0.50023663,0.0), Dual{Forw

     3     1.808746e+00     4.054847e-02
 * Current step size: 1.7069083
 * time: 115.44697999954224
 * g(x): Float32[0.0033613164, 3.743303f-6, -0.001765712, -0.040548466]
 * ~inv(H): Float32[1.0628649 0.00025761014 -1.1099218 0.22278668; 0.00025761014 1.000001 -0.0044170786 0.00077464397; -1.1099219 -0.004417078 20.067677 -3.4261866; 0.22278668 0.0007746438 -3.426186 1.2917662]
 * x: Float32[0.024073254, 7.622358f-5, -0.30610028, -0.029344104]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:09:27 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0279582,1.0279582,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000952,0.0,1.0000952,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6664019,0.0,0.0,0.6664019,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0163691,0.0,0.0,0.0,1.0163691)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:09:40 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0270842,1.0270842,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000906,0.0,1.0000906,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6827492,0.0,0.0,0.6827492,0.0), Dual{ForwardD

     4     1.808152e+00     6.345828e-03
 * Current step size: 0.7570751
 * time: 141.06355500221252
 * g(x): Float32[-0.00011022772, 1.1275619f-5, 0.0023928515, 0.006345828]
 * ~inv(H): Float32[1.0671633 0.00025438855 -1.207159 0.2425763; 0.00025438855 1.0000008 -0.0041882955 0.00053640053; -1.2071592 -0.004188295 22.119099 -3.6614635; 0.24257629 0.0005364003 -3.6614628 1.0785211]
 * x: Float32[0.02672395, 9.060961f-5, -0.38162765, 0.0051638484]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:09:53 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0285919,1.0285919,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.000086,0.0,1.000086,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6626864,0.0,0.0,0.6626864,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0071331,0.0,0.0,0.0,1.0071331)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:10:05 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0280057,1.0280057,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000877,0.0,1.0000877,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6704118,0.0,0.0,0.6704118,0.0), Dual{ForwardDif

     5     1.808133e+00     9.977532e-04
 * Current step size: 0.61140007
 * time: 166.5411341190338
 * g(x): Float32[0.00041685946, 1.4935933f-5, 0.00016012554, 0.0009977532]
 * ~inv(H): Float32[1.026056 -3.9792398f-5 -0.6138984 0.18972552; -3.9792423f-5 1.0000031 0.002559055 0.0001413921; -0.6138986 0.0025590549 14.988773 -2.9083261; 0.1897255 0.00014139188 -2.9083252 1.0106363]
 * x: Float32[0.02762077, 8.777912f-5, -0.3998631, 0.0063523855]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:10:18 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0274726,1.0274726,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000724,0.0,1.0000724,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6709199,0.0,0.0,0.6709199,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0057471,0.0,0.0,0.0,1.0057471)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:10:31 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.025343,1.025343,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000104,0.0,1.0000104,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.67295605,0.0,0.0,0.67295605,0.0), Dual{ForwardD

     6     1.808133e+00     5.337144e-04
 * Current step size: 1.5742868
 * time: 204.72019505500793
 * g(x): Float32[0.0005337144, 1.4843829f-5, -5.9679725f-5, -0.0005137366]
 * ~inv(H): Float32[2.145679 0.033409495 -1.5336393 0.929179; 0.033409495 1.0010024 -0.024955554 0.022264007; -1.5336396 -0.024955554 15.287144 -3.1307192; 0.92917913 0.022264007 -3.1307182 1.174704]
 * x: Float32[0.026804157, 6.342453f-5, -0.39867043, 0.005373558]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:10:56 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0263864,1.0263864,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000407,0.0,1.0000407,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.67129433,0.0,0.0,0.67129433,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.005308,0.0,0.0,0.0,1.005308)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:11:09 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0232716,1.0232716,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9999497,0.0,0.9999497,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6716243,0.0,0.0,0.6716243,0.0), Dual{ForwardD

     7     1.808110e+00     1.767041e-03
 * Current step size: 123.174866
 * time: 268.36217403411865
 * g(x): Float32[0.0001424325, 1.3022578f-5, 0.0017670408, 0.0013631135]
 * ~inv(H): Float32[330.4193 9.935938 -9.025001 27.810947; 9.935937 1.2996985 -0.26172793 0.8349421; -9.0250025 -0.26172793 9.127023 -2.7016697; 27.810947 0.83494216 -2.7016687 3.2043424]
 * x: Float32[-0.066790216, -0.0027377375, -0.38353676, -0.0044310065]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:12:00 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.87289155,0.87289155,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99516624,0.0,0.99516624,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6738869,0.0,0.0,0.6738869,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9920349,0.0,0.0,0.0,0.9920349)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:12:13 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6619541,0.6619541,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9868112,0.0,0.9868112,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6444757,0.0,0.0,0.6444757,0.0), Dual{Forw

     8     1.808043e+00     2.564552e-04
 * Current step size: 3.7801864
 * time: 306.59770798683167
 * g(x): Float32[-2.0505795f-5, 2.6795082f-5, 6.225291f-5, 0.00025645518]
 * ~inv(H): Float32[718.48834 21.818722 53.668453 48.029358; 21.81872 1.6635449 1.6579509 1.4540064; 53.66845 1.657951 19.255276 0.56461465; 48.029358 1.4540064 0.5646156 4.2576118]
 * x: Float32[-0.32820436, -0.010705473, -0.42570955, -0.017911073]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:12:38 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7191292,0.7191292,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98927915,0.0,0.98927915,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6531183,0.0,0.0,0.6531183,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9820705,0.0,0.0,0.0,0.9820705)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:12:51 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.719137,0.719137,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9892797,0.0,0.9892797,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6531197,0.0,0.0,0.6531197,0.0), Dual{ForwardD

     9     1.808043e+00     2.693670e-05
 * Current step size: 0.99282455
 * time: 332.08660101890564
 * g(x): Float32[-7.939443f-7, 2.6936703f-5, -2.5686522f-6, 1.3287864f-7]
 * ~inv(H): Float32[709.68445 22.630978 50.89714 47.565845; 22.630976 1.7631025 1.7594098 1.5801946; 50.897137 1.7594097 18.519218 0.3449753; 47.565845 1.5801946 0.3449763 4.273092]
 * x: Float32[-0.3297034, -0.010778212, -0.4259949, -0.018090889]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:13:04 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7191933,0.7191933,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9892547,0.0,0.9892547,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65314615,0.0,0.0,0.65314615,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98206735,0.0,0.0,0.0,0.98206735)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:13:17 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.71941847,0.71941847,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98915493,0.0,0.98915493,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65325207,0.0,0.0,0.65325207,0.0), Dua

    10     1.808043e+00     4.297682e-05
 * Current step size: 7.8223543
 * time: 383.51355814933777
 * g(x): Float32[2.2174095f-6, 2.6830161f-5, 7.2826865f-6, -4.2976815f-5]
 * ~inv(H): Float32[715.1224 1.4933622 52.08198 47.650707; 1.4933623 14.819118 -8.664169 2.6631124; 52.08198 -8.664169 18.28758 0.48240238; 47.650707 2.6631122 0.48240376 4.2455344]
 * x: Float32[-0.3290912, -0.010975453, -0.42567778, -0.018125951]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:13:55 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7196082,0.7196082,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98886365,0.0,0.98886365,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6533298,0.0,0.0,0.6533298,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98203915,0.0,0.0,0.0,0.98203915)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:14:08 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7197314,0.7197314,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98798054,0.0,0.98798054,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6533416,0.0,0.0,0.6533416,0.0), Dual{Fo

    11     1.808041e+00     1.455820e-03
 * Current step size: 880.2223
 * time: 472.94957995414734
 * g(x): Float32[0.00016924721, 1.626071f-5, -0.00015801762, -0.0014558199]
 * ~inv(H): Float32[557.24475 -733.5581 32.00715 40.96135; -733.55817 11989.402 -68.68654 -29.222631; 32.00715 -68.68654 15.807616 -0.3697883; 40.96135 -29.222626 -0.369787 3.9621387]
 * x: Float32[-0.29142416, -0.20758206, -0.4216962, -0.016512072]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:15:25 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7340941,0.7340941,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7176436,0.0,0.7176436,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65439975,0.0,0.0,0.65439975,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98288834,0.0,0.0,0.0,0.98288834)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:15:37 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6839341,0.6839341,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.43666783,0.0,0.43666783,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64830136,0.0,0.0,0.64830136,0.0), Dual{

    12     1.808034e+00     5.127881e-04
 * Current step size: 3.8411493
 * time: 511.2618250846863
 * g(x): Float32[-6.193695f-5, 4.085918f-6, 5.6120585f-5, 0.00051278813]
 * ~inv(H): Float32[679.67773 997.6922 50.058327 46.018852; 997.69226 30261.514 173.35641 43.11926; 50.058327 173.3564 18.440933 0.37763435; 46.01885 43.11926 0.37763578 4.170945]
 * x: Float32[-0.3593891, -0.68465316, -0.43068713, -0.019384047]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:16:03 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70625734,0.70625734,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.45916525,0.0,0.45916525,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6508191,0.0,0.0,0.6508191,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98130697,0.0,0.0,0.0,0.98130697)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:16:16 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7398396,0.7398396,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.31565264,0.0,0.31565264,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65385526,0.0,0.0,0.65385526,0.0), Dua

    13     1.808034e+00     2.190244e-04
 * Current step size: 1.706832
 * time: 549.7797920703888
 * g(x): Float32[2.463591f-5, 8.6288367f-7, -5.159972f-5, -0.00021902444]
 * ~inv(H): Float32[678.8032 -35.505672 52.08494 45.705074; -35.50542 46989.152 52.781334 -0.397181; 52.08494 52.781357 18.855814 0.439785; 45.70507 -0.39718345 0.43978655 4.1448793]
 * x: Float32[-0.3395669, -0.84456956, -0.4287011, -0.018506596]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:16:41 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70924014,0.70924014,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.41411793,0.0,0.41411793,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65118563,0.0,0.0,0.65118563,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9814721,0.0,0.0,0.0,0.9814721)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:16:54 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6979988,0.6979988,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.35709822,0.0,0.35709822,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65051025,0.0,0.0,0.65051025,0.0), Dua

    14     1.808034e+00     2.681200e-05
 * Current step size: 1.3790847
 * time: 588.7607641220093
 * g(x): Float32[-3.3042002f-6, 1.4343088f-7, 7.852623f-6, 2.6811998f-5]
 * ~inv(H): Float32[701.7248 868.0409 55.450993 46.47703; 868.04126 61774.2 128.8737 40.517227; 55.45099 128.8737 19.196373 0.58163106; 46.47703 40.517227 0.58163273 4.165601]
 * x: Float32[-0.34507522, -0.8956439, -0.42905888, -0.018775683]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:17:20 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7085302,0.7085302,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.4050542,0.0,0.4050542,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65112054,0.0,0.0,0.65112054,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9814304,0.0,0.0,0.0,0.9814304)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:17:33 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7099843,0.7099843,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.39215568,0.0,0.39215568,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6511164,0.0,0.0,0.6511164,0.0), Dual{Forw

    15     1.808034e+00     1.518278e-06
 * Current step size: 1.187046
 * time: 627.2488920688629
 * g(x): Float32[1.5653112f-7, 4.317415f-9, -1.5182777f-6, -1.4051899f-6]
 * ~inv(H): Float32[746.0159 274.41168 60.586273 48.460144; 274.4121 69479.305 47.48637 15.69807; 60.586266 47.486366 19.163885 0.8995602; 48.460144 15.698062 0.8995618 4.2420607]
 * x: Float32[-0.3444668, -0.9052477, -0.4290608, -0.018738288]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:17:59 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70862794,0.70862794,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.4043411,0.0,0.4043411,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65113384,0.0,0.0,0.65113384,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98143584,0.0,0.0,0.0,0.98143584)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:18:12 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70862323,0.70862323,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.404357,0.0,0.404357,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6511317,0.0,0.0,0.6511317,0.0), Dual{Fo

    16     1.808034e+00     7.718816e-07
 * Current step size: 0.84230554
 * time: 652.9967639446259
 * g(x): Float32[4.863387f-8, 1.4080562f-10, -1.9309024f-7, -7.7188156f-7]
 * ~inv(H): Float32[775.79315 229.5718 66.60399 50.342762; 229.57222 68970.36 76.24287 3.563901; 66.60398 76.242874 17.899004 1.8900824; 50.342762 3.5638928 1.890084 4.211076]
 * x: Float32[-0.3444313, -0.9054572, -0.42904338, -0.018738564]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:18:25 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7086331,0.7086331,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.4043556,0.0,0.4043556,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65113276,0.0,0.0,0.65113276,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.981437,0.0,0.0,0.0,0.981437)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:18:38 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70862776,0.70862776,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.40435633,0.0,0.40435633,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65113217,0.0,0.0,0.65113217,0.0), Dual{Fo

    17     1.808034e+00     5.600174e-07
 * Current step size: 0.4571615
 * time: 678.628494977951
 * g(x): Float32[2.7551085f-8, 2.9163533f-11, -8.036908f-10, -5.600174f-7]
 * ~inv(H): Float32[1311.246 -136.04721 49.66346 115.453926; -136.04681 69116.836 52.33392 -31.959457; 49.66346 52.33393 6.2359567 2.9028714; 115.45393 -31.959463 2.9028735 11.354621]
 * x: Float32[-0.34442493, -0.90545875, -0.4290426, -0.018738031]


In [9]:
# Log the tuned hyperparameters together with the objective-call count reported by Optim.
@info "The optimal λ is $λ, found in " * repr(Optim.f_calls(res)) * " function calls"

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:18:50 The optimal λ is Float32[0.70862776, 0.40435633, 0.65113217, 0.98143643], found in 54 function calls


In [10]:
# Retrain with the tuned hyperparameters, using the same stopping rule as
# `validation_mse`, to obtain the final biases `u` and `a`.
stop_criteria = convergence_stopper(1e-6, max_iters = 16)
u, a = train_model(training, stop_criteria, λ);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:18:50 training model with parameters Float32[0.70862776, 0.40435633, 0.65113217, 0.98143643]


In [11]:
# Re-evaluate the validation loss at the optimum. `validation_mse` exponentiates its
# argument itself, so it receives the raw (log-space) minimizer rather than `λ`.
validation_mse(Optim.minimizer(res))

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:18:55 training model with parameters Float32[0.70862776, 0.40435633, 0.65113217, 0.98143643]


1.8080269f0

## Inference

In [12]:
# Prediction function backed by the globally trained biases `u` and `a`.
model(users, items) = make_prediction(users, items, u, a)
# NOTE(review): `write_alpha` is defined elsewhere; presumably it evaluates `model` on the
# data splits and stores the predictions under `name` — confirm.
write_alpha(model, [], implicit, name);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:19:02 validation loss: 1.8080269, β: Float32[1.0015494]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220621 08:19:04 training loss: 1.6717248, β: Float32[1.0015494]


In [13]:
# Hand the learned biases and tuned hyperparameters to `write_params` under the model
# name (presumably this persists them for later use — `write_params` is defined elsewhere).
write_params(Dict("u" => u, "a" => a, "λ" => λ), name);