# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{(i, j) \in \Omega} w_{ij}(r_{ij} - u_i - a_j)^2 + \lambda_u \sum_i (u_i - \bar u)^2 + \lambda_a \sum_j (a_j - \bar a)^2$
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$
* $w_{ij}$ is the weight for the rating $r_{ij}$ and is modeled as a power-law in the number of items seen by $i$ and the number of users that have seen $j$: $w_{ij} = |\{j' : (i, j') \in \Omega\}| ^ {\lambda_{wu}} |\{i' : (i', j) \in \Omega\}| ^ {\lambda_{wa}}$

In [1]:
# Identifier of this model.
# NOTE(review): presumably read by the shared helpers included from Alpha.ipynb — confirm.
const name = "UserItemBiases"
# Passed to get_residuals and write_predictions in later cells; empty for this model.
# NOTE(review): presumably lists earlier models whose predictions are removed from the
# ratings before fitting — confirm against the helper definitions.
const residual_alphas = [];

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [3]:
# Load the two data splits through get_residuals, a helper that is not defined in this
# notebook (presumably it comes from Alpha.ipynb — confirm). Later cells read the
# `user`, `item` and `rating` fields of the returned objects.
const training = get_residuals("training", residual_alphas)
const validation = get_residuals("validation", residual_alphas);

## Alternating Least Squares Algorithm
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) w_{ij} + \bar u \lambda_u}{ \sum_{j \in \Omega_i} w_{ij} + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [4]:
"""
    get_residuals!(users, items, ratings, weights, a, ρ, Ω)

Accumulate, per user, the weighted residual numerator and the total weight.

For every row `k`, with user `users[k]`, item `items[k]`, rating `ratings[k]` and
weight `weights[k]`, add `(rating - a[item]) * weight` to `ρ[user]` and `weight` to
`Ω[user]`. The accumulators are NOT reset here; the caller must zero them beforehand
when a fresh sum is wanted.

Returns the tuple `(ρ, Ω)` (the same, mutated, arrays that were passed in).
"""
function get_residuals!(users, items, ratings, weights, a, ρ, Ω)
    for k in eachindex(users)
        user = users[k]
        weight = weights[k]
        # Residual of the rating once the item bias has been removed.
        residual = ratings[k] - a[items[k]]
        ρ[user] += residual * weight
        Ω[user] += weight
    end
    return ρ, Ω
end

"""
    thread_range(n, tid = Threads.threadid(), nt = Threads.nthreads())

Return the `tid`-th of `nt` contiguous, near-equal index ranges that together cover
`1:n`.

The first `n % nt` ranges hold one extra element. When `n < nt` the trailing ranges
are empty.

# Arguments
- `n`: number of items to split.
- `tid`: 1-based chunk index. Defaults to the id of the calling thread, which keeps
  the original one-argument behaviour. Inside a `Threads.@threads` loop prefer
  passing the loop index explicitly: with the dynamic scheduler the running
  thread's id is not guaranteed to match the iteration number.
- `nt`: total number of chunks. Defaults to `Threads.nthreads()`.
"""
function thread_range(n, tid = Threads.threadid(), nt = Threads.nthreads())
    d, r = divrem(n, nt)
    # Chunks 1..r are one element longer, so shift the start by the number of
    # longer chunks that precede this one.
    from = (tid - 1) * d + min(r, tid - 1) + 1
    to = from + d - 1 + (tid ≤ r ? 1 : 0)
    return from:to
end

"""
    update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)

Run one alternating-least-squares half-step: overwrite every entry of `u` in place
while `a` is held fixed.

Each new value is

    u[i] = (Σ_j (r_ij - a[j]) * w_ij + mean(u) * λ_u) / (Σ_j w_ij + λ_u)

where the sums run over the rows whose entry in `users` equals `i` and `mean(u)` is
taken over the values of `u` before the update.

# Arguments
- `users`, `items`, `ratings`, `weights`: parallel row-wise arrays of observations.
- `u`: biases to update (mutated). `a`: biases of the other side (read only).
- `λ_u`: regularization strength pulling `u[i]` towards `mean(u)`.
- `ρ`, `Ω`: scratch matrices with `length(u)` rows and one column per parallel chunk.
  Their contents are overwritten.

Calling it with the `users`/`items` and `u`/`a` roles swapped updates the item biases.
Returns `nothing`.
"""
function update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)
    n = length(ratings)
    # One chunk per scratch column, so every column is rewritten before it is summed.
    nchunks = size(ρ, 2)
    chunk, extra = divrem(n, nchunks)
    Threads.@threads for t = 1:nchunks
        # Select the rows and the scratch column by the loop index, not by
        # Threads.threadid(): under the dynamic scheduler the running thread's id
        # need not equal `t`, which could skip or duplicate chunks, let two
        # iterations write the same column, or index past the last column.
        from = (t - 1) * chunk + min(extra, t - 1) + 1
        to = from + chunk - 1 + (t ≤ extra ? 1 : 0)
        rows = from:to
        ρ_t = view(ρ, :, t)
        Ω_t = view(Ω, :, t)
        ρ_t .= 0
        Ω_t .= 0
        @views get_residuals!(
            users[rows],
            items[rows],
            ratings[rows],
            weights[rows],
            a,
            ρ_t,
            Ω_t,
        )
    end

    # New names instead of rebinding the arguments: reassigning a variable that the
    # threaded closure above captures would force it to be boxed.
    ρ_total = sum(ρ, dims = 2)
    Ω_total = sum(Ω, dims = 2)

    μ = mean(u)
    Threads.@threads for i = 1:length(u)
        u[i] = (ρ_total[i] + μ * λ_u) / (Ω_total[i] + λ_u)
    end
    return nothing
end;

In [5]:
"""
    train_model(training, stop_criteria, λ_u, λ_a, λ_wu, λ_wa)

Fit the user biases `u` and item biases `a` by alternating `update_users!` on the
user side and on the item side until `stop!(stop_criteria, [u, a])` returns true.

- `training` must expose `user`, `item` and `rating` fields.
- `λ_u`, `λ_a` are the regularization strengths handed to the two half-steps.
- `λ_wu`, `λ_wa` control the per-row weights, built as
  `safe_exp.(counts, log(λ))` for the user-side and item-side counts and multiplied
  together.

NOTE(review): the counts come from `get_counts("training")` regardless of the
`training` argument; presumably they are aligned row-for-row with it — confirm.

Both bias vectors start at zero, sized by the largest id seen. Returns `(u, a)`.
"""
function train_model(training, stop_criteria, λ_u, λ_a, λ_wu, λ_wa)
    @info "training model with parameters [$λ_u, $λ_a, $λ_wu, $λ_wa]"
    users = training.user
    items = training.item
    ratings = training.rating
    weights =
        safe_exp.(get_counts("training"), log(λ_wu)) .*
        safe_exp.(get_counts("training"; by_item = true), log(λ_wa))

    # Element types follow the hyperparameters so that dual numbers propagate.
    u = zeros(eltype(λ_u), maximum(users))
    a = zeros(eltype(λ_a), maximum(items))

    # Per-thread accumulation buffers: one column for each thread.
    scratch(v) = zeros(eltype(v), length(v), Threads.nthreads())
    ρ_u, Ω_u = scratch(u), scratch(u)
    ρ_a, Ω_a = scratch(a), scratch(a)

    while true
        stop!(stop_criteria, [u, a]) && break
        update_users!(users, items, ratings, weights, u, a, λ_u, ρ_u, Ω_u)
        # Same routine with the roles of users and items exchanged.
        update_users!(items, users, ratings, weights, a, u, λ_a, ρ_a, Ω_a)
    end
    return u, a
end;

In [6]:
"""
    make_prediction(users, items, u, a)

Predict a score for each `(users[k], items[k])` pair as user bias plus item bias.

An id beyond the end of `u` (respectively `a`) has no fitted bias and falls back to
the mean of `u` (respectively `a`).

Returns a vector of `eltype(u)` with one entry per input pair.
"""
function make_prediction(users, items, u, a)
    r = zeros(eltype(u), length(users))
    # Compute the fallback values once; they do not change inside the loop.
    u_mean = mean(u)
    a_mean = mean(a)
    for k in eachindex(r)
        r[k] += users[k] > length(u) ? u_mean : u[users[k]]
        r[k] += items[k] > length(a) ? a_mean : a[items[k]]
    end
    return r
end;

## Training

In [7]:
"""
    validation_mse(λ)

Objective for the hyperparameter search. Trains on the global `training` split with
hyperparameters `exp.(λ)` and returns the weighted MSE of the resulting predictions
on the global `validation` split.

Training stops via `convergence_stopper(1e-6, max_iters = 16)`. The validation
weights are obtained with `get_weights("validation", "inverse")`.
"""
function validation_mse(λ)
    # The optimizer searches in log-space; exponentiating keeps every value positive.
    hyperparams = exp.(λ)
    stopper = convergence_stopper(1e-6, max_iters = 16)
    u, a = train_model(training, stopper, hyperparams...)
    predicted = make_prediction(validation.user, validation.item, u, a)
    loss_weights = get_weights("validation", "inverse")
    return mse(validation.rating, predicted, loss_weights)
end;

In [9]:
# Search for the hyperparameters that minimize the validation loss.
# The search runs over log-hyperparameters, starting from zeros (i.e. all values = 1),
# with BFGS, a backtracking line search and forward-mode automatic differentiation.
res = optimize(
    validation_mse,
    zeros(Float32, 4),
    BFGS(; linesearch = BackTracking()),
    Optim.Options(; show_trace = true, extended_trace = true);
    autodiff = :forward,
);
# Map the minimizer back from log-space.
λ = map(exp, Optim.minimizer(res));

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:51:37 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,1.0,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,1.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,0.0,1.0)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:01 ( 0.12 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 (35.22 ns/it)[39m


Iter     Function value   Gradient norm 
     0     1.820281e+00     5.464972e-02
 * Current step size: 1.0
 * time: 0.024962902069091797
 * g(x): Float32[-0.0050086514, -1.0955863f-6, 0.019451264, 0.05464972]
 * ~inv(H): Float32[1.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0; 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 1.0]
 * x: Float32[0.0, 0.0, 0.0, 0.0]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:52:06 training model with parameters [1.0050212, 1.0000011, 0.9807367, 0.94681674]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:52:36 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0050212,1.0050212,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000011,0.0,1.0000011,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9807367,0.0,0.0,0.9807367,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.94681674,0.0,0.0,0.0,0.94681674)]


     1     1.818474e+00     1.522718e-02
 * Current step size: 1.0
 * time: 74.55671286582947
 * g(x): Float32[-0.00035009012, -1.7580297f-6, 0.015227176, -0.010049476]
 * ~inv(H): Float32[1.0020685 2.7653282f-6 -0.02710825 -0.0034922624; 2.7653282f-6 1.0 -1.4911706f-5 -2.599969f-5; -0.027108252 -1.4911705f-5 1.1793545 0.22169183; -0.003492276 -2.599969f-5 0.22169183 0.8299486]
 * x: Float32[0.0050086514, 1.0955863f-6, -0.019451264, -0.05464972]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:53:18 training model with parameters [1.0057536, 1.0000029, 0.9654209, 0.951528]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:53:47 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0057536,1.0057536,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000029,0.0,1.0000029,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9654209,0.0,0.0,0.9654209,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.951528,0.0,0.0,0.0,0.951528)]


     2     1.818195e+00     1.542231e-02
 * Current step size: 1.0
 * time: 145.5532968044281
 * g(x): Float32[-0.0006939955, -1.660279f-6, 0.015422314, -0.004781666]
 * ~inv(H): Float32[1.0729377 0.00014409694 -1.3492321 0.25831926; 0.00014409694 1.0000002 -0.002573532 0.00041360213; -1.349232 -0.0025735318 25.228006 -4.010547; 0.25831923 0.00041360204 -4.010547 1.1076775]
 * x: Float32[0.0057371524, 2.820364f-6, -0.035191108, -0.049686138]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:54:29 training model with parameters [1.028936, 1.0000463, 0.6412225, 1.0177982]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:54:58 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.028936,1.028936,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000463,0.0,1.0000463,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6412225,0.0,0.0,0.6412225,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0177982,0.0,0.0,0.0,1.0177982)]


     3     1.814613e+00     6.846104e-03
 * Current step size: 1.0
 * time: 216.75958800315857
 * g(x): Float32[8.7170585f-5, 2.9965591f-5, -0.00078989303, 0.0068461043]
 * ~inv(H): Float32[1.0634234 3.6268862f-5 -1.1939304 0.22369981; 3.6268833f-5 0.9999998 -0.00066696305 8.2617815f-5; -1.1939301 -0.00066696294 22.718414 -3.4348257; 0.22369978 8.2617706f-5 -3.4348261 0.98616236]
 * x: Float32[0.028525244, 4.6248173f-5, -0.44437885, 0.0176416]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:55:41 training model with parameters [1.0262978, 1.0000151, 0.6684361, 1.008191]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:56:09 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0262978,1.0262978,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000151,0.0,1.0000151,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6684361,0.0,0.0,0.6684361,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.008191,0.0,0.0,0.0,1.008191)]


     4     1.814578e+00     4.154017e-03
 * Current step size: 1.0
 * time: 287.8485119342804
 * g(x): Float32[0.000115395545, 2.1142601f-5, 0.0015710957, 0.0041540167]
 * ~inv(H): Float32[1.0523537 0.00053399673 -0.8495941 0.21955667; 0.0005339968 1.0000135 -0.006727563 0.0023659894; -0.849594 -0.0067275614 14.470269 -2.7577724; 0.21955663 0.0023659891 -2.7577727 1.1066228]
 * x: Float32[0.025957994, 1.5186986f-5, -0.40281448, 0.008157582]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:56:52 training model with parameters [1.0266072, 0.9999947, 0.6610045, 1.007899]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:57:20 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0266072,1.0266072,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9999947,0.0,0.9999947,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6610045,0.0,0.0,0.6610045,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.007899,0.0,0.0,0.0,1.007899)]


     5     1.814570e+00     1.930623e-03
 * Current step size: 1.0
 * time: 358.97175884246826
 * g(x): Float32[0.0006668911, 2.3912333f-5, -0.00036680984, -0.0019306227]
 * ~inv(H): Float32[1.0384935 0.0006808852 -0.60764086 0.2381363; 0.0006808852 1.0000573 0.006318353 0.0018676942; -0.60764074 0.006318357 15.59791 -3.1854324; 0.23813626 0.001867694 -3.1854327 1.0837115]
 * x: Float32[0.026259298, -5.2762352f-6, -0.41399464, 0.007867991]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:58:03 training model with parameters [1.0261394, 0.9999763, 0.6609891, 1.0086703]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:58:31 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0261394,1.0261394,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9999763,0.0,0.9999763,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6609891,0.0,0.0,0.6609891,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0086703,0.0,0.0,0.0,1.0086703)]


     6     1.814569e+00     5.011792e-04
 * Current step size: 1.0
 * time: 430.15250301361084
 * g(x): Float32[0.0005011792, 2.375323f-5, 2.5587076f-5, 7.351488f-5]
 * ~inv(H): Float32[1.3438689 0.013966363 -0.63936675 0.008915137; 0.013966364 1.0006326 0.0050816885 -0.008963842; -0.6393666 0.0050816922 15.593863 -3.117678; 0.008915096 -0.008963841 -3.1176782 0.9928346]
 * x: Float32[0.025803583, -2.3720568f-5, -0.41401798, 0.008632925]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:59:14 training model with parameters [1.0254643, 0.99994606, 0.6610886, 1.0086728]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 04:59:43 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0254643,1.0254643,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99994606,0.0,0.99994606,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6610886,0.0,0.0,0.6610886,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0086728,0.0,0.0,0.0,1.0086728)]


     7     1.814569e+00     4.817730e-04
 * Current step size: 1.0
 * time: 501.2873339653015
 * g(x): Float32[0.00048177302, 2.371322f-5, 8.018714f-5, 0.00028652584]
 * ~inv(H): Float32[37.290684 1.6891623 -3.167449 1.1198162; 1.6891623 1.0786848 -0.11646908 0.041986518; -3.1674492 -0.11646908 14.870181 -3.3936033; 1.1198161 0.041986518 -3.3936036 0.98376626]
 * x: Float32[0.025145436, -5.3959524f-5, -0.41386747, 0.008635455]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:00:25 training model with parameters [1.007098, 0.9991044, 0.6619543, 1.008118]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:00:54 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.007098,1.007098,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9991044,0.0,0.9991044,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6619543,0.0,0.0,0.6619543,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.008118,0.0,0.0,0.0,1.008118)]


     8     1.814563e+00     3.214643e-03
 * Current step size: 1.0
 * time: 572.4413459300995
 * g(x): Float32[0.00020482035, 2.3456681f-5, 0.0008212494, 0.003214643]
 * ~inv(H): Float32[106.639 4.946641 -2.8898675 4.646069; 4.9466405 1.2316874 -0.10544033 0.20708753; -2.8898675 -0.105440326 14.466329 -3.4876072; 4.6460686 0.20708753 -3.4876075 1.1342049]
 * x: Float32[0.007072868, -0.00089602236, -0.41255876, 0.008085211]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:01:36 training model with parameters [0.9729261, 0.99748594, 0.6619049, 1.0063671]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:02:05 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9729261,0.9729261,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99748594,0.0,0.99748594,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6619049,0.0,0.0,0.6619049,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0063671,0.0,0.0,0.0,1.0063671)]


     9     1.814552e+00     5.811167e-03
 * Current step size: 1.0
 * time: 643.5382189750671
 * g(x): Float32[-6.600928f-5, 2.3758697f-5, 0.0014739666, 0.005811167]
 * ~inv(H): Float32[309.36502 14.524213 6.274652 17.394503; 14.524212 1.6841534 0.32508123 0.8086615; 6.2746525 0.32508123 14.505029 -3.020613; 17.394503 0.8086615 -3.0206132 1.9040741]
 * x: Float32[-0.027447151, -0.002517206, -0.41263345, 0.0063468753]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:02:47 training model with parameters [0.8889607, 0.993246, 0.6596449, 1.0008637]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:03:16 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8889607,0.8889607,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.993246,0.0,0.993246,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6596449,0.0,0.0,0.6596449,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0008637,0.0,0.0,0.0,1.0008637)]


    10     1.814529e+00     8.741460e-03
 * Current step size: 1.0
 * time: 714.6921598911285
 * g(x): Float32[-0.0004438867, 2.5632433f-5, 0.0022214265, 0.00874146]
 * ~inv(H): Float32[574.9169 27.153233 27.65357 36.266758; 27.15323 2.284725 1.337723 1.7051919; 27.65357 1.3377231 15.743986 -1.617935; 36.266758 1.705192 -1.6179352 3.2170553]
 * x: Float32[-0.11770225, -0.006776912, -0.41605362, 0.0008632522]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:03:58 training model with parameters [0.78532696, 0.9874198, 0.65400267, 0.9924191]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:04:27 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.78532696,0.78532696,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9874198,0.0,0.9874198,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65400267,0.0,0.0,0.65400267,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9924191,0.0,0.0,0.0,0.9924191)]


    11     1.814501e+00     8.763490e-03
 * Current step size: 1.0
 * time: 785.8399248123169
 * g(x): Float32[-0.0006205321, 2.9828647f-5, 0.0022336992, 0.00876349]
 * ~inv(H): Float32[712.03534 33.820763 49.963585 48.55937; 33.820755 2.6087546 2.4076698 2.2995358; 49.963585 2.40767 18.177706 0.10944261; 48.55937 2.299536 0.10944238 4.256915]
 * x: Float32[-0.24165511, -0.012660037, -0.42464384, -0.0076097604]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:05:10 training model with parameters [0.7132122, 0.9828675, 0.64708716, 0.9850178]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:05:38 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7132122,0.7132122,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9828675,0.0,0.9828675,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64708716,0.0,0.0,0.64708716,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9850178,0.0,0.0,0.0,0.9850178)]


    12     1.814482e+00     4.859752e-03
 * Current step size: 1.0
 * time: 857.1278369426727
 * g(x): Float32[-0.00042056467, 3.5106947f-5, 0.0012093757, 0.0048597516]
 * ~inv(H): Float32[612.1782 29.23311 47.23201 43.678738; 29.233109 2.3984005 2.2990184 2.0811613; 47.23201 2.2990184 18.791058 0.21500203; 43.678745 2.0811615 0.21500207 4.1014323]
 * x: Float32[-0.33797634, -0.017280957, -0.43527427, -0.015095601]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:06:21 training model with parameters [0.70403224, 0.9821954, 0.6445124, 0.9831526]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:06:50 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70403224,0.70403224,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9821954,0.0,0.9821954,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6445124,0.0,0.0,0.6445124,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9831526,0.0,0.0,0.0,0.9831526)]


    13     1.814477e+00     9.704104e-04
 * Current step size: 1.0
 * time: 928.3435018062592
 * g(x): Float32[-0.00010513201, 3.706835f-5, 0.00021851866, 0.00097041036]
 * ~inv(H): Float32[571.5144 27.257034 39.529312 39.624878; 27.257032 2.303093 1.9449629 1.8921424; 39.52931 1.9449629 17.901367 -0.32862687; 39.624886 1.8921427 -0.32862687 3.7856295]
 * x: Float32[-0.35093114, -0.017965049, -0.43926117, -0.016990889]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:07:32 training model with parameters [0.71251994, 0.9827053, 0.64482886, 0.98363835]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:08:01 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.71251994,0.71251994,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9827053,0.0,0.9827053,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64482886,0.0,0.0,0.64482886,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98363835,0.0,0.0,0.0,0.98363835)]


    14     1.814476e+00     3.677324e-05
 * Current step size: 1.0
 * time: 999.4623157978058
 * g(x): Float32[-7.3892475f-6, 3.6773235f-5, 3.2598882f-6, 1.372408f-5]
 * ~inv(H): Float32[654.7855 30.33923 44.20008 44.417217; 30.339228 2.4138737 2.1257844 2.0780945; 44.20008 2.1257844 18.144285 -0.0804076; 44.41723 2.078095 -0.080407575 4.0392094]
 * x: Float32[-0.33894736, -0.017446, -0.43877035, -0.016496988]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:08:43 training model with parameters [0.7146386, 0.9828035, 0.64495164, 0.98383176]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:09:12 training model with parameters [0.7135785, 0.9827544, 0.64489025, 0.983735]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:09:41 training model with parameters [0.71262574, 0.98271024, 0.644835, 0.983648]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:10:09 training model with parameters [0.71253055, 0.9827058, 0.64482945, 0.9836393]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:10:38 training model with parameters [0.71252525, 0.98270553, 0.64482915, 0.9836388]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:11:07 training model with parameters [0.7125222, 0.9827054, 0.644829, 0.9836385]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:11:35 training model with parameters [0.

    15     1.814476e+00     3.677324e-05
 * Current step size: 3.7724752e-5
 * time: 1329.2332727909088
 * g(x): Float32[-7.3962333f-6, 3.6773243f-5, 3.264613f-6, 1.358048f-5]
 * ~inv(H): Float32[654.7855 30.33923 44.20008 44.417217; 30.339228 2.4138737 2.1257844 2.0780945; 44.20008 2.1257844 18.144285 -0.0804076; 44.41723 2.078095 -0.080407575 4.0392094]
 * x: Float32[-0.33894724, -0.017445996, -0.43877035, -0.01649698]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:14:13 training model with parameters [0.7146464, 0.98280406, 0.64495176, 0.98383266]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:14:42 training model with parameters [0.7135824, 0.98275465, 0.6448903, 0.9837355]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:15:10 training model with parameters [0.7126262, 0.98271024, 0.644835, 0.98364806]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:15:39 training model with parameters [0.7125307, 0.9827058, 0.64482945, 0.9836393]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:16:08 training model with parameters [0.71252537, 0.98270553, 0.64482915, 0.9836388]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:16:36 training model with parameters [0.71252227, 0.9827054, 0.644829, 0.9836385]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:17:05 training model with parameters 

    16     1.814476e+00     3.677323e-05
 * Current step size: 3.7724752e-5
 * time: 1658.5070049762726
 * g(x): Float32[-7.4009163f-6, 3.677323f-5, 3.2764353f-6, 1.3772849f-5]
 * ~inv(H): Float32[348.81418 10.903069 69.03034 4.8342795; 10.903065 1.4510566 2.379642 0.1389899; 69.03035 2.3796422 22.572678 0.33078313; 4.834296 0.13899067 0.3307831 0.13609947]
 * x: Float32[-0.33894712, -0.017445993, -0.43877035, -0.016496973]


In [10]:
# Log the selected hyperparameters (exp of the minimizer, computed in the optimization
# cell) together with the function-call count that Optim recorded for the run.
@info "The optimal λ is $λ, found in " * repr(Optim.f_calls(res)) * " function calls"

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:19:42 The optimal λ is Float32[0.7125201, 0.9827053, 0.64482886, 0.98363835], found in 35 function calls


In [12]:
# Refit with the selected hyperparameters, using the same stopping settings as
# validation_mse (tolerance 1e-6, at most 16 iterations). The resulting global `u`
# and `a` are what the `model` function defined below reads.
stop_criteria = convergence_stopper(1e-6, max_iters = 16)
u, a = train_model(training, stop_criteria, λ...);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:19:42 training model with parameters [0.7125201, 0.9827053, 0.64482886, 0.98363835]


## Inference

In [13]:
# Prediction function for the fitted model: forwards to make_prediction with the
# global bias vectors `u` and `a` obtained from the final training run.
function model(users, items)
    return make_prediction(users, items, u, a)
end;

In [14]:
# Hand the prediction function to write_predictions, a helper not defined in this
# notebook (presumably from Alpha.ipynb — confirm). The logged output of this cell
# shows weighted RMSE/MAE/R2 for the training and validation sets.
write_predictions(model; residual_alphas = residual_alphas);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:20:19 training set weighted-loss: RMSE 1.2923702 MAE 0.9603634 R2 0.4864248
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220520 05:20:20 validation set weighted-loss: RMSE 1.3469614 MAE 1.0024881 R2 0.39467883


In [15]:
# Pass the fitted biases and the selected hyperparameters to write_params under the
# keys "u", "a" and "λ". NOTE(review): presumably this persists them for later
# notebooks — confirm against the helper's definition.
write_params(Dict("u" => u, "a" => a, "λ" => λ));