# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{\Omega}w_{ij}\text{loss}(r_{ij}, \tilde r_{ij}) + \lambda_u \sum_i (u_i - \bar u) ^2 + \lambda_a \sum_j (a_j - \bar a)^2 $
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$
* $w_{ij}$ is the weight for the rating $r_{ij}$ and is modeled as a power law in the number of items seen by user $i$ and the number of users that have seen item $j$: $w_{ij} = |\{j' : (i, j') \in \Omega\}| ^ {\lambda_{wu}} \, |\{i' : (i', j) \in \Omega\}| ^ {\lambda_{wa}}$
* $\text{loss}$ is the squared error, $\text{loss}(r_{ij}, \tilde r_{ij}) = (r_{ij} - \tilde r_{ij})^2$

In [1]:
# Identifier for this model (presumably read by the helpers included from
# Alpha.ipynb to label outputs — TODO confirm).
const name = "UserItemBiases"
# Forwarded unchanged to `residualized_loss` and `write_alpha` below; empty here.
const residual_alphas = []
# Forwarded to `get_split`, `get_counts`, `residualized_loss` and `write_alpha`.
const implicit = false;

In [2]:
import NBInclude: @nbinclude
import SparseArrays: sparse
import Statistics: mean
@nbinclude("Alpha.ipynb");

In [3]:
# Load the two data splits through `get_split` (defined outside this section).
# The code below reads their `user`, `item` and `rating` fields.
const training = get_split("training", implicit)
const validation = get_split("validation", implicit);

## Alternating Least Squares
* Given some hyperparameters $\lambda$, we can solve for $U$ and $A$ via Alternating Least Squares
* This is an iterative algorithm where we fix $A$, then solve for the $U$ that minimizes the loss function
* Then we fix $U$ and solve for the best $A$
* These two steps are repeated until the matrices $U$ and $A$ converge

## ALS for Explicit data
* If we fix $a$, then for each user $i$, $u_i$ is optimized when
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) w_{ij} + \bar u \lambda_u}{ \sum_{j \in \Omega_i} w_{ij} + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [4]:
"""
    make_prediction(users, items, u, a)

Return a vector with one prediction per `(users[k], items[k])` pair, computed
as `u[users[k]] + a[items[k]]`.

The result has element type `eltype(u)` and length `length(users)`. Lookups
into `u` and `a` are not bounds-checked, so every entry of `users` and `items`
must be a valid index.
"""
function make_prediction(users, items, u, a)
    predictions = Vector{eltype(u)}(undef, length(users))
    Threads.@threads for k in eachindex(predictions)
        @inbounds predictions[k] = u[users[k]] + a[items[k]]
    end
    return predictions
end;

In [5]:
"""
    get_residuals!(users, items, ratings, weights, a, ρ, Ω)

Accumulate weighted residual sums into `ρ` and weight sums into `Ω`, in place.

For every row `k`, with `i = users[k]` and `j = items[k]`, adds
`(ratings[k] - a[j]) * weights[k]` to `ρ[i]` and `weights[k]` to `Ω[i]`.
The accumulators are not zeroed here. Indexing is not bounds-checked.

Returns the tuple `(ρ, Ω)`.
"""
function get_residuals!(users, items, ratings, weights, a, ρ, Ω)
    @inbounds for k in eachindex(users)
        user = users[k]
        weight = weights[k]
        ρ[user] += (ratings[k] - a[items[k]]) * weight
        Ω[user] += weight
    end
    return ρ, Ω
end

"""
    update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)

Perform one alternating-least-squares half-step: holding `a` fixed, overwrite
every entry of `u` in place with

    u[i] = (Σ w * (r - a[j]) + mean(u) * λ_u) / (Σ w + λ_u)

where both sums run over the rows whose `users` entry is `i`, and `mean(u)` is
taken over the values of `u` before the update.

# Arguments
- `users`, `items`, `ratings`, `weights`: parallel vectors, one entry per row.
- `u`: biases to update in place.
- `a`: biases of the other side, held fixed.
- `λ_u`: regularization strength pulling each `u[i]` toward `mean(u)`.
- `ρ`, `Ω`: scratch matrices with one column per thread (the caller allocates
  them as `length(u) × Threads.nthreads()`); their contents are overwritten.

Calling with the roles of `users`/`items` and `u`/`a` swapped updates the
other side's biases.
"""
function update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)
    # `:static` runs exactly one iteration on each thread and keeps
    # `Threads.threadid()` constant within it, so every iteration owns a
    # distinct scratch column. The default (dynamic) scheduler on newer Julia
    # versions gives no such guarantee. `thread_range` is defined outside this
    # cell; presumably it selects the calling thread's share of the rows.
    Threads.@threads :static for t = 1:Threads.nthreads()
        tid = Threads.threadid()
        range = thread_range(length(ratings))
        ρ[:, tid] .= 0
        Ω[:, tid] .= 0
        @views get_residuals!(
            users[range],
            items[range],
            ratings[range],
            weights[range],
            a,
            ρ[:, tid],
            Ω[:, tid],
        )
    end

    # Reduce the per-thread partial sums into fresh bindings. Rebinding the
    # arguments `ρ`/`Ω` here would force Julia to box them, because they are
    # captured by the threaded closures, and the loop below would then run
    # with dynamically typed element accesses.
    ρ_total = sum(ρ, dims = 2)
    Ω_total = sum(Ω, dims = 2)
    μ = mean(u)
    Threads.@threads for i = 1:length(u)
        @inbounds u[i] = (ρ_total[i] + μ * λ_u) / (Ω_total[i] + λ_u)
    end
end;

In [6]:
"""
    train_model(training, stop_criteria, λ)

Fit user biases `u` and item biases `a` by alternating `update_users!` steps.

# Arguments
- `training`: object whose `user`, `item` and `rating` fields hold the
  observed ratings.
- `stop_criteria`: passed to `stop!` together with `[u, a]` before every
  sweep; training ends once `stop!` returns `true`.
- `λ`: four hyperparameters `(λ_u, λ_a, λ_wu, λ_wa)`. The first two are the
  regularization strengths; the logs of the last two are passed to
  `expdecay` to build the per-rating weights.

Returns the tuple `(u, a)`.
"""
function train_model(training, stop_criteria, λ)
    @info "training model with parameters $λ"
    λ_u, λ_a, λ_wu, λ_wa = λ
    users = training.user
    items = training.item
    ratings = training.rating

    # Per-rating weight: the product of a user-count term and an item-count term.
    user_weights = expdecay(get_counts("training", implicit), log(λ_wu))
    item_weights = expdecay(get_counts("training", implicit; by_item = true), log(λ_wa))
    weights = user_weights .* item_weights

    # Biases start at zero, typed like the hyperparameters.
    u = zeros(eltype(λ_u), num_users())
    a = zeros(eltype(λ_a), num_items())

    # One scratch column per thread for the partial sums in `update_users!`.
    scratch(v) = zeros(eltype(v), length(v), Threads.nthreads())
    ρ_u, Ω_u = scratch(u), scratch(u)
    ρ_a, Ω_a = scratch(a), scratch(a)

    while true
        stop!(stop_criteria, [u, a]) && break
        # Update the user side with items fixed, then the item side with users fixed.
        update_users!(users, items, ratings, weights, u, a, λ_u, ρ_u, Ω_u)
        update_users!(items, users, ratings, weights, a, u, λ_a, ρ_a, Ω_a)
    end
    return u, a
end;

## Training

In [7]:
"""
    validation_mse(λ)

Objective for the hyperparameter search. Takes the four hyperparameters in
log-space, trains a model on `training`, predicts the `validation` pairs and
returns the value of `residualized_loss` for those predictions.
"""
function validation_mse(λ)
    # The search runs in log-space; exponentiating keeps every hyperparameter
    # strictly positive.
    hyperparams = exp.(λ)
    stopper = convergence_stopper(1e-6, max_iters = 16)
    u, a = train_model(training, stopper, hyperparams)
    predictions = make_prediction(validation.user, validation.item, u, a)
    return residualized_loss(residual_alphas, implicit, predictions)
end;

In [8]:
# Evaluate the objective once at the optimizer's starting point: log-space
# zeros, i.e. every hyperparameter equal to exp(0) = 1.
validation_mse(fill(0.0f0, 4))

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:06:34 training model with parameters Float32[1.0, 1.0, 1.0, 1.0]
[32mProgress: 100%|███████████████████████████| Time: 0:00:02 ( 0.15 μs/it)[39m39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 (32.59 ns/it)[39m


1.813418f0

In [9]:
# Tune the four hyperparameters in log-space: minimize `validation_mse` with
# L-BFGS from the all-zeros start, using forward-mode automatic
# differentiation for the gradient.
res = Optim.optimize(
    validation_mse,
    fill(0.0f0, 4),
    Optim.LBFGS(),
    Optim.Options(show_trace = true, extended_trace = true);
    autodiff = :forward,
);
# Map the minimizer back from log-space to the actual hyperparameter values.
λ = exp.(Optim.minimizer(res));

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:07:04 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,1.0,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,1.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,0.0,1.0)]


Iter     Function value   Gradient norm 
     0     1.813422e+00     5.017445e-02
 * Current step size: 1.0
 * time: 0.03074789047241211
 * g(x): Float32[-0.0045934455, -3.0179476f-6, 0.018213015, 0.050174452]
 * x: Float32[0.0, 0.0, 0.0, 0.0]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:07:54 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.004604,1.004604,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.000003,0.0,1.000003,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98195183,0.0,0.0,0.98195183,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9510635,0.0,0.0,0.0,0.9510635)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:08:33 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.023233,1.023233,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000151,0.0,1.0000151,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.91295826,0.0,0.0,0.91295826,0.0), Dual{ForwardDif

     1     1.811669e+00     1.526675e-02
 * Current step size: 1.0271175
 * time: 118.09454584121704
 * g(x): Float32[-0.0010347532, -3.7929474f-6, 0.015266755, -0.0015451695]
 * x: Float32[0.004718008, 3.0997867f-6, -0.018706907, -0.05153506]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:09:50 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0062914,1.0062914,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000075,0.0,1.0000075,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9636046,0.0,0.0,0.9636046,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.94699776,0.0,0.0,0.0,0.94699776)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:10:27 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0125649,1.0125649,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000253,0.0,1.0000253,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.89534736,0.0,0.0,0.89534736,0.0), Dual{Forw

     2     1.810900e+00     4.440900e-02
 * Current step size: 4.872041
 * time: 231.85019493103027
 * g(x): Float32[0.0026561713, -4.241068f-6, 0.0074390667, -0.044409003]
 * x: Float32[0.012287829, 2.4657189f-5, -0.10819325, -0.06577837]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:11:44 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0195522,1.0195522,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.000055,0.0,1.000055,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7980849,0.0,0.0,0.7980849,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9569118,0.0,0.0,0.0,0.9569118)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:12:22 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0488207,1.0488207,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0001758,0.0,1.0001758,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.49911016,0.0,0.0,0.49911016,0.0), Dual{ForwardD

     3     1.808716e+00     4.012633e-02
 * Current step size: 1.7192914
 * time: 345.2062740325928
 * g(x): Float32[0.003347404, 4.124724f-6, -0.0019332627, -0.040126335]
 * x: Float32[0.024453033, 7.6644224f-5, -0.309947, -0.028410792]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:13:37 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.019721,1.019721,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9999307,0.0,0.9999307,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6674332,0.0,0.0,0.6674332,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0155143,0.0,0.0,0.0,1.0155143)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:14:14 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0209557,1.0209557,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99996656,0.0,0.99996656,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6830927,0.0,0.0,0.6830927,0.0), Dual{ForwardD

     4     1.808150e+00     6.450061e-03
 * Current step size: 0.75424814
 * time: 419.3581919670105
 * g(x): Float32[-0.00013146133, 1.1248627f-5, 0.0024728733, 0.006450061]
 * x: Float32[0.020739174, -3.3458622f-5, -0.38112465, 0.004629772]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:14:51 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0226495,1.0226495,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99996436,0.0,0.99996436,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.66204804,0.0,0.0,0.66204804,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0069308,0.0,0.0,0.0,1.0069308)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:15:28 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0219977,1.0219977,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9999652,0.0,0.9999652,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6700656,0.0,0.0,0.6700656,0.0), Dual{Forw

     5     1.808130e+00     1.123598e-03
 * Current step size: 0.6153235
 * time: 493.369441986084
 * g(x): Float32[0.00039905624, 1.5101061f-5, 0.0001787557, 0.0011235977]
 * x: Float32[0.02175918, -3.479128f-5, -0.4003797, 0.0060309195]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:16:05 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0215311,1.0215311,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99995005,0.0,0.99995005,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.67050534,0.0,0.0,0.67050534,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0054435,0.0,0.0,0.0,1.0054435)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:16:42 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0196668,1.0196668,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9998895,0.0,0.9998895,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6722671,0.0,0.0,0.6722671,0.0), Dual{Forw

     6     1.808129e+00     5.227212e-04
 * Current step size: 1.6296648
 * time: 604.297534942627
 * g(x): Float32[0.0005227212, 1.5037459f-5, -6.365744f-5, -0.0004622097]
 * x: Float32[0.021014994, -5.947607f-5, -0.3993106, 0.005049437]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:17:56 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0206496,1.0206496,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9999232,0.0,0.9999232,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.67088073,0.0,0.0,0.67088073,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0049921,0.0,0.0,0.0,1.0049921)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:18:33 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0183018,1.0183018,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99985415,0.0,0.99985415,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6712746,0.0,0.0,0.6712746,0.0), Dual{Forw

     7     1.808116e+00     1.406724e-03
 * Current step size: 95.5247
 * time: 789.7172048091888
 * g(x): Float32[0.0002447643, 1.2993038f-5, 0.0014067244, 0.0009205817]
 * x: Float32[-0.03398048, -0.0017102798, -0.38529557, -0.0016091829]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:21:02 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9210202,0.9210202,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.996798,0.0,0.996798,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6757155,0.0,0.0,0.6757155,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99572694,0.0,0.0,0.0,0.99572694)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:21:40 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.75923437,0.75923437,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99084735,0.0,0.99084735,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65787953,0.0,0.0,0.65787953,0.0), Dual{Fo

     8     1.808043e+00     1.585269e-04
 * Current step size: 6.0180182
 * time: 940.1777160167694
 * g(x): Float32[-6.8241907f-6, 2.6681591f-5, 3.4664965f-5, 0.00015852695]
 * x: Float32[-0.3246078, -0.010718701, -0.4255415, -0.017695367]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:23:32 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7190641,0.7190641,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9877013,0.0,0.9877013,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65312696,0.0,0.0,0.65312696,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9820598,0.0,0.0,0.0,0.9820598)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:24:09 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70427054,0.70427054,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9811795,0.0,0.9811795,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6519727,0.0,0.0,0.6519727,0.0), Dual{Forw

     9     1.808043e+00     1.284481e-04
 * Current step size: 1.7964028
 * time: 1052.0201058387756
 * g(x): Float32[4.029423f-6, 2.6985828f-5, -2.943826f-5, -0.00012844808]
 * x: Float32[-0.33394367, -0.013693964, -0.4263359, -0.018427776]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:25:24 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7160642,0.7160642,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98512363,0.0,0.98512363,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65290403,0.0,0.0,0.65290403,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9817342,0.0,0.0,0.0,0.9817342)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:26:01 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7159446,0.7159446,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9800371,0.0,0.9800371,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65293205,0.0,0.0,0.65293205,0.0), Dual{Fo

    10     1.808037e+00     1.265754e-03
 * Current step size: 440.70148
 * time: 1275.5234088897705
 * g(x): Float32[4.6147055f-5, 5.4074744f-6, 0.0002736034, -0.0012657536]
 * x: Float32[-0.35235333, -0.5840397, -0.42159924, -0.021470923]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:29:08 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.708259,0.708259,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.49575487,0.0,0.49575487,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6523689,0.0,0.0,0.6523689,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98081166,0.0,0.0,0.0,0.98081166)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:29:45 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.72955984,0.72955984,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.3096819,0.0,0.3096819,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.63805664,0.0,0.0,0.63805664,0.0), Dual{Fo

    11     1.808034e+00     1.384111e-04
 * Current step size: 1.4234217
 * time: 1387.1739649772644
 * g(x): Float32[1.7745315f-5, 2.531124f-6, -8.536298f-5, -0.00013841108]
 * x: Float32[-0.34180877, -0.7514825, -0.42949325, -0.018487295]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:30:59 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70875156,0.70875156,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.4397899,0.0,0.4397899,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65099937,0.0,0.0,0.65099937,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98150295,0.0,0.0,0.0,0.98150295)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:31:36 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70186365,0.70186365,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.3324183,0.0,0.3324183,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65164196,0.0,0.0,0.65164196,0.0), Dua

    12     1.808033e+00     1.764127e-04
 * Current step size: 2.013646
 * time: 1498.864665031433
 * g(x): Float32[-1.9009552f-5, 1.9087082f-7, 5.1412477f-5, 0.00017641265]
 * x: Float32[-0.34672502, -0.8923888, -0.42899656, -0.01885572]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:32:51 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7084238,0.7084238,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.40588373,0.0,0.40588373,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6511278,0.0,0.0,0.6511278,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98142165,0.0,0.0,0.0,0.98142165)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:33:28 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.71414876,0.71414876,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.39106274,0.0,0.39106274,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6509905,0.0,0.0,0.6509905,0.0), Dual{

    13     1.808033e+00     5.474516e-06
 * Current step size: 1.1224906
 * time: 1611.0625398159027
 * g(x): Float32[3.724754f-7, 3.8308595f-8, -1.622722f-6, -5.4745155f-6]
 * x: Float32[-0.34446633, -0.9028276, -0.42905575, -0.018740539]


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:34:43 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.70864666,0.70864666,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.40451136,0.0,0.40451136,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6511299,0.0,0.0,0.6511299,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9814397,0.0,0.0,0.0,0.9814397)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:35:21 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7088397,0.7088397,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.40089065,0.0,0.40089065,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.65115505,0.0,0.0,0.65115505,0.0), Dual{

    14     1.808033e+00     6.412606e-07
 * Current step size: 1.1233808
 * time: 1723.2058458328247
 * g(x): Float32[3.0843395f-9, 1.3546859f-9, -1.0447979f-8, -6.412606f-7]
 * x: Float32[-0.34438983, -0.9053528, -0.4290449, -0.018734004]


In [10]:
# Log the tuned hyperparameters and the objective-call count reported by Optim.
@info "The optimal λ is $λ, found in " * repr(Optim.f_calls(res)) * " function calls"

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:36:35 The optimal λ is Float32[0.7086526, 0.4043992, 0.6511307, 0.98144037], found in 47 function calls


In [11]:
# Refit on the training split with the tuned λ, using the same stopping
# settings as `validation_mse`.
stop_criteria = convergence_stopper(1e-6, max_iters = 16)
u, a = train_model(training, stop_criteria, λ);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:36:35 training model with parameters Float32[0.7086526, 0.4043992, 0.6511307, 0.98144037]


In [12]:
# Re-evaluate the objective at the minimizer. The argument is the log-space
# point; `validation_mse` exponentiates it internally.
validation_mse(Optim.minimizer(res))

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:36:48 training model with parameters Float32[0.7086526, 0.4043992, 0.6511307, 0.98144037]


1.8080267f0

## Inference

In [13]:
# Prediction function built on the fitted biases `u` and `a`; handed to
# `write_alpha` (defined outside this section) together with the model settings.
function model(users, items)
    return make_prediction(users, items, u, a)
end
write_alpha(model, residual_alphas, implicit);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:37:03 validation loss: 1.8080267, β: Float32[1.0015497]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220604 15:37:06 training loss: 1.6717246, β: Float32[1.0015497]


In [14]:
# Hand the fitted biases and tuned hyperparameters to `write_params` (defined
# outside this section).
write_params(Dict("u" => u, "a" => a, "λ" => λ));