# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{\Omega}(r_{ij} - u_i - a_j)^2 + \lambda_u \sum_i (u_i - \bar u) ^2 + \lambda_a \sum_j (a_j - \bar a)^2 $
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$

In [1]:
# Identifier of this model.
# NOTE(review): presumably read by the helpers loaded from Alpha.ipynb — confirm there.
name = "UserItemBiases";
# Empty, untyped list: this model declares no residual alphas.
# NOTE(review): presumably names the models whose residuals are fit — confirm in Alpha.ipynb.
residual_alphas = Any[];

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

# Alternating Least Squares Algorithm
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) + \bar u \lambda_u}{|\Omega_i| + \lambda_u} = \dfrac{\rho_i + \bar u \lambda_u}{|\Omega_i| + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [3]:
"""
    get_residuals!(users, items, ratings, u, a, ρ, Ω)

Accumulate per-user residual sums and rating counts for one chunk of ratings.

For every row, `ratings[row] - a[items[row]]` is added to `ρ[users[row]]` and
`Ω[users[row]]` is incremented by one. `ρ` and `Ω` are updated in place and are
NOT reset here; the caller is responsible for zeroing them. `u` is part of the
signature but is not read.

Returns the tuple `(ρ, Ω)`.
"""
function get_residuals!(users, items, ratings, u, a, ρ, Ω)
    for row in eachindex(users)
        i = users[row]
        ρ[i] += ratings[row] - a[items[row]]
        Ω[i] += 1
    end
    return ρ, Ω
end

# todo move to utils
"""
    thread_range(n, chunk = Threads.threadid(), nchunks = Threads.nthreads())

Return the `chunk`-th of `nchunks` contiguous index ranges that together cover
`1:n` exactly once. Chunk sizes differ by at most one; the first `n % nchunks`
chunks receive the extra element.

The one-argument form keeps the historical behavior of deriving the chunk from
the calling thread's id. Inside a `Threads.@threads` loop prefer passing the loop
index explicitly: the thread id is not guaranteed to match the iteration number.
"""
function thread_range(n, chunk = Threads.threadid(), nchunks = Threads.nthreads())
    d, r = divrem(n, nchunks)
    from = (chunk - 1) * d + min(r, chunk - 1) + 1
    to = from + d - 1 + (chunk ≤ r ? 1 : 0)
    return from:to
end

"""
    update_users!(users, items, ratings, u, a, λ_u, ρ, Ω)

Perform one regularized least-squares update of the biases `u`, holding `a` fixed:

    u[i] = (Σ_j (r_ij - a[j]) + mean(u) * λ_u) / (|Ω_i| + λ_u)

where `mean(u)` is taken before `u` is overwritten.

# Arguments
- `users`, `items`, `ratings`: parallel vectors, one entry per observed rating.
- `u`: biases to update in place; `a`: biases of the other side (read only).
- `λ_u`: regularization strength pulling `u[i]` towards `mean(u)`.
- `ρ`, `Ω`: scratch matrices of identical size `(length(u), nchunks)`. Each column
  is the private accumulator of one chunk of the ratings; both are overwritten.

The ratings are split into `size(ρ, 2)` chunks and every chunk writes only to its
own column, selected by the loop index rather than by `Threads.threadid()`, so the
result does not depend on how iterations are scheduled onto threads.

Returns `nothing`; throws `DimensionMismatch` if `ρ` and `Ω` differ in size.
"""
function update_users!(users, items, ratings, u, a, λ_u, ρ, Ω)
    size(ρ) == size(Ω) || throw(
        DimensionMismatch("ρ has size $(size(ρ)) but Ω has size $(size(Ω))"),
    )
    nchunks = size(ρ, 2)
    nratings = length(ratings)

    Threads.@threads for t in 1:nchunks
        range = thread_range(nratings, t, nchunks)
        ρ_t = view(ρ, :, t)
        Ω_t = view(Ω, :, t)
        # Buffers are reused between calls, so clear this chunk's column first.
        fill!(ρ_t, zero(eltype(ρ)))
        fill!(Ω_t, zero(eltype(Ω)))
        @views get_residuals!(users[range], items[range], ratings[range], u, a, ρ_t, Ω_t)
    end

    # Reduce the per-chunk partial sums. New names are used on purpose: rebinding
    # `ρ`/`Ω` would box the variables captured by the threaded closure above.
    ρ_total = sum(ρ, dims = 2)
    Ω_total = sum(Ω, dims = 2)

    μ = mean(u)
    Threads.@threads for i in eachindex(u)
        u[i] = (ρ_total[i] + μ * λ_u) / (Ω_total[i] + λ_u)
    end
    return nothing
end;

In [4]:
"""
    train_model(training, λ_u, λ_a, stop_criteria)

Fit user biases `u` and item biases `a` by alternating least squares.

# Arguments
- `training`: object exposing parallel `user`, `item` and `rating` collections.
  User and item ids are used directly as 1-based indices into `u` and `a`.
- `λ_u`, `λ_a`: regularization strengths for the user and item biases.
- `stop_criteria`: passed to `stop!(stop_criteria, [u, a])` before every sweep;
  training ends as soon as that call returns `true`.

# Returns
The tuple `(u, a)` with `length(u) == maximum(training.user)` and
`length(a) == maximum(training.item)`.
"""
function train_model(training, λ_u, λ_a, stop_criteria)
    @debug "training model with parameters [$λ_u, $λ_a]"
    users, items, ratings = training.user, training.item, training.rating

    # Use one floating-point element type for every array. Each side's update reads
    # the other side's biases, so separate types (or an integer λ, which would yield
    # integer arrays) break as soon as a non-representable value is stored.
    T = float(promote_type(typeof(λ_u), typeof(λ_a)))
    u = zeros(T, maximum(users))
    a = zeros(T, maximum(items))

    # Scratch accumulators with one column per thread, reused across sweeps.
    nbuffers = Threads.nthreads()
    ρ_u = zeros(T, length(u), nbuffers)
    Ω_u = zeros(T, length(u), nbuffers)
    ρ_a = zeros(T, length(a), nbuffers)
    Ω_a = zeros(T, length(a), nbuffers)

    while !stop!(stop_criteria, [u, a])
        update_users!(users, items, ratings, u, a, λ_u, ρ_u, Ω_u)
        # The item update is the same computation with the two roles swapped.
        update_users!(items, users, ratings, a, u, λ_a, ρ_a, Ω_a)
    end
    return u, a
end;

In [5]:
"""
    make_prediction(users, items, u, a)

Predict a rating for every `(users[k], items[k])` pair as user bias plus item bias.

An id larger than the corresponding bias vector (a user or item not covered by
`u` / `a`) falls back to the mean of that vector.

Returns a vector with `eltype(u)` and one entry per element of `users`.
"""
function make_prediction(users, items, u, a)
    r = zeros(eltype(u), length(users))
    # Fallback values are loop-invariant: compute them once instead of once per
    # unseen id.
    u_mean = mean(u)
    a_mean = mean(a)
    n_users = length(u)
    n_items = length(a)
    for k in eachindex(r)
        user = users[k]
        item = items[k]
        r[k] += user > n_users ? u_mean : u[user]
        r[k] += item > n_items ? a_mean : a[item]
    end
    return r
end;

## Training

In [6]:
"""
    validation_mse(λ)

Objective for the hyperparameter search: train on the global `training` set with
regularization strengths `exp.(λ)` and return the mean squared error of the
resulting predictions on the global `validation` set.

`λ` holds the logarithms of `(λ_u, λ_a)`.
"""
function validation_mse(λ)
    # Working in log-space keeps both penalties strictly positive.
    λ_u, λ_a = exp.(λ)
    stopper = convergence_stopper(1e-6)
    u, a = train_model(training, λ_u, λ_a, stopper)
    predicted = make_prediction(validation.user, validation.item, u, a)
    return mse(validation.rating, predicted)
end;

In [7]:
# Search for the regularization hyperparameters that minimize the validation MSE.
# The optimizer works on log(λ); `validation_mse` exponentiates its argument.
res = optimize(
    validation_mse,
    zeros(2), # initial guess: log(λ) = 0
    LBFGS(),
    Optim.Options(show_trace = true, extended_trace = true);
    autodiff = :forward,
);
# Map the minimizer back from log-space to the actual [λ_u, λ_a].
λ = exp.(Optim.minimizer(res));

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:14:06 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0,0.0,1.0)]


Iter     Function value   Gradient norm 
     0     1.698360e+00     3.375168e-04
 * Current step size: 1.0
 * time: 0.0509798526763916
 * g(x): [-0.0003375167889227533, -1.6022331222208823e-5]
 * x: [0.0, 0.0]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:14:24 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.000337573754123,1.000337573754123,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0000160224595804,0.0,1.0000160224595804)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:14:35 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0016890087157597,1.0016890087157597,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0000801148651355,0.0,1.0000801148651355)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:14:45 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.008473619307126,1.008473619307126,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0004006385147357,0.0,1.0004006385147357)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:14:55 training model with pa

     1     1.698217e+00     1.233252e-04
 * Current step size: 1671.2384464768325
 * time: 73.76679301261902
 * g(x): [-0.00012332524666451778, -1.6127362332710334e-5]
 * x: [0.5640710339791112, 0.026777135940741523]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:15:34 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.437301326204616,2.437301326204616,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0887369246571685,0.0,1.0887369246571685)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:15:44 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.018596397995572,2.018596397995572,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0527739240836238,0.0,1.0527739240836238)]


     2     1.698206e+00     1.844736e-05
 * Current step size: 0.4232642520413312
 * time: 92.30641293525696
 * g(x): [-1.8447364019485055e-5, -1.6194112676016894e-5]
 * x: [0.7024024174138034, 0.05142851312452759]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:15:53 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0762382738010436,2.0762382738010436,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.0817672046836735,0.0,1.0817672046836735)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:16:02 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.323742427024127,2.323742427024127,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.2059479883149455,0.0,1.2059479883149455)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:16:12 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.095444969897854,2.095444969897854,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.091421668167482,0.0,1.091421668167482)]


     3     1.698205e+00     1.626061e-05
 * Current step size: 1.3270498743872332
 * time: 120.9754478931427
 * g(x): [1.484673359510144e-5, -1.6260611794579082e-5]
 * x: [0.7397659269538631, 0.08748112912567516]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:16:21 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.097568336830016,2.097568336830016,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.1282883366673282,0.0,1.1282883366673282)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:16:31 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.1060833429826,2.1060833429826,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(1.2886358440615198,0.0,1.2886358440615198)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:16:41 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.149179684320585,2.149179684320585,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.504267687773879,0.0,2.504267687773879)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:16:51 training model with parameters

     4     1.698194e+00     3.899982e-05
 * Current step size: 25.437190507095913
 * time: 174.8577060699463
 * g(x): [3.899982211973321e-5, -8.725989988276363e-6]
 * x: [0.765529018952795, 0.9325200864374217]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:17:15 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0742572023231967,2.0742572023231967,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(6.639180927989703,0.0,6.639180927989703)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:17:26 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.1396369491799203,2.1396369491799203,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.895996047800358,0.0,2.895996047800358)]


     5     1.698193e+00     3.425791e-05
 * Current step size: 0.1361929322012428
 * time: 195.25358700752258
 * g(x): [3.425790601525754e-5, -4.743149076498255e-6]
 * x: [0.7606361647256603, 1.0633291098127176]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:17:36 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0608951090273666,2.0608951090273666,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.375530099354065,0.0,3.375530099354065)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:17:46 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.069943476867063,2.069943476867063,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.315638530972272,0.0,3.315638530972272)]


     6     1.698192e+00     3.448332e-06
 * Current step size: 0.8831629501202527
 * time: 215.83857989311218
 * g(x): [3.4483322576967427e-6, 7.66496326668043e-7]
 * x: [0.7275213010431436, 1.1986502237837282]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:17:56 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.060670575977708,2.060670575977708,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.2395784404630374,0.0,3.2395784404630374)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:18:06 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.062686238878083,2.062686238878083,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.2559907930758016,0.0,3.2559907930758016)]


     7     1.698192e+00     2.983336e-07
 * Current step size: 0.7822464271425288
 * time: 235.54500794410706
 * g(x): [2.983336205469732e-7, -6.245540860822255e-8]
 * x: [0.724009132787971, 1.1804966204568739]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:18:16 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.0619781693500334,2.0619781693500334,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.260579761404818,0.0,3.260579761404818)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:18:26 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(2.061994938223341,2.061994938223341,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float64}}(3.260470990089637,0.0,3.260470990089637)]


     8     1.698192e+00     2.206703e-09
 * Current step size: 0.9763135067373814
 * time: 255.539057970047
 * g(x): [-2.2067027747480572e-9, -5.394881622849006e-10]
 * x: [0.723673930801911, 1.181871660430657]


In [8]:
# Report the selected hyperparameters and how many objective evaluations were used.
@info string(
    "The optimal [λ_u, λ_a] is $λ, found in ",
    repr(Optim.f_calls(res)),
    " function calls",
)

The optimal [λ_u, λ_a] is [2.061994938223341, 3.260470990089637], found in 26 function calls

In [9]:
# Retrain with the selected hyperparameters, using the same convergence tolerance
# as during the search. λ holds exactly [λ_u, λ_a].
stop_criteria = convergence_stopper(1e-6)
u, a = train_model(training, λ[1], λ[2], stop_criteria);

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211204 20:18:37 training model with parameters [2.061994938223341, 3.260470990089637]


## Inference

In [10]:
# Prediction function for the fitted model: binds the trained global biases `u` and `a`.
function model(users, items)
    return make_prediction(users, items, u, a)
end;

In [11]:
# Hand the fitted model to `write_predictions`; the captured output below shows it
# logging RMSE / MAE / R2 for the training and validation sets.
# NOTE(review): what it persists is defined outside this notebook — confirm there.
write_predictions(model; save_training = true);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20211204 20:18:50 training set: RMSE 1.2937930865100624 MAE 0.9695274371978653 R2 0.47199769672473824
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20211204 20:18:51 validation set: RMSE 1.3031469776333005 MAE 0.9763230624838878 R2 0.46438777062668946


In [12]:
# Pass the learned biases and the selected regularization strengths to `write_params`.
write_params(
    Dict(
        "u" => u,
        "a" => a,
        "λ" => λ,
    ),
);