# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{\Omega}(r_{ij} - u_i - a_j)^2 + \lambda_u \sum_i (u_i - \bar u)^2 + \lambda_a \sum_j (a_j - \bar a)^2$
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$

In [1]:
# Identifier for this model.
# NOTE(review): presumably read by helpers pulled in from Alpha.ipynb — confirm.
name = "UserItemBiases";
# Argument later passed to `get_residuals`; empty for this model.
# NOTE(review): presumably lists upstream models whose predictions are subtracted
# from the ratings — confirm against `get_residuals`.
residual_alphas = [];

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [3]:
# Training and validation splits obtained from `get_residuals` with `residual_alphas`.
# The rest of this notebook reads their `.user`, `.item` and `.rating` fields.
const training = get_residuals("training", residual_alphas)
const validation = get_residuals("validation", residual_alphas);

## Alternating Least Squares Algorithm
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) + \bar u \lambda_u}{|\Omega_i| + \lambda_u} = \dfrac{\rho_i + \bar u \lambda_u}{|\Omega_i| + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [4]:
"""
    get_residuals!(users, items, ratings, u, a, ρ, Ω)

Accumulate per-user statistics over the given observations, in place.

For every observation `(users[k], items[k], ratings[k])` this adds
`ratings[k] - a[items[k]]` to `ρ[users[k]]` and increments `Ω[users[k]]` by one.
`ρ` and `Ω` are not reset first, so the caller decides the starting values.
The argument `u` is accepted but not read.

Returns the tuple `(ρ, Ω)`.
"""
function get_residuals!(users, items, ratings, u, a, ρ, Ω)
    for (k, user) in enumerate(users)
        ρ[user] += ratings[k] - a[items[k]]
        Ω[user] += 1
    end
    return ρ, Ω
end

# todo move to utils
"""
    thread_range(n, tid = Threads.threadid(), nt = Threads.nthreads())

Return chunk number `tid` out of `nt` contiguous chunks that partition `1:n`.

The first `n % nt` chunks contain one extra element, so the chunks for
`tid = 1:nt` are disjoint and together cover `1:n` exactly; a chunk may be empty
when `n < nt`.

The defaults keep the original one-argument call working, but callers inside
`Threads.@threads` should pass the chunk index explicitly: the id of the thread
running an iteration is not guaranteed to be unique per iteration.
"""
function thread_range(n, tid = Threads.threadid(), nt = Threads.nthreads())
    d, r = divrem(n, nt)
    from = (tid - 1) * d + min(r, tid - 1) + 1
    to = from + d - 1 + (tid ≤ r ? 1 : 0)
    return from:to
end

"""
    update_users!(users, items, ratings, u, a, λ_u, ρ, Ω)

Perform one alternating-least-squares update of the biases `u`, holding `a` fixed:

    u[i] = (ρ_i + mean(u) * λ_u) / (|Ω_i| + λ_u)

where `ρ_i` is the sum of `ratings - a[item]` over the observations of `i` and
`|Ω_i|` is the number of those observations. The mean is taken over the values of
`u` before the update.

`ρ` and `Ω` are scratch matrices with one row per entry of `u` and one column per
chunk of work; both are overwritten. The observations are split into `size(ρ, 2)`
chunks, so `Ω` is expected to have the same number of columns as `ρ`.

`u` is modified in place; nothing is returned.
"""
function update_users!(users, items, ratings, u, a, λ_u, ρ, Ω)
    nchunks = size(ρ, 2)
    # Key both the chunk of observations and the scratch column on the loop index
    # rather than on `Threads.threadid()`: with dynamic scheduling two iterations
    # may run on the same thread, which would skip or double-count chunks.
    Threads.@threads for t = 1:nchunks
        rows = thread_range(length(ratings), t, nchunks)
        @views begin
            ρ_t = ρ[:, t]
            Ω_t = Ω[:, t]
            ρ_t .= 0
            Ω_t .= 0
            get_residuals!(users[rows], items[rows], ratings[rows], u, a, ρ_t, Ω_t)
        end
    end
    # Fresh names (instead of rebinding `ρ`/`Ω`) keep the variables captured by the
    # threaded closures from being boxed.
    ρ_total = sum(ρ, dims = 2)
    Ω_total = sum(Ω, dims = 2)

    μ = mean(u)
    Threads.@threads for i in eachindex(u)
        u[i] = (ρ_total[i] + μ * λ_u) / (Ω_total[i] + λ_u)
    end
end;

In [5]:
"""
    train_model(training, λ_u, λ_a, stop_criteria)

Fit the bias vectors by alternating updates of `u` and `a`.

`training` must expose `user`, `item` and `rating` fields. Both bias vectors start
at zero, sized by the largest user and item id respectively, and take their element
type from the corresponding regularization weight. Updates alternate until
`stop!(stop_criteria, [u, a])` returns `true`; the check happens before each round.

Returns the tuple `(u, a)`.
"""
function train_model(training, λ_u, λ_a, stop_criteria)
    @debug "training model with parameters [$λ_u, $λ_a]"
    users = training.user
    items = training.item
    ratings = training.rating

    u = zeros(eltype(λ_u), maximum(users))
    a = zeros(eltype(λ_a), maximum(items))

    # One scratch column per thread for the per-chunk accumulators.
    nt = Threads.nthreads()
    scratch(v) = zeros(eltype(v), length(v), nt)
    ρ_u, Ω_u = scratch(u), scratch(u)
    ρ_a, Ω_a = scratch(a), scratch(a)

    while true
        stop!(stop_criteria, [u, a]) && break
        update_users!(users, items, ratings, u, a, λ_u, ρ_u, Ω_u)
        # Same routine with the roles of users and items swapped updates `a`.
        update_users!(items, users, ratings, a, u, λ_a, ρ_a, Ω_a)
    end
    return u, a
end;

In [6]:
"""
    make_prediction(users, items, u, a)

Predict a rating for each `(users[i], items[i])` pair as user bias plus item bias.

An id larger than the corresponding bias vector falls back to the mean of that
vector. The means are computed once up front instead of once per out-of-range id.

Returns a vector with element type `eltype(u)` and one entry per element of `users`.
"""
function make_prediction(users, items, u, a)
    u_mean = mean(u)
    a_mean = mean(a)
    r = zeros(eltype(u), length(users))
    for i in eachindex(r)
        user = users[i]
        item = items[i]
        user_bias = user > length(u) ? u_mean : u[user]
        item_bias = item > length(a) ? a_mean : a[item]
        r[i] = user_bias + item_bias
    end
    return r
end;

## Training

In [7]:
"""
    validation_mse(λ)

Objective for the hyperparameter search: validation MSE as a function of log-λ.

`λ` holds the logarithms of `[λ_u, λ_a]`; exponentiating keeps both regularization
weights positive. The model is trained on `training` with a `1e-9` convergence
stopper and scored against `validation`.
"""
function validation_mse(λ)
    λ_u, λ_a = exp.(λ) # ensure both weights are positive
    stopper = convergence_stopper(1e-9)
    u, a = train_model(training, λ_u, λ_a, stopper)
    predicted = make_prediction(validation.user, validation.item, u, a)
    return mse(validation.rating, predicted)
end;

In [8]:
# Find the best regularization hyperparameters.
# The search runs over log(λ): `validation_mse` exponentiates its argument, so the
# optimizer works on an unconstrained domain while λ stays positive.
res = optimize(
    validation_mse,
    fill(0.0f0, 2), # initial guess: log(λ) = 0, i.e. λ_u = λ_a = 1
    LBFGS(),
    autodiff = :forward,
    Optim.Options(show_trace = true, extended_trace = true),
);
# Map the minimizer back from log-space to [λ_u, λ_a].
λ = exp.(Optim.minimizer(res));

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:46:59 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,1.0)]


Iter     Function value   Gradient norm 
     0     1.743445e+00     1.340679e-03
 * Current step size: 1.0
 * time: 0.026192903518676758
 * g(x): Float32[-0.0013406794, 5.567348f-7]
 * x: Float32[0.0, 0.0]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:47:35 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0013416,1.0013416,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99999946,0.0,0.99999946)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:48:37 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0067259,1.0067259,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9999972,0.0,0.9999972)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:49:10 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.034085,1.034085,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99998605,0.0,0.99998605)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:49:30 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.1824456,1.1824456,0.0), Dual{Forwar

     1     1.742781e+00     8.376977e-05
 * Current step size: 572.716
 * time: 205.81967401504517
 * g(x): Float32[-8.3769766f-5, 5.7841845f-7]
 * x: Float32[0.7678285, -0.0003188509]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:50:59 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.268236,2.268236,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9993059,0.0,0.9993059)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:51:27 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2129505,2.2129505,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99948686,0.0,0.99948686)]


     2     1.742780e+00     1.640858e-06
 * Current step size: 0.5178059
 * time: 262.6588830947876
 * g(x): Float32[-1.6408579f-6, 5.789553f-7]
 * x: Float32[0.79432666, -0.00051327737]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:51:56 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2141254,2.2141254,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99929625,0.0,0.99929625)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:52:59 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.218831,2.218831,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99853426,0.0,0.99853426)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:53:24 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.214244,2.214244,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99927706,0.0,0.99927706)]


     3     1.742780e+00     5.787520e-07
 * Current step size: 1.1008235
 * time: 433.18425393104553
 * g(x): Float32[2.0743624f-7, 5.7875195f-7]
 * x: Float32[0.79491097, -0.00072321505]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:54:46 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2142441,2.2142441,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.99907106,0.0,0.99907106)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:56:10 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2142453,2.2142453,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9982475,0.0,0.9982475)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:57:41 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2142515,2.2142515,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9941399,0.0,0.9941399)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 04:58:58 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.214282,2.214282,0.0), Dual{ForwardD

     4     1.742779e+00     6.323789e-06
 * Current step size: 13986.611
 * time: 918.0270440578461
 * g(x): Float32[6.3237894f-6, -2.2051385f-8]
 * x: Float32[0.79684776, -2.8842545]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 05:02:51 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2141361,2.2141361,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.062147424,0.0,0.062147424)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 05:03:22 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.1966217,2.1966217,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.09496831,0.0,0.09496831)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 05:04:16 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2132246,2.2132246,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.06352882,0.0,0.06352882)]


     5     1.742779e+00     1.278600e-06
 * Current step size: 1.2073839
 * time: 1044.9718511104584
 * g(x): Float32[-1.2786004f-6, -2.3876375f-8]
 * x: Float32[0.7944506, -2.7562616]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 05:04:58 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2131543,2.2131543,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.07289817,0.0,0.07289817)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 05:05:58 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.2128723,2.2128723,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.12638637,0.0,0.12638637)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 05:06:20 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.211464,2.211464,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.9797858,0.0,1.9797858)]
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 05:06:36 training model with parameters [Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(2.21285,2.21285,0.0), Dual{ForwardDif

     6     1.742779e+00     2.271039e-06
 * Current step size: 9.906665
 * time: 1281.7700810432434
 * g(x): Float32[-2.2710387f-6, -6.624014f-9]
 * x: Float32[0.79413515, -1.3934016]


In [9]:
# Log the selected hyperparameters together with the objective-call count from Optim.
@info "The optimal [λ_u, λ_a] is $λ, found in " *
      repr(Optim.f_calls(res)) *
      " function calls"

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220129 05:08:55 The optimal [λ_u, λ_a] is Float32[2.2125266, 0.24822949], found in 31 function calls


In [10]:
# Retrain once with the selected hyperparameters, using a fresh stopper with the same
# 1e-9 tolerance as in `validation_mse`. `λ...` splats into `λ_u, λ_a`.
stop_criteria = convergence_stopper(1e-9)
u, a = train_model(training, λ..., stop_criteria);

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220129 05:08:55 training model with parameters [2.2125266, 0.24822949]


## Inference

In [11]:
# Prediction function for (user, item) pairs, backed by the global bias
# vectors `u` and `a`.
function model(users, items)
    return make_prediction(users, items, u, a)
end;

In [12]:
# `write_predictions` is defined outside this section.
# NOTE(review): presumably evaluates `model` on each split and persists its
# predictions — confirm against its definition.
write_predictions(model);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220129 05:09:17 training set: RMSE 1.2835 MAE 0.9599677 R2 0.46298516
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220129 05:09:17 validation set: RMSE 1.3201425 MAE 0.9861401 R2 0.4090078


In [13]:
# Hand the learned biases and the selected hyperparameters to `write_params` under the
# keys "u", "a" and "λ". NOTE(review): `write_params` is defined elsewhere; where and
# how it stores them is not visible here.
write_params(Dict("u" => u, "a" => a, "λ" => λ));