# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{\Omega}(r_{ij} - u_i - a_j)^2 + \lambda_u \sum_i (u_i - \bar u) ^2 + \lambda_a \sum_j (a_j - \bar a)^2 $
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$

In [1]:
# Both globals are set before Alpha.ipynb is included in the next cell.
# NOTE(review): presumably the included notebook reads `name` as this model's
# identifier and `residual_alphas` as the list of earlier models to fit residuals
# against (empty here) — confirm against Alpha.ipynb.
name = "UserItemBiases";
residual_alphas = [];

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

# Alternating Least Squares Algorithm
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) + \bar u \lambda_u}{|\Omega_i| + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [3]:
"""
    update_users!(users, items, ratings, u, a, λ_u)

Overwrite the bias vector `u` in place with its regularized least-squares update,
holding the other bias vector `a` fixed.

Row `k` of the data says that entity `users[k]` gave `ratings[k]` to entity
`items[k]`. Each entry of `u` becomes

    (sum of (rating - a[item]) over that entity's rows + mean(u) * λ_u) /
    (number of rows for that entity + λ_u)

where `mean(u)` is taken over the values of `u` before the update. Returns `nothing`.
"""
function update_users!(users, items, ratings, u, a, λ_u)
    T = eltype(u)
    resid_sum = zeros(T, length(u))
    n_rated = zeros(T, length(u))

    # One pass over the observations: accumulate, per entity, the part of each
    # rating that `a` does not explain, and how many ratings the entity has.
    for row in 1:length(users)
        who = users[row]
        resid_sum[who] += ratings[row] - a[items[row]]
        n_rated[who] += 1
    end

    # The shrinkage target is the mean of the *previous* biases, so it has to be
    # computed before `u` is overwritten.
    prior_mean = mean(u)
    u .= (resid_sum .+ prior_mean * λ_u) ./ (n_rated .+ λ_u)
    return nothing
end

In [4]:
"""
    train_model(training, λ_u, λ_a, ϵ = 1e-6)

Fit user biases `u` and item biases `a` by alternating least squares.

# Arguments
- `training`: object with `username`, `anime_id` and `my_score` fields, read as the
  user ids, item ids and ratings of the observed pairs.
- `λ_u`, `λ_a`: regularization strengths for the user and item biases.
- `ϵ`: convergence tolerance. Iteration stops once the largest absolute change in
  both `u` and `a` over one sweep is below `ϵ`.

# Returns
The tuple `(u, a)`, with `length(u) == maximum(users)` and
`length(a) == maximum(items)`.
"""
function train_model(training, λ_u, λ_a, ϵ = 1e-6)
    users = training.username
    items = training.anime_id
    ratings = training.my_score

    # Both bias vectors share one element type that is closed under division.
    # Typing them separately from each λ breaks when the penalties are integers
    # (the first fractional update throws InexactError) or of different numeric
    # types (each vector is accumulated into buffers typed after the other).
    P = promote_type(eltype(λ_u), eltype(λ_a))
    T = typeof(one(P) / one(P))
    u = zeros(T, maximum(users))
    a = zeros(T, maximum(items))

    converged = false
    while !converged
        old_u = copy(u)
        old_a = copy(a)
        update_users!(users, items, ratings, u, a, λ_u)
        # The item update is the user update with the roles of the two sides swapped.
        update_users!(items, users, ratings, a, u, λ_a)
        converged = maximum(abs, u - old_u) < ϵ && maximum(abs, a - old_a) < ϵ
    end
    return u, a
end;

In [5]:
"""
    make_prediction(users, items, u, a)

Return the predicted rating `u[user] + a[item]` for every `(users[k], items[k])` pair.

An id larger than the length of its bias vector has no fitted bias, so the mean of
that bias vector is used in its place.
"""
function make_prediction(users, items, u, a)
    r = zeros(eltype(u), length(users))
    # Compute the fallback means once, not once per out-of-range row.
    u_mean = mean(u)
    a_mean = mean(a)
    for i in eachindex(r)
        user_bias = users[i] > length(u) ? u_mean : u[users[i]]
        item_bias = items[i] > length(a) ? a_mean : a[items[i]]
        r[i] = user_bias + item_bias
    end
    return r
end;

## Training

In [6]:
"""
    validation_mse(λ)

Objective for the hyperparameter search. `λ` holds the logarithms of the two
regularization strengths; the model is trained on the global `training` data with
`exp.(λ)` and scored by `mse` on the global `validation` data.
"""
function validation_mse(λ)
    # Searching in log-space keeps both penalties strictly positive.
    penalties = exp.(λ)
    u, a = train_model(training, penalties[1], penalties[2])
    predicted = make_prediction(validation.username, validation.anime_id, u, a)
    return mse(validation.my_score, predicted)
end;

In [7]:
# Find the best regularization hyperparameters.
# The search variable is log(λ): validation_mse exponentiates its argument, so the
# all-zeros starting point corresponds to λ_u = λ_a = 1.
res = optimize(
    validation_mse,
    fill(0.0, 2), # initial guess
    LBFGS(),
    autodiff = :forward,
    Optim.Options(show_trace = true),
);
# Map the minimizer back from log-space to the actual penalties.
λ = exp.(Optim.minimizer(res));

Iter     Function value   Gradient norm 
     0     1.698360e+00     3.375168e-04
 * time: 0.026805877685546875
     1     1.698217e+00     1.233252e-04
 * time: 132.24958992004395
     2     1.698206e+00     1.844736e-05
 * time: 168.06685495376587
     3     1.698205e+00     1.626061e-05
 * time: 222.1215488910675
     4     1.698194e+00     3.899982e-05
 * time: 319.2639989852905
     5     1.698193e+00     3.425791e-05
 * time: 356.7244050502777
     6     1.698192e+00     3.448332e-06
 * time: 394.64650988578796
     7     1.698192e+00     2.983336e-07
 * time: 432.7493441104889
     8     1.698192e+00     2.206703e-09
 * time: 470.34413290023804


2-element Vector{Float64}:
 2.061994938223344
 3.260470990089654

In [8]:
# Report the penalties selected by the search.
print("The optimal [λ_u, λ_a] is ", λ);

The optimal [λ_u, λ_a] is [2.061994938223344, 3.260470990089654]

In [9]:
# Refit the biases on `training` using the selected penalties.
u, a = train_model(training, λ...);

## Inference

In [10]:
"""
    model(users, items)

Predict ratings for the given user/item ids from the global fitted biases `u` and `a`.
"""
function model(users, items)
    return make_prediction(users, items, u, a)
end;

In [11]:
# NOTE(review): write_predictions is not defined in this file (presumably it comes
# from Alpha.ipynb); judging by the output below it evaluates `model` on the
# training and validation sets — confirm what it persists.
write_predictions(model, save_training = true);

training set: RMSE 1.2937930865100624 MAE 0.9695274371978653 R2 0.47199769672473824
validation set: RMSE 1.3031469776333005 MAE 0.9763230624838878 R2 0.46438777062668946


In [12]:
# Hand the fitted biases and the selected penalties to write_params.
# NOTE(review): write_params is not defined in this file — confirm where it stores them.
write_params(Dict("u" => u, "a" => a, "λ" => λ));