# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{(i,j) \in \Omega}(r_{ij} - u_i - a_j)^2 + \lambda_u \sum_i (u_i - \bar u)^2 + \lambda_a \sum_j (a_j - \bar a)^2$, where $\Omega$ is the set of (user, item) pairs that have a rating
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 

In [1]:
using CSV
using DataFrames
using FileIO
using JLD2
using JupyterFormatter
using Optim
using Statistics
import Metrics

In [2]:
# Turn on automatic formatting of notebook cells
# (enable_autoformat presumably comes from JupyterFormatter — confirm).
enable_autoformat();

In [3]:
"""
    get_split(split)

Read `../../data/splits/<split>.csv` into a `DataFrame`.

`split` must be `"training"` or `"validation"`. The `username` and `anime_id`
columns are incremented by one and `my_score` is converted with `float`.
"""
function get_split(split)
    @assert split in ["training", "validation"]
    path = "../../data/splits/$(split).csv"
    frame = DataFrame(CSV.File(path))
    # julia is 1 indexed, so shift both id columns up by one
    frame.username .+= 1
    frame.anime_id .+= 1
    frame.my_score = float(frame.my_score)
    return frame
end;

In [4]:
"""
    write_prediction(df, split)

Write `df` as `<split>.csv` inside the `../../data/alphas/<name>` directory, where
`<name>` is the global `name`. Only `split == "validation"` is accepted.

The directory is created when missing. A copy of `df` is written with `username`
and `anime_id` decremented by one; the caller's `df` is not modified.
"""
function write_prediction(df, split)
    @assert split in ["validation"]
    outdir = "../../data/alphas/$name"
    isdir(outdir) || mkpath(outdir)
    # undo the +1 shift applied by get_split, on a copy
    shifted = copy(df)
    shifted.username .-= 1
    shifted.anime_id .-= 1
    return CSV.write("$(outdir)/$(split).csv", shifted)
end;

In [5]:
"""
    write_model(params)

Save `params` to `model.jld2` inside the `../../data/alphas/<name>` directory, where
`<name>` is the global `name`. The directory is created when missing.
"""
function write_model(params)
    outdir = "../../data/alphas/$name"
    isdir(outdir) || mkpath(outdir)
    return save("$(outdir)/model.jld2", params)
end;

In [6]:
"""
    evaluate(truth, pred)

Print the RMSE, MAE and R2 of `pred` against `truth` on a single line.
"""
function evaluate(truth, pred)
    rmse = sqrt(Metrics.mse(pred, truth))
    print("RMSE ", rmse)
    mae = Metrics.mae(pred, truth)
    print(" MAE ", mae)
    r2 = Metrics.r2_score(pred, truth)
    return print(" R2 ", r2)
end;

In [7]:
# Model identifier; write_prediction and write_model interpolate it into
# their output directory path.
name = "UserItemBiases";

In [8]:
# Training split; get_split shifts the ids by one and makes the scores floats.
training = get_split("training");

In [9]:
# Validation split, loaded the same way as the training split.
validation = get_split("validation");

# Alternating Least Squares Algorithm
* $u_i = \dfrac{\sum_{(i,j) \in \Omega_i}(r_{ij} - a_j) + \bar u \lambda_u}{|\Omega_i| + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user
* The update for $a_j$ is symmetric, with the roles of users and items swapped

In [10]:
"""
    als_update_user_biases!(users, items, ratings, λ_u, u, a)

Overwrite `u` in place with one alternating-least-squares update, holding `a` fixed.

Row `k` of the data is the rating `ratings[k]` given by user `users[k]` to item
`items[k]`. Each `u[i]` becomes

    (sum of (r - a[j]) over the rows of user i + mean(u) * λ_u) / (rows of user i + λ_u)

where `mean(u)` is taken before any entry is overwritten. When the denominator is
exactly zero, `u[i]` is set to that mean instead. Returns `nothing`.
"""
function als_update_user_biases!(users, items, ratings, λ_u, u, a)
    T = eltype(u)
    resid_sum = zeros(T, length(u))
    n_ratings = zeros(T, length(u))

    # accumulate, per user, the residual total and the number of ratings
    for row in 1:length(users)
        i = users[row]
        j = items[row]
        resid_sum[i] += ratings[row] - a[j]
        n_ratings[i] += 1
    end

    # the regularizer pulls every bias toward the mean of the previous biases
    prior = mean(u)
    for i in 1:length(u)
        denom = n_ratings[i] + λ_u
        u[i] = denom == 0 ? prior : (resid_sum[i] + prior * λ_u) / denom
    end
    return nothing
end

"""
    als_update_item_biases!(users, items, ratings, λ_a, u, a)

Overwrite `a` in place with one alternating-least-squares update, holding `u` fixed.

This is the user update with the roles of users and items swapped.
"""
function als_update_item_biases!(users, items, ratings, λ_a, u, a)
    return als_update_user_biases!(items, users, ratings, λ_a, a, u)
end;

In [11]:
"""
    compute_biases(training, λ_u, λ_a, ϵ = 1e-6)

Fit user biases `u` and item biases `a` by alternating least squares.

`training` must expose `username`, `anime_id` and `my_score` columns. Both bias
vectors start at zero, sized by the largest id in each column. User and item
updates alternate until no entry of either vector moves by `ϵ` or more in a
full sweep. Returns the tuple `(u, a)`.
"""
function compute_biases(training, λ_u, λ_a, ϵ = 1e-6)
    users, items, ratings = training.username, training.anime_id, training.my_score
    u = zeros(eltype(λ_u), maximum(users))
    a = zeros(eltype(λ_a), maximum(items))

    while true
        prev_u, prev_a = copy(u), copy(a)
        als_update_user_biases!(users, items, ratings, λ_u, u, a)
        als_update_item_biases!(users, items, ratings, λ_a, u, a)

        # largest absolute change of any bias during this sweep
        Δu = maximum(abs.(u - prev_u))
        Δa = maximum(abs.(a - prev_a))
        if Δu < ϵ && Δa < ϵ
            break
        end
    end
    return u, a
end;

In [12]:
"""
    make_prediction(users, items, u, a)

Predict a rating for every `(users[k], items[k])` pair as user bias plus item bias.

An id greater than `length(u)` (or `length(a)`) has no fitted bias, so the mean
of `u` (or `a`) is used in its place. Both means are computed once up front
rather than once per unseen id. Returns a vector with element type `eltype(u)`
and one entry per element of `users`.
"""
function make_prediction(users, items, u, a)
    r = zeros(eltype(u), length(users))
    u_mean = mean(u)
    a_mean = mean(a)
    for k in eachindex(r)
        user = users[k]
        item = items[k]
        r[k] += user > length(u) ? u_mean : u[user]
        r[k] += item > length(a) ? a_mean : a[item]
    end
    return r
end;

In [13]:
"""
    validation_mse(λ)

Objective for the hyperparameter search: fit biases on the global `training`
split with `λ[1]` as the user regularizer and `λ[2]` as the item regularizer,
then return the mean squared error on the global `validation` split.
"""
function validation_mse(λ)
    λ_u, λ_a = λ[1], λ[2]
    user_bias, item_bias = compute_biases(training, λ_u, λ_a)
    predicted = make_prediction(
        validation.username,
        validation.anime_id,
        user_bias,
        item_bias,
    )
    return Metrics.mse(validation.my_score, predicted)
end;

## Training

In [None]:
# Find the best regularization hyperparameters
# Box-constrained LBFGS over [λ_u, λ_a]: lower bounds 0, upper bounds Inf,
# starting point [1, 1]. Gradients use forward-mode autodiff, and every
# evaluation of validation_mse refits the biases on the training split.
res = optimize(
    validation_mse,
    fill(0.0, 2),
    fill(Inf, 2),
    fill(1.0, 2),
    Fminbox(LBFGS()),
    autodiff = :forward,
    Optim.Options(show_trace = true),
);

Fminbox
-------
Initial mu = 1.7677e-7

Fminbox iteration 1
-------------------
Calling inner optimizer with mu = 1.7677e-7

(numbers below include barrier contribution)
Iter     Function value   Gradient norm 
     0     1.698360e+00     3.375168e-04
 * time: 0.02632308006286621
     1     1.698206e+00     1.557593e-05
 * time: 150.08646202087402
     2     1.698206e+00     1.558135e-05
 * time: 203.99584197998047
     3     1.698206e+00     1.518728e-05
 * time: 329.4317409992218


In [None]:
# Report the minimizer returned by the hyperparameter search.
print("The optimal [λ_u, λ_a] is ", Optim.minimizer(res));

In [None]:
# Refit the biases on the training split with the selected hyperparameters.
u, a = compute_biases(training, Optim.minimizer(res)...);
# `model` reads the globals u and a at call time.
model(users, items) = make_prediction(users, items, u, a);

## Inference

In [24]:
# Score the fitted model on the training split.
training_pred_score = model(training.username, training.anime_id);
evaluate(training.my_score, training_pred_score);

RMSE 1.2937930876978652 MAE 0.9695274380357186 R2 0.4719976957552441

In [25]:
# Score the fitted model on the validation split.
val_pred_score = model(validation.username, validation.anime_id);
evaluate(validation.my_score, val_pred_score);

RMSE 1.303146977633299 MAE 0.9763230625012824 R2 0.4643877706266908

In [None]:
# write predictions to disk
# The true scores are replaced by the predictions on a copy of the validation
# frame; write_prediction shifts the ids back down by one before writing.
val_pred = copy(validation);
val_pred.my_score = val_pred_score;
write_prediction(val_pred, "validation");

In [23]:
# write model to disk
# Stores the fitted biases, the selected hyperparameters and the `model` function.
# NOTE(review): confirm JLD2 can reload the "model" entry in a fresh session;
# the function depends on the globals u and a.
write_model(Dict("u" => u, "a" => a, "λ" => Optim.minimizer(res), "model" => model));