# Matrix Factorization
* Prediction is $\tilde R = UA^T$ 
* Loss function is $L = \lVert (R - \tilde R)^\Omega \rVert _2^2 + \lambda_u \lVert U \rVert _2^2 + \lambda_a \lVert A \rVert _2^2$
* $\Omega$ is the set of observed pairs $(i, j)$
* $M^\Omega$ is the projection of $M$ onto $\Omega$ for any matrix $M$
* $U$ is an $m \times k$ matrix, $A$ is an $n \times k$ matrix and $R$ is the $m \times n$ ratings matrix

In [1]:
# Identifier for this notebook's model. It is defined before Alpha.ipynb is
# included (presumably read by the helpers defined there -- TODO confirm).
name = "MatrixFactorization";
# Names of other models associated with this one (presumably the models whose
# residuals this model is fit on -- TODO confirm against Alpha.ipynb).
residual_alphas = ["UserItemBiases"];

In [2]:
using LinearAlgebra
using SparseArrays

In [3]:
using NBInclude
@nbinclude("Alpha.ipynb");

# Alternating Least Squares Algorithm
* $u_{ik} = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - \tilde r_{ij} + u_{ik}a_{jk})a_{jk}}{\sum_{j \in \Omega_i} a_{jk}^2 + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [4]:
"""
    make_prediction(usernames, anime_ids, U, A)

Return a vector whose `i`-th entry is the dot product of row `usernames[i]` of `U`
and row `anime_ids[i]` of `A`.

Entries whose user index exceeds `size(U, 1)` or whose item index exceeds
`size(A, 1)` are left at zero. The output has one entry per element of `usernames`,
and its element type is the promotion of the element types of `U` and `A`.
"""
function make_prediction(usernames, anime_ids, U, A)
    # Promote so that factor matrices with different element types still fit.
    T = promote_type(eltype(U), eltype(A))
    r = zeros(T, length(usernames))
    n_users, n_items = size(U, 1), size(A, 1)
    # Views avoid allocating a copy of each factor row for every prediction.
    @views for i in eachindex(r)
        u, a = usernames[i], anime_ids[i]
        if u <= n_users && a <= n_items
            r[i] = dot(U[u, :], A[a, :])
        end
    end
    return r
end;

In [5]:
"""
    ridge_regression(X, y, λ)

Solve the regularized normal equations `(X'X + λI) β = X'y` and return `β`.

The Gram matrix and right-hand side are converted to dense storage before solving.
"""
function ridge_regression(X, y, λ)
    n_features = size(X)[2]
    gram = Matrix(X'X)
    rhs = Vector(X'y)
    penalty = λ * I(n_features)
    return (gram + penalty) \ rhs
end;

In [6]:
"""
    sparse_csr(i, j, v, m, n)

Build an `m × n` sparse matrix with entries `v` at positions `(i, j)`, returned as the
adjoint of an `n × m` `sparse` matrix assembled with the row and column indices swapped.
"""
function sparse_csr(i, j, v, m, n)
    swapped = sparse(j, i, v, n, m)
    return swapped'
end;

In [7]:
"""
    sparse_subset(A, rows)

Return a `SparseMatrixCSC` `B` with `size(B) == size(A)` such that
`B[rows, :] == A[rows, :]` and every row not listed in `rows` is zero.

Each column stores one entry per element of `rows`, including entries whose value is
zero. `rows` is handed to the constructor verbatim as each column's row indices, so
callers should pass sorted, distinct row indices.
"""
function sparse_subset(A, rows)
    n_cols = size(A)[2]
    n_kept = length(rows)
    # Every column holds exactly `n_kept` stored entries, so column starts are evenly spaced.
    colptr = map(c -> 1 + (c - 1) * n_kept, 1:(n_cols + 1))
    rowval = repeat(rows, n_cols)
    nzval = vec(A[rows, :])
    return SparseMatrixCSC(size(A)..., colptr, rowval, nzval)
end;

In [8]:
"""
    update_users!(users, items, ratings, U, A, λ_u)

Overwrite every row of `U` with a ridge-regression fit against `A`.

The `(users, items, ratings)` triplets are assembled into a sparse matrix `R` with
`size(U, 1)` rows and `size(A, 1)` columns. For each row `i`, the rows of `A` that
correspond to the stored entries of `R[i, :]` are regressed onto `R[i, :]` with
penalty `λ_u`, and the solution is written to `U[i, :]`. Rows are processed in a
threaded loop; each iteration writes only its own row of `U`.
"""
function update_users!(users, items, ratings, U, A, λ_u)
    n_rows = size(U)[1]
    R = sparse_csr(users, items, ratings, n_rows, size(A)[1])
    # `@tprogress` is defined outside this cell (presumably a progress meter -- TODO confirm).
    @tprogress Threads.@threads for i = 1:n_rows
        y = R[i, :]
        X = sparse_subset(A, rowvals(y))
        U[i, :] = ridge_regression(X, y, λ_u)
    end
end;

In [9]:
"""
    early_stopping(model, patience, min_improvement_pct, params)

Update the early-stopping state in `params` and return `true` when training should stop.

`model` is called on the global `training` and `validation` splits and scored with
`rmse`. If the validation loss is below `params["loss"]` by more than the relative
margin `min_improvement_pct`, it is stored as the new `params["loss"]` and
`params["iters_without_improvement"]` is reset to zero; otherwise that counter is
incremented. Returns whether the counter has reached `patience`.
"""
function early_stopping(model, patience, min_improvement_pct, params)
    train_err = rmse(training.my_score, model(training.username, training.anime_id))
    valid_err = rmse(validation.my_score, model(validation.username, validation.anime_id))
    @debug "Training loss, Validation loss: " * repr((train_err, valid_err))

    improved = valid_err < params["loss"] * (1 - min_improvement_pct)
    if improved
        params["loss"] = valid_err
    end
    stalled = improved ? 0 : params["iters_without_improvement"] + 1
    params["iters_without_improvement"] = stalled
    return stalled >= patience
end

early_stopping (generic function with 1 method)

In [10]:
"""
    train_model(training, λ_u, λ_a, K; ϵ = 1e-6, max_iters = 10)

Fit rank-`K` factor matrices `U` and `A` by alternating ridge-regression updates and
return `(U, A)`.

`U` has `maximum(training.username)` rows and `A` has `maximum(training.anime_id)` rows;
both start from standard-normal draws and take their element type from `λ_u` and `λ_a`
respectively. Each epoch updates `U` with penalty `λ_u`, then `A` with penalty `λ_a`,
and stops early once `early_stopping` (patience 2, minimum relative improvement 0.0001)
returns `true`, or after `max_iters` epochs.

The keyword `ϵ` is accepted but not used in the body.
"""
function train_model(training, λ_u, λ_a, K; ϵ = 1e-6, max_iters = 10)
    @debug "train_model ($λ_u, $λ_a)"
    users = training.username
    items = training.anime_id
    ratings = training.my_score
    n_users = maximum(users)
    U = zeros(eltype(λ_u), n_users, K) + randn(n_users, K)
    n_items = maximum(items)
    A = zeros(eltype(λ_a), n_items, K) + randn(n_items, K)
    predict = (u, a) -> make_prediction(u, a, U, A)

    stop_state = Dict("loss" => Inf, "iters_without_improvement" => 0)
    for epoch = 1:max_iters
        update_users!(users, items, ratings, U, A, λ_u)
        # Same routine with the roles of users and items swapped updates the item factors.
        update_users!(items, users, ratings, A, U, λ_a)
        early_stopping(predict, 2, 0.0001, stop_state) && break
    end
    return U, A
end;

## Training

In [11]:
"""
    validation_mse(λ, K, max_iters)

Train a rank-`K` model on the global `training` split with penalties `exp.(λ)` and
return the `mse` of its predictions on the global `validation` split.

`λ` holds the log of `[λ_u, λ_a]`; exponentiating keeps both penalties positive.
"""
function validation_mse(λ, K, max_iters)
    penalties = exp.(λ) # log-space parameters map to positive penalties
    λ_u, λ_a = penalties[1], penalties[2]
    U, A = train_model(training, λ_u, λ_a, K, max_iters = max_iters)
    predicted = make_prediction(validation.username, validation.anime_id, U, A)
    return mse(validation.my_score, predicted)
end;

In [12]:
# Number of latent factors, i.e. the number of columns of U and A.
K = 40;
# Upper bound on the number of ALS epochs per training run in the hyperparameter search.
max_iters = 100;

In [None]:
# Find the best regularization hyperparameters by minimizing the validation MSE
# over log(λ_u) and log(λ_a); gradients come from forward-mode autodiff.
res = optimize(
    λ -> validation_mse(λ, K, max_iters),
    fill(1., 2),  # initial guess
    LBFGS(),
    autodiff = :forward,
    Optim.Options(show_trace = true, extended_trace = true),
)
# Map the minimizer back from log-space to the actual penalties.
λ = exp.(Optim.minimizer(res));

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39mtrain_model (Dual{ForwardDiff.Tag{var"#8#9", Float64}}(2.718281828459045,2.718281828459045,0.0), Dual{ForwardDiff.Tag{var"#8#9", Float64}}(2.718281828459045,0.0,2.718281828459045))
[32mProgress: 100%|███████████████████████████| Time: 0:03:14 ( 0.43 ms/it)[39m39m
[32mProgress: 100%|███████████████████████████| Time: 0:06:13 (22.01 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39mTraining loss, Validation loss: (Dual{ForwardDiff.Tag{var"#8#9", Float64}}(1.1099903931974153,-0.00030520966842951825,0.0021053025727155276), Dual{ForwardDiff.Tag{var"#8#9", Float64}}(1.4596772152391773,-0.02920858003875626,-0.0065064437229117415))
[32mProgress: 100%|███████████████████████████| Time: 0:03:23 ( 0.45 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:06:27 (22.84 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39mTraining loss, Validation loss: (Dual{ForwardDiff.Tag{var"#8#9", Float64}}

In [None]:
# Log the regularization strengths selected by the search.
@info "The optimal [λ_u, λ_a] is $(λ)";

In [None]:
# Retrain with the selected penalties. Pass the same epoch cap that was used while
# searching for λ; otherwise train_model falls back to its default of 10 epochs and
# the final model is fit under a different budget than the one λ was tuned for.
U, A = train_model(training, λ..., K, max_iters = max_iters);

## Inference

In [None]:
# Prediction function backed by the trained global factor matrices U and A.
function model(users, items)
    return make_prediction(users, items, U, A)
end;

In [None]:
# Hand the trained model to `write_predictions`, which is defined outside this cell
# (presumably in Alpha.ipynb, to persist its predictions -- TODO confirm).
write_predictions(model);

In [None]:
# Save the learned factor matrices together with the selected regularization strengths.
write_params(Dict("U" => U, "A" => A, "λ" => λ));