# Matrix Factorization
* Prediction is $\tilde R = UA^T$ 
* Loss fuction is $L = \lVert (R - \tilde R)^\Omega \rVert _2^2 + \lambda_u \lVert U \rVert _2^2 + \lambda_a \lVert A \rVert _2^2$
* $\Omega$ is the set of oberved pairs $(i, j)$
* $M^\Omega$ is the projection of $M$ onto $\Omega$ for any matrix $M$
* $U$ is an $m x k$ matrix, $A$ is an $n x k$ matrix and $R$ is the $m x n$ ratings matrix

In [1]:
name = "MatrixFactorization";
residual_alphas = ["UserItemBiases"];

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [3]:
using LinearAlgebra
using SparseArrays

# Alternating Least Squares Algorithm
* $u_{ik} = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - \tilde r_{ij} + u_{ik}a_{kj})}{\sum_{j \in \Omega_i} a_j^2 + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is subset of $\Omega$ for which the user is the $i$-th user

In [4]:
function make_prediction(usernames, anime_ids, U, A)
    r = zeros(eltype(U), length(usernames))
    @showprogress for i = 1:length(r)
        if (usernames[i] <= size(U)[1]) && (anime_ids[i] <= size(A)[1])
            r[i] = dot(U[usernames[i], :], A[anime_ids[i], :])
        end
    end
    return r
end

make_prediction (generic function with 1 method)

In [5]:
function ridge_regression(M, b, λ)
    return (Matrix(M'M) + λ * I(size(M)[2])) \ Vector(M'b)
end;

In [6]:
function sparse_csr(i, j, v, m, n)
    return sparse(j, i, v, n, m)'
end;

In [7]:
function sparse_subset(A, rows)
    # returns a sparse matrix B such that
    # 1) size(B) == size(A)
    # 2) B[rows, :] = A[rows, :]
    # 3) B[i, :] = 0 if i not in rows
    K = size(A)[2]
    nzval = vec(A[rows, :])
    rowval = repeat(rows, K)
    colptr = [1 + (x - 1) * length(rows) for x = 1:K+1]
    return SparseMatrixCSC(size(A)..., colptr, rowval, nzval)
end;

In [8]:
function update_users!(users, items, ratings, U, A, λ_u)
    R = sparse_csr(users, items, ratings, size(U)[1], size(A)[1])
    @tprogress Threads.@threads for i = 1:size(U)[1]
        M = sparse_subset(A, rowvals(R[i, :]))
        b = R[i, :]
        U[i, :] = ridge_regression(M, b, λ_u)
    end
end;

In [9]:
function train_model(training, λ_u, λ_a, K, ϵ = 1e-6)
    users = training.username
    items = training.anime_id
    ratings = training.my_score
    U = zeros(eltype(λ_u), maximum(users), K)
    A = zeros(eltype(λ_a), maximum(items), K)
    U .+= rand(size(U)...) .- 0.5 # TODO gaussianize
    A .+= rand(size(A)...) .- 0.5

    function print_progress()
        @fastmath println(rmse(ratings, make_prediction(users, items, U, A)))
        @fastmath println(
            rmse(
                validation.my_score,
                make_prediction(validation.username, validation.anime_id, U, A),
            ),
        )
    end

    print_progress()
    converged = false
    while !converged
        old_U = copy(U)
        old_A = copy(A)
        update_users!(users, items, ratings, U, A, λ_u)

        print_progress()

        update_users!(items, users, ratings, A, U, λ_a)

        print_progress()

        converged = (maximum(abs.(U - old_U)) < ϵ) && (maximum(abs.(A - old_A)) < ϵ)
    end
    return U, A
end;

In [10]:
@time U, A = train_model(training, 10., 10., 5)

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:18[39m


1.3070890397969488


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m


1.316305870551112


[32mProgress: 100%|███████████████████████████| Time: 0:11:34 ( 1.53 ms/it)[39m/it)[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:18[39m


1.2803925757238905


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m


1.3083311024265847


[32mProgress: 100%|███████████████████████████| Time: 0:14:48 (52.34 ms/it)[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:17[39m


1.2704607112259834


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m


1.3153727875126517


[32mProgress:  27%|███████▍                   |  ETA: 0:08:23 ( 1.53 ms/it)[39m

LoadError: TaskFailedException

[91m    nested task error: [39mInterruptException:
    Stacktrace:
      [1] [0m[1m_growat![22m
    [90m    @ [39m[90m./[39m[90;4marray.jl:890[0m[90m [inlined][39m
      [2] [0m[1minsert![22m
    [90m    @ [39m[90m./[39m[90;4marray.jl:1320[0m[90m [inlined][39m
      [3] [0m[1msetindex![22m[0m[1m([22m[90mx[39m::[0mSparseVector[90m{Float64, Int64}[39m, [90mv[39m::[0mFloat64, [90mi[39m::[0mInt64[0m[1m)[22m
    [90m    @ [39m[35mSparseArrays[39m [90m/buildworker/worker/package_linuxaarch64/build/usr/share/julia/stdlib/v1.6/SparseArrays/src/[39m[90;4msparsevector.jl:323[0m
      [4] [0m[1mmacro expansion[22m
    [90m    @ [39m[90m./[39m[90;4mmultidimensional.jl:860[0m[90m [inlined][39m
      [5] [0m[1mmacro expansion[22m
    [90m    @ [39m[90m./[39m[90;4mcartesian.jl:64[0m[90m [inlined][39m
      [6] [0m[1m_unsafe_getindex![22m
    [90m    @ [39m[90m./[39m[90;4mmultidimensional.jl:855[0m[90m [inlined][39m
      [7] [0m[1m_unsafe_getindex[22m[0m[1m([22m::[0mIndexCartesian, ::[0mAdjoint[90m{Float64, SparseMatrixCSC{Float64, Int64}}[39m, ::[0mInt64, ::[0mBase.Slice[90m{Base.OneTo{Int64}}[39m[0m[1m)[22m
    [90m    @ [39m[90mBase[39m [90m./[39m[90;4mmultidimensional.jl:846[0m
      [8] [0m[1m_getindex[22m
    [90m    @ [39m[90m./[39m[90;4mmultidimensional.jl:832[0m[90m [inlined][39m
      [9] [0m[1mgetindex[22m
    [90m    @ [39m[90m./[39m[90;4mabstractarray.jl:1170[0m[90m [inlined][39m
     [10] [0m[1mmacro expansion[22m
    [90m    @ [39m[90m./[39m[90;4mIn[8]:5[0m[90m [inlined][39m
     [11] [0m[1m(::var"#54#threadsfor_fun#4"{Matrix{Float64}, Matrix{Float64}, Float64, Adjoint{Float64, SparseMatrixCSC{Float64, Int64}}, UnitRange{Int64}})[22m[0m[1m([22m[90monethread[39m::[0mBool[0m[1m)[22m
    [90m    @ [39m[36mMain[39m [90m./[39m[90;4mthreadingconstructs.jl:81[0m
     [12] [0m[1m(::var"#54#threadsfor_fun#4"{Matrix{Float64}, Matrix{Float64}, Float64, Adjoint{Float64, SparseMatrixCSC{Float64, Int64}}, UnitRange{Int64}})[22m[0m[1m([22m[0m[1m)[22m
    [90m    @ [39m[36mMain[39m [90m./[39m[90;4mthreadingconstructs.jl:48[0m

In [11]:
model(users, items) = make_prediction(users, items, U, A);

In [12]:
write_predictions(model);

LoadError: UndefVarError: U not defined

In [13]:
write_params(Dict("U" => U, "A" => A, "λ" => λ);

ErrorException: Parsing error for input occurred on line 1, offset: 46

## Training

In [14]:
K = 10;
function validation_mse(λ)
    λ = exp.(λ) # ensure λ is nonnegative
    U, A = train_model(training, λ[1], λ[2], K)
    pred_score = make_prediction(validation.username, validation.anime_id, U, A)
    return Metrics.mse(validation.my_score, pred_score)
end;

In [None]:
# Find the best regularization hyperparameters
res = optimize(
    validation_mse,
    fill(0.0, 2),
    LBFGS(), # intial guess
    autodiff=:forward,
    Optim.Options(show_trace=true),
);
λ = exp.(Optim.minimizer(res))

[32mProgress:  88%|███████████████████████████████████▉     |  ETA: 0:00:04[39m

In [None]:
print("The optimal [λ_u, λ_a] is ", λ);

In [None]:
U, A = train_model(training, λ..., K);

## Inference

In [None]:
model(users, items) = make_prediction(users, items, U, A);

In [None]:
write_predictions(model);

In [None]:
write_params(Dict("U" => U, "A" => A, "λ" => λ);