# Item Collaborative Filtering
* This notebook implements both item-based and user-based collaborative filtering
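* Prediction is $\tilde r_{ij} = \dfrac{\sum_{k \in N(j)} w_{kj}^{\lambda_w}r_{ik}^{\lambda_r}}{\sum_{k \in N(j)} |w_{kj}|^{\lambda_w} + \lambda}$ for item-based collaborative filtering, where $x^{a}$ denotes the signed power $\operatorname{sign}(x)|x|^{a}$ and both sums run only over the neighbors $k$ that user $i$ has rated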
* $r_{ij}$ is the rating for user $i$ and item $j$
* $w_{kj}$ is the similarity between items $j$ and $k$
* $N(j)$ is the neighborhood of item $j$: the $K$ items $k \neq j$ with the largest $|w_{kj}|$
* $\lambda_w, \lambda_r, \lambda$ are regularization parameters

In [1]:
# Name of this model; used below as the top-level output directory of the
# cached similarity matrix.
name = "ItemCollaborativeFiltering";
# No notebook-level residual alphas are set.
# NOTE(review): presumably read by the included Alpha.ipynb — confirm.
residual_alphas = nothing;

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [3]:
# todo improve interface for running residualizations in a circular manner

## Determine the neighborhoods for each user and item

In [4]:
"""
    get_correlation_matrix_outdir(residual_alphas)

Return the output directory holding the item-item similarity matrix computed
from the training residuals of `residual_alphas`.

The directory is keyed by `hash(residual_alphas)` under the notebook-level
`name`. If it already exists on disk it is returned as-is; otherwise the matrix
is computed, stored under the key `"S"` via `write_params`, and the directory
is returned.
"""
function get_correlation_matrix_outdir(residual_alphas)
    outdir = "$name/$(hash(residual_alphas))"
    # reuse the cached matrix when it has already been written
    ispath("../../data/alphas/$outdir") && return outdir

    training = get_residuals("training", residual_alphas)
    n_users = maximum(training.user)
    n_items = maximum(training.item)
    R = sparse(training.user, training.item, training.rating, n_users, n_items)
    S = zeros(n_items, n_items)

    # per-item column norms; zero norms become 1 so the division below is safe
    norms = [norm(column) for column in eachslice(R, dims = 2)]
    norms = [iszero(x) ? one(x) : x for x in norms]

    # column j of S holds the cosine similarity of item j with every item
    @tprogress Threads.@threads for j in 1:size(S, 1)
        dots = vec(R[:, j]' * R)
        S[:, j] = dots ./ norms ./ norms[j]
    end

    write_params(Dict("S" => S), outdir = outdir)
    outdir
end;

"""
    read_similarity_matrix(outdir)

Load the parameters stored under `outdir` and return the entry saved as `"S"`.
"""
function read_similarity_matrix(outdir)
    params = read_params(outdir)
    params["S"]
end;

In [5]:
"""
    get_abs_neighborhood(item, S, K)

Return `(neighbors, weights)` for `item`: the indices of the (at most) `K`
other items with the largest absolute similarity in column `item` of `S`,
ordered by decreasing absolute similarity, together with their signed
similarities.

The item itself is excluded by index rather than by assuming it is ranked
first, so ties with other items and all-zero columns are handled correctly.
`K` is clamped to the number of other items; when that is not positive, empty
vectors are returned.
"""
function get_abs_neighborhood(item, S, K)
    weights = S[:, item]
    K = Int(min(K, length(weights) - 1))
    if K <= 0
        return Int[], weights[Int[]]
    end
    # every index except the item itself, in ascending order so that ties are
    # still broken by the lower index
    candidates = [i for i in eachindex(weights) if i != item]
    top = partialsortperm(abs.(weights[candidates]), 1:K, rev = true)
    order = candidates[top]
    order, weights[order]
end;

In [6]:
# each prediction is just the weighted sum of all items in the neighborhood
# we apply regularization terms to decay the weights, ratings, and final prediction
"""
    make_prediction(item, users, R, get_neighborhood, λ)

Predict the rating of `item` for every user in `users`; returns one value per
user.

Each prediction is the weighted sum of the user's ratings `R[user, k]` over the
items `k` returned by `get_neighborhood(item)`, skipping neighbors whose rating
entry is zero. `λ` holds three regularization terms:

- `λ[1]`: signed-power exponent applied to the neighbor weights
- `λ[2]`: signed-power exponent applied to the ratings
- `λ[3]`: constant added to the sum of absolute weights in the denominator

If `item` is beyond the columns of `R`, there is no information and a zero
prediction is returned for every user.
"""
function make_prediction(item, users, R, get_neighborhood, λ)
    if item > size(R, 2)
        # the item was not in our training set; we have no information
        return zeros(eltype(λ), length(users))
    end
    items, weights = get_neighborhood(item)
    # signed power; the explicit zero branch avoids 0^a edge cases and returns
    # the same type as the nonzero branch to keep the arrays concretely typed
    decay(x, a) = x != 0 ? sign(x) * abs(x)^a : zero(promote_type(typeof(x), typeof(a)))
    weights = decay.(weights, λ[1])
    predictions = zeros(eltype(weights), length(users))
    weight_sum = zeros(eltype(weights), length(users))
    for u in eachindex(users)
        for (i, weight) in zip(items, weights)
            rating = R[users[u], i]
            if rating != 0
                predictions[u] += weight * decay(rating, λ[2])
                weight_sum[u] += abs(weight)
            end
        end
    end
    for u in eachindex(users)
        denominator = weight_sum[u] + λ[3]
        # leave the prediction at zero rather than dividing by zero
        if denominator != 0
            predictions[u] /= denominator
        end
    end
    predictions
end;

In [7]:
"""
    collaborative_filtering(training, inference, get_neighborhood, λ)

Predict a rating for every `(user, item)` row of `inference`, using a sparse
user × item matrix built from `training`.

Returns a vector with one prediction per row of `inference`, in the same
order. `get_neighborhood` and `λ` are forwarded to `make_prediction`.
"""
function collaborative_filtering(training, inference, get_neighborhood, λ)
    R = sparse(
        training.user,
        training.item,
        training.rating,
        maximum(training.user),
        maximum(training.item),
    )

    # group the inference rows by item in a single pass instead of rescanning
    # the whole item vector once per distinct item
    rows_by_item = Dict{eltype(inference.item),Vector{Int}}()
    for (row, item) in pairs(inference.item)
        push!(get!(() -> Int[], rows_by_item, item), row)
    end
    items = collect(keys(rows_by_item))

    # every item owns a disjoint set of rows, so the threads can write into one
    # shared vector; no per-thread buffer indexed by threadid() is needed
    preds = zeros(eltype(λ), length(inference.rating))
    @tprogress Threads.@threads for item in items
        rows = rows_by_item[item]
        preds[rows] = make_prediction(item, inference.user[rows], R, get_neighborhood, λ)
    end

    preds
end;

In [8]:
# Settings for one collaborative-filtering model. The struct is mutable because
# λ is overwritten with the optimized values after training. Fields without a
# type annotation accept any value.
Base.@kwdef mutable struct cf_params
    # output directory / identifier of the model
    name
    # alphas whose residuals the model is trained on
    training_residuals
    # alphas whose residuals the model is validated on
    validation_residuals
    # key selecting the neighborhood function (e.g. "abs")
    neighborhood_type
    # the similarity matrix (stored as the directory it is read from)
    S
    # the neighborhood size
    K
    # [weight_decay, rating_decay, prediction_decay]
    λ::Vector{Float64} = [1.0, 1.0, 0.0]
end;

"""
    to_dict(x)

Return a `Dict` mapping each field name of `x` (as a `String`) to that field's
value.
"""
function to_dict(x)
    names = fieldnames(typeof(x))
    Dict(string(field) => getfield(x, field) for field in names)
end;

In [9]:
# setup hyperparameters
# convert integer-valued numbers to Int; leave everything else untouched
downcast_to_int(x) = isinteger(x) ? Int(x) : x

# one item-based CF configuration per neighborhood size K ∈ {2^4, 2^6, 2^8, 2^10};
# all of them are trained and validated on the UserItemBiases residuals
item_cf_params = map(downcast_to_int.(2 .^ (4:2:10))) do K
    cf_params(
        name = "ItemCF.$K",
        training_residuals = ["UserItemBiases"],
        validation_residuals = ["UserItemBiases"],
        neighborhood_type = "abs",
        S = get_correlation_matrix_outdir(["UserItemBiases"]),
        K = K,
        λ = [1.0, 1.0, 0.0],
    )
end;

## Item based CF

In [10]:
"""
    get_training(residual_alphas)

Return the residuals of the "training" split with respect to `residual_alphas`.
"""
get_training(residual_alphas) = get_residuals("training", residual_alphas)

"""
    get_validation(residual_alphas)

Return the residuals of the "validation" split with respect to
`residual_alphas`.
"""
get_validation(residual_alphas) = get_residuals("validation", residual_alphas)

"""
    get_inference()

Return a `RatingsDataset` containing every `(user, item)` pair of the training,
validation and test splits, concatenated in that order, with all ratings set
to `0.0`.

The rating vector is sized from the concatenated item vector, so its length
always matches the user/item vectors.
"""
function get_inference()
    training = get_split("training")
    validation = get_split("validation")
    test = get_split("test")
    users = [training.user; validation.user; test.user]
    items = [training.item; validation.item; test.item]
    RatingsDataset(user = users, item = items, rating = zeros(length(items)))
end;

In [11]:
"""
    optimize_model(param)

Fit the regularization terms `param.λ` of one collaborative-filtering model and
write its predictions and parameters to `param.name`.

Steps:
1. Load the training and validation residuals and the similarity matrix
   referenced by `param`.
2. Minimize the validation MSE over `λ` with LBFGS (forward-mode autodiff),
   starting from `param.λ`; `param.λ` is overwritten with the minimizer.
3. Predict every `(user, item)` pair returned by `get_inference()` and save the
   predictions together with the parameters.

Throws a `KeyError` if `param.neighborhood_type` is not a known neighborhood
type.
"""
function optimize_model(param)
    # unpack parameters
    training = get_training(param.training_residuals)
    validation = get_validation(param.validation_residuals)
    S = read_similarity_matrix(param.S)
    K = param.K
    neighborhood_types = Dict("abs" => get_abs_neighborhood)
    neighborhoods = i -> neighborhood_types[param.neighborhood_type](i, S, K)

    # optimize hyperparameters
    function validation_mse(λ)
        pred = collaborative_filtering(training, validation, neighborhoods, λ)
        truth = validation.rating
        # least-squares scale factor between predictions and truth, so the loss
        # does not depend on the overall scale of the predictions
        β = pred \ truth
        loss = mse(truth, pred .* β)
        @debug "loss: $loss β: $β: λ $λ"
        loss
    end
    res = optimize(
        validation_mse,
        param.λ,
        LBFGS(),
        autodiff = :forward,
        Optim.Options(show_trace = true, extended_trace = true),
    )
    param.λ = Optim.minimizer(res)

    # save predictions
    inference = get_inference()
    preds = collaborative_filtering(training, inference, neighborhoods, param.λ)
    sparse_preds = sparse(inference.user, inference.item, preds)
    # look up stored predictions; pairs outside the matrix bounds predict 0
    function model(users, items, predictions)
        result = zeros(length(users))
        for i in eachindex(users)
            if users[i] <= size(predictions, 1) && items[i] <= size(predictions, 2)
                result[i] = predictions[users[i], items[i]]
            end
        end
        result
    end
    write_predictions(
        (users, items) -> model(users, items, sparse_preds),
        outdir = param.name,
        residual_alphas = param.validation_residuals,
        save_training = true,
    )
    write_params(to_dict(param), outdir = param.name)
end

train_model (generic function with 1 method)

In [None]:
# fit and save every configured item-based model, one after another
foreach(optimize_model, item_cf_params)

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20211225 05:22:06 START
[32mProgress: 100%|███████████████████████████| Time: 0:00:37 (37.90 ms/it)[39m


Iter     Function value   Gradient norm 
     0     1.374536e+00     1.299059e+00
 * Current step size: 1.0
 * time: 0.025832176208496094
 * g(x): [-0.03220525465365872, 0.012377906329505, -1.2990591606514246]
 * x: [1.0, 1.0, 0.0]


[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (34.99 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.07 ms/it)[39m


     1     1.348660e+00     4.523598e-02
 * Current step size: 0.9836826797690751
 * time: 91.99000716209412
 * g(x): [0.04523597871285908, -0.008139828552845098, 0.0205522035580075]
 * x: [1.0316797512003566, 0.987824067931862, 1.2778619963281588]


[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (34.98 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (34.96 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (34.98 ms/it)[39m


     2     1.342840e+00     1.434660e-02
 * Current step size: 4.81095406464382
 * time: 227.40951704978943
 * g(x): [-0.005648381022438828, 0.0027073510355605274, 0.014346602184757678]
 * x: [0.824967674653698, 1.0249930234157598, 1.189237366177755]


[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.29 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.12 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.02 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.29 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.42 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.15 ms/it)[39m


     3     1.335705e+00     3.814153e-02
 * Current step size: 25.456786463880924
 * time: 498.2650480270386
 * g(x): [-0.030039999531259163, -0.038141526788000944, 0.011451757484899077]
 * x: [0.8639585108179653, 0.8323564938739996, 0.5194913116674877]


[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.37 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.24 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.52 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:36 (36.61 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.53 ms/it)[39m


     4     1.333650e+00     4.298692e-02
 * Current step size: 0.23379902411197562
 * time: 725.3465321063995
 * g(x): [-0.04298692464751488, -0.04226986481523711, -0.002479946916896518]
 * x: [0.9050313115228257, 0.8139400977579417, 0.3126595482013398]


[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.68 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.11 ms/it)[39m


     5     1.331789e+00     4.233810e-02
 * Current step size: 0.1533535093577409
 * time: 816.0511150360107
 * g(x): [-0.04233809638902409, -0.039232538245823956, -0.006078058343758567]
 * x: [0.9391307697697853, 0.828107257071838, 0.27155254249647065]


[32mProgress: 100%|███████████████████████████| Time: 0:00:35 (35.23 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (34.95 ms/it)[39m


     6     1.320148e+00     3.236140e-02
 * Current step size: 0.8929065612428592
 * time: 906.0708870887756
 * g(x): [0.0037837824678845964, 0.016576448581889548, 0.03236139529872118]
 * x: [1.3560084890550865, 1.0808193922747007, 0.1258539989231959]


[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.15 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.01 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.15 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.02 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.18 ms/it)[39m


     7     1.319674e+00     1.969105e-02
 * Current step size: 0.04655729494801121
 * time: 1131.6486961841583
 * g(x): [-0.0011851478073443575, 0.017046473123095596, 0.019691045455934923]
 * x: [1.370529471242891, 1.0806698432775665, 0.10737452641933203]


[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.12 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.19 ms/it)[39m


     8     1.319378e+00     1.562488e-02
 * Current step size: 0.1219400640185169
 * time: 1221.8796939849854
 * g(x): [-0.0028045766752620087, 0.015624883243493055, 0.013506751933684452]
 * x: [1.384087909967692, 1.0737011812399015, 0.09810664230217585]


[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.00 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.13 ms/it)[39m


     9     1.318838e+00     1.083946e-02
 * Current step size: 0.28933914755195905
 * time: 1312.036190032959
 * g(x): [-0.004861990049671916, 0.010839463952676319, 0.0010179068282149604]
 * x: [1.4169193410624055, 1.0520574446833748, 0.08175461857566156]


[32mProgress: 100%|███████████████████████████| Time: 0:00:34 (35.12 ms/it)[39m
