# Neighborhood Collaborative Filtering
* This notebook implements both item-based and user-based collaborative filtering
* Prediction is $\tilde r_{ij} = \sum_{k \in N(j)} r_{ik}w_{kj}$ for item-based collaborative filtering
* Prediction is $\tilde r_{ij} = \sum_{k \in N(i)} w_{ik}r_{kj}$ for user-based collaborative filtering
* $r_{ij}$ is the rating for user $i$ and item $j$
* $w_{kj}$ is the cosine similarity between items $j$ and $k$
* $N(j)$ is the set of the $K$ items $k \neq j$ with the largest $|w_{kj}|$

In [1]:
# Notebook-level configuration.
# NOTE(review): presumably read by helpers included from Alpha.ipynb — confirm.
name = "NeighborhoodCollaborativeFiltering";
residual_alphas = Any[];

In [2]:
using LinearAlgebra
using Memoize
using SparseArrays
# TODO upstream imports

In [3]:
using NBInclude
@nbinclude("Alpha.ipynb");

## Determine the neighborhoods for each user and item

In [4]:
# Euclidean norm of every column of `R` (slices along dimension 2), memoized per `R`.
# Columns whose norm is zero report a norm of one so that callers can divide by the
# result without producing a division by zero.
@memoize function get_norms(R)
    map(eachslice(R, dims = 2)) do column
        magnitude = norm(column)
        iszero(magnitude) ? one(magnitude) : magnitude
    end
end;

In [5]:
"""
    get_neighborhood(order, weights, K)

Return `(neighbors, neighbor_weights)` for the `K` best-ranked candidates.

`order` is a ranking of indices into `weights`, best first. Its first entry is always
skipped: callers rank an item against every item including itself, so the top entry is
expected to be the query item. When fewer than `K` candidates remain after skipping,
all of them are returned instead of throwing a `BoundsError`.
"""
function get_neighborhood(order, weights, K)
    # Clamp K so small catalogs (or K <= 0) yield a shorter neighborhood, not an error.
    k = clamp(K, 0, max(length(order) - 1, 0))
    neighbors = order[2:(k + 1)]
    neighbors, weights[neighbors]
end;

In [6]:
"""
    get_correlation_neighborhood(item, R, K)

Return the `K` nearest neighbors of column `item` of `R` and their weights.

The weight of column `k` is the dot product of columns `item` and `k` divided by both
column norms (as reported by `get_norms`). Candidates are ranked by absolute weight,
largest first.
"""
function get_correlation_neighborhood(item, R, K)
    column_norms = get_norms(R)
    similarities = vec(R[:, item]' * R) ./ column_norms ./ column_norms[item]
    magnitudes = abs.(similarities)
    ranking = sortperm(magnitudes, rev = true)
    get_neighborhood(ranking, similarities, K)
end;

"""
    get_correlation_neighborhood(item, R, K, λ)

Return the `K` nearest neighbors of column `item` of `R` with exponentially rescaled
weights.

Each normalized dot-product similarity `w` is mapped to `exp(abs(w) * λ) * sign(w)`
before the candidates are ranked by absolute weight, largest first.
"""
function get_correlation_neighborhood(item, R, K, λ)
    column_norms = get_norms(R)
    similarities = vec(R[:, item]' * R) ./ column_norms ./ column_norms[item]
    rescaled = @. exp(abs(similarities) * λ) * sign(similarities)
    magnitudes = abs.(rescaled)
    ranking = sortperm(magnitudes, rev = true)
    get_neighborhood(ranking, rescaled, K)
end;

In [7]:
"""
    get_embedding_neighborhood(item, kernel, K, λ)

Return the `K` nearest neighbors of column `item` of `kernel` by Euclidean distance.

Candidates are ranked by the norm of the difference between their column and column
`item`, closest first. The weight of a candidate at distance `d` is `exp(-d * λ)`.
"""
function get_embedding_neighborhood(item, kernel, K, λ)
    offsets = kernel .- kernel[:, item]
    distances = map(norm, eachslice(offsets, dims = 2))
    decayed = @. exp(-distances * λ)
    ranking = sortperm(distances)
    get_neighborhood(ranking, decayed, K)
end;

In [8]:
# U = read_params("MatrixFactorization")["U"]
# A = read_params("MatrixFactorization")["A"]
# function impute_mf(user, item)
#     if user <= size(U)[1] && item <= size(A)[1]
#         return dot(U[user, :], A[item, :])
#     end
#     0.0
# end;

In [9]:
"""
    make_prediction(item, users, R, get_neighborhood, impute, λ)

Predict the rating of `item` for every entry of `users`.

`get_neighborhood(item)` must return `(neighbors, weights)`. For each user the
prediction is the weighted sum of that user's ratings of the neighbors, divided by
the sum of the absolute weights that contributed plus `λ`. A neighbor contributes
when `R[user, neighbor]` is nonzero, or otherwise when `impute(user, neighbor)` is
nonzero. If the denominator is zero the prediction is left at zero.

Returns a vector with one prediction per entry of `users`, with the element type of
the neighborhood weights.
"""
function make_prediction(item, users, R, get_neighborhood, impute, λ)
    neighbors, weights = get_neighborhood(item)
    predictions = zeros(eltype(weights), length(users))
    weight_sum = zeros(eltype(weights), length(users))
    for (u, user) in enumerate(users)
        for (neighbor, weight) in zip(neighbors, weights)
            # Look the rating up once and fall back to the imputed value, so that
            # neither `R` nor `impute` is evaluated twice for the same pair.
            observed = R[user, neighbor]
            rating = observed != 0 ? observed : impute(user, neighbor)
            if rating != 0
                predictions[u] += weight * rating
                weight_sum[u] += abs(weight)
            end
        end
    end
    for u in eachindex(predictions)
        denominator = weight_sum[u] + λ
        # A zero denominator means nothing contributed; keep the zero prediction.
        if denominator != 0
            predictions[u] /= denominator
        end
    end
    predictions
end;

In [10]:
"""
    collaborative_filtering(training, inference, get_neighborhood, impute, λ)

Predict a rating for every row of `inference` from the ratings in `training`.

The training ratings are assembled into a sparse matrix indexed by
`(training.user, training.item)`. For each distinct value in `inference.item`,
`make_prediction` is called for the rows of `inference` that refer to it; the
distinct items are processed in a threaded loop. `get_neighborhood`, `impute` and `λ`
are forwarded to `make_prediction` unchanged.

Returns a vector aligned with the rows of `inference`.
"""
function collaborative_filtering(training, inference, get_neighborhood, impute, λ)
    R = sparse(
        training.user,
        training.item,
        training.rating,
        maximum(training.user),
        maximum(training.item),
    )

    # Probe one neighborhood so the output can hold the weights' element type
    # (e.g. dual numbers when the caller differentiates through the predictions).
    el_type = eltype(get_neighborhood(1)[2])
    # Every iteration writes only the rows whose item matches, and the iterated items
    # are distinct, so iterations touch disjoint entries of this single vector. This
    # avoids a per-thread buffer indexed by `Threads.threadid()`, which is not
    # guaranteed to stay within `1:Threads.nthreads()` nor to be stable for a task.
    preds = zeros(el_type, length(inference.rating))
    @tprogress Threads.@threads for item in collect(Set(inference.item))
        mask = inference.item .== item
        preds[mask] =
            make_prediction(item, inference.user[mask], R, get_neighborhood, impute, λ)
    end

    preds
end;

In [11]:
"""
    model(users, items, predictions)

Look up `predictions[users[i], items[i]]` for every `i`.

Pairs whose user or item index falls outside `predictions` yield `0.0`. The last row
and last column of `predictions` are valid indices and are looked up like any other.

Returns a `Vector{Float64}` with one entry per entry of `users`.
"""
function model(users, items, predictions)
    n_users = size(predictions, 1)
    n_items = size(predictions, 2)
    result = zeros(length(users))
    for (i, user) in enumerate(users)
        item = items[i]
        # Inclusive upper bounds: an index equal to the matrix size is still in range.
        if 1 <= user <= n_users && 1 <= item <= n_items
            result[i] = predictions[user, item]
        end
    end
    result
end;

In [12]:
# Keyword-constructed description of one collaborative filtering model.
#   name            - identifier of the model
#   residual_alphas - value stored under "residual_alphas" by `get_params`
#   neighborhoods   - neighborhood lookup for the model
#   impute          - two-argument function; the default always returns 0
#   λ               - defaults to 0; stored under "λ" by `get_params`
#   hyperparams     - extra entries merged into the output of `get_params`
# All fields are untyped (i.e. `Any`).
Base.@kwdef struct cf_params
    name
    residual_alphas
    neighborhoods
    impute = (u, i) -> 0
    λ = 0
    hyperparams
end;

"""
    get_params(params::cf_params)

Return a dictionary holding `params.residual_alphas` under `"residual_alphas"` and
`params.λ` under `"λ"`, merged with `params.hyperparams`. On a key collision the entry
from `params.hyperparams` wins.
"""
function get_params(params::cf_params)
    base = Dict("residual_alphas" => params.residual_alphas, "λ" => params.λ)
    merge(base, params.hyperparams)
end

get_params (generic function with 1 method)

## Item based CF

### optimize hyperparams

In [13]:
# Residuals of the "training" split for the given `residual_alphas`.
get_training(residual_alphas) = get_residuals("training", residual_alphas)

# Residuals of the "validation" split for the given `residual_alphas`.
get_validation(residual_alphas) = get_residuals("validation", residual_alphas)

"""
    get_inference(residual_alphas)

Return a `RatingsDataset` whose `user`, `item` and `rating` columns are the
"validation" residuals followed by the "test" residuals for `residual_alphas`.
"""
function get_inference(residual_alphas)
    # This binding must be spelled exactly `validation`: with any other name the
    # concatenation below would read a global `validation` instead of these residuals.
    validation = get_residuals("validation", residual_alphas)
    test = get_residuals("test", residual_alphas)
    RatingsDataset(
        user = [validation.user; test.user],
        item = [validation.item; test.item],
        rating = [validation.rating; test.rating],
    )
end;

In [14]:
# Item-based model configurations; entries are appended once hyperparameters are tuned.
item_cf_params = Any[];

In [None]:
# Residuals left after the "UserItemBiases" model, for training and validation.
training = get_training(["UserItemBiases"])
validation = get_validation(["UserItemBiases"])
# Sparse training matrix indexed by (user, item), sized by the largest ids in training.
item_ratings = let users = training.user, items = training.item
    sparse(users, items, training.rating, maximum(users), maximum(items))
end

"""
    item_validation_mse(λ)

Validation loss of item-based collaborative filtering with 200 neighbors.

`λ[1]` is forwarded to `get_correlation_neighborhood` as its weight parameter and
`λ[2]` to `collaborative_filtering` as its regularization term. Reads the globals
`training`, `validation` and `item_ratings`.

The predictions are rescaled by the factor `β = pred \\ truth` before the mean squared
error against `validation.rating` is computed and returned.
"""
function item_validation_mse(λ)
    @debug "training model with parameters $λ"
    neighborhoods = item -> get_correlation_neighborhood(item, item_ratings, 200, λ[1])
    pred = collaborative_filtering(training, validation, neighborhoods, (u, i) -> 0, λ[2])
    truth = validation.rating
    # Only the loss after rescaling is reported; the unscaled loss was never used.
    β = pred \ truth
    loss = mse(truth, pred .* β)
    @debug "loss: $loss β: $β"
    loss
end;

# Minimize the validation loss over the two hyperparameters with L-BFGS, using
# forward-mode automatic differentiation for the gradient.
res = optimize(
    item_validation_mse,
    [18.0, 0.0],  # initial guess
    LBFGS(),
    Optim.Options(show_trace = true, extended_trace = true);
    autodiff = :forward,
)

# Register the tuned model. The rating matrix and the tuned weight are bound with
# `let` so the stored closure keeps its own references to them and does not depend on
# the globals `item_ratings` / `res` still holding these values when it is called.
let ratings = item_ratings, weight_decay = Optim.minimizer(res)[1]
    push!(
        item_cf_params,
        cf_params(
            name = "ItemCFReg200",
            residual_alphas = ["UserItemBiases"],
            # The field is named `neighborhoods`; any other keyword is rejected.
            neighborhoods = item ->
                get_correlation_neighborhood(item, ratings, 200, weight_decay),
            λ = Optim.minimizer(res)[2],
            hyperparams = Dict("K" => 200, "weight_decay" => weight_decay),
        ),
    )
end

# Drop the global references to the datasets and the rating matrix.
training = validation = item_ratings = nothing

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211217 08:37:35 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}, Float64, 2}[Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}}(18.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}}(0.0,0.0,1.0)]
[32mProgress: 100%|███████████████████████████| Time: 0:30:30 ( 0.46  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211217 09:08:16 loss: Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}}(1.3082186837520877,-0.00017044010080159578,-0.014543586284491811) β: Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}}(0.7827542356041082,-0.010081085323845114,0.0441406186503027)


Iter     Function value   Gradient norm 
     0     1.308219e+00     1.454359e-02
 * Current step size: 1.0
 * time: 0.02223801612854004
 * g(x): [-0.00017044010080159578, -0.014543586284491811]
 * x: [18.0, 0.0]


[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211217 09:08:17 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}, Float64, 2}[Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}}(18.000170440100803,1.0,0.0), Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}}(0.014543586284491811,0.0,1.0)]
[32mProgress: 100%|███████████████████████████| Time: 0:29:18 ( 0.44  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211217 09:37:44 loss: Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}}(1.3080092344605145,-0.00015464718830178104,-0.014256957928010797) β: Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}}(0.7833898108002499,-0.01013681146886736,0.043502036492947774)
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211217 09:37:44 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(item_validation_mse), Float64}, Float64, 2}[Dual{ForwardDiff.Tag{typeof(item

In [None]:
# For every registered item-based model: predict the inference rows, then write the
# predictions and the model parameters under the model's name.
for param in item_cf_params
    inference = get_inference(param.residual_alphas)
    preds = collaborative_filtering(
        get_training(param.residual_alphas),
        inference,
        param.neighborhoods,
        param.impute,
        param.λ,
    )
    # Index the predictions by (user, item) so `model` can look them up.
    sparse_preds = sparse(inference.user, inference.item, preds)
    write_predictions(outdir = param.name) do users, items
        model(users, items, sparse_preds)
    end
    write_params(get_params(param), outdir = param.name)
end

In [None]:
#item_embeddings = collect(read_params("MatrixFactorization")["A"]')


# function item_validation_mse(λ)
#     @debug "training model with parameters $λ"
#     item_embeddings = collect(read_params("MatrixFactorization")["A"]')
#     neighborhoods = item -> get_embedding_neighborhood(item, item_embeddings, 200, λ[1])
#     preds = collaborative_filtering(training, validation, neighborhoods, λ[2])
#     loss = mse(validation.rating, preds)
#     @debug "loss: $loss"
#     loss
# end;

# # Find the best regularization hyperparameters
# res_item_embedding = optimize(
#     item_validation_mse,
#     [1/3],  # intial guess
#     LBFGS(),
#     autodiff = :forward,
#     Optim.Options(show_trace = true, extended_trace = true),
# )

## User based CF

In [None]:
# training_t = RatingsDataset(training.item, training.user, training.rating)
# validation_t = RatingsDataset(validation.item, validation.user, validation.rating);

In [None]:
# function user_validation_mse(λ)
#     @debug "training model with parameters $λ"
#     user_embeddings = collect(read_params("MatrixFactorization")["U"]')
#     neighborhoods = user -> get_embedding_neighborhood(user, user_embeddings, 200, λ[1])
#     preds = collaborative_filtering(training_t, validation_t, neighborhoods)
#     loss = mse(validation_t.rating, preds)
#     @debug "loss: $loss"
#     loss
# end;

# # Find the best regularization hyperparameters
# # TODO optimize p-norm
# res_user_embedding = optimize(
#     user_validation_mse,
#     [1/3],  # intial guess
#     LBFGS(),
#     autodiff = :forward,
#     Optim.Options(show_trace = true, extended_trace = true),
# )

In [None]:
# user_ratings = sparse(
#     training.item,
#     training.user,
#     training.rating,
#     maximum(training.item),
#     maximum(training.user),
# )
# user_embeddings = collect(read_params("MatrixFactorization")["U"]')
# user_cf_params = [
#     cf_params(
#         "UserEmbedCF",
#         user -> get_embedding_neighborhood(user, user_embeddings, 200, 1.0),
#         Dict("K" => 200, "λ" => 1.0),
#     ),
#     cf_params(
#         "UserEmbedCF1000",
#         user -> get_embedding_neighborhood(user, user_embeddings, 1000, 1.0),
#         Dict("K" => 1000, "λ" => 1.0),
#     ),
#     cf_params(
#         "UserEmbedCF10000",
#         user -> get_embedding_neighborhood(user, user_embeddings, 10000, 1.0),
#         Dict("K" => 10000, "λ" => 1.0),
#     ),
#     cf_params(
#         "UserCF",
#         user -> get_correlation_neighborhood(user, user_ratings, 200),
#         Dict("K" => 200),
#     ),
# ];