# Item Collaborative Filtering
* Computes Item Collaborative Filtering alphas for the recommendee
* See `../TrainingAlphas/ItemCF.ipynb` for algorithm details

In [1]:
# Name of this alpha family. NOTE(review): presumably consumed by the code
# pulled in from Alpha.ipynb; it is not read anywhere in this notebook -- confirm.
source = "ItemCF";

In [2]:
using NBInclude
@nbinclude("Alpha.ipynb");

## Determine the neighborhoods for each user and item

In [3]:
# Read the parameters saved under `outdir` and return the entry stored under
# the key "S" (presumably the item-item similarity matrix -- confirm against
# the notebook that writes it).
function read_similarity_matrix(outdir)
    saved = read_params(outdir)
    return saved["S"]
end;

In [4]:
# Return `(indices, weights)` for the neighborhood of `item`.
#
# The entries of column `item` of `S` are ranked by absolute value, largest
# first. The top-ranked entry is skipped (it is expected to be the item itself)
# and the next `K` entries are returned, together with their signed weights.
# `K` is clamped so that it never exceeds the number of remaining entries.
function get_abs_neighborhood(item, S, K)
    column = S[:, item]
    available = length(column) - 1
    k = Int(min(K, available))
    # ranks 2 .. k+1: rank 1 is assumed to be `item` itself
    ranks = 2:(k + 1)
    neighbors = partialsortperm(abs.(column), ranks; rev = true)
    return neighbors, column[neighbors]
end;

In [5]:
# Predict the rating of `item` for every user in `users`.
#
# Each prediction is the weighted sum of the user's ratings over the items in
# the neighborhood of `item`. Regularization terms decay the weights (`λ[1]`),
# the ratings (`λ[2]`) and the final prediction (`λ[3]`, added to the
# normalizing sum of absolute weights).
#
# Arguments:
# - `item`: column index into `R` of the item being predicted
# - `users`: row indices into `R`, one per requested prediction
# - `R`: user x item rating matrix; entries equal to zero are skipped
# - `get_neighborhood`: callable `item -> (items, weights)`
# - `λ`: indexable collection holding at least three regularization terms
#
# Returns a vector with one prediction per entry of `users`. If `item` lies
# beyond the columns of `R`, every prediction is zero.
function make_prediction(item, users, R, get_neighborhood, λ)
    if item > size(R, 2)
        # the item was not in our training set; we have no information,
        # so return a zero for each requested user
        return zeros(length(users))
    end

    # Signed power: keep the sign of `x`, raise its magnitude to the power `a`.
    # Zero is special-cased so that non-positive exponents cannot yield Inf/NaN;
    # that branch returns a zero of the same type as the other branch so the
    # result type does not depend on the value of `x`.
    function decay(x, a)
        if iszero(x)
            return zero(promote_type(typeof(x), typeof(a)))
        end
        return sign(x) * abs(x)^a
    end

    items, weights = get_neighborhood(item)
    weights = decay.(weights, λ[1])
    predictions = zeros(eltype(weights), length(users))
    weight_sum = zeros(eltype(weights), length(users))
    for (u, user) in enumerate(users)
        for (i, weight) in zip(items, weights)
            rating = R[user, i]
            if rating != 0
                predictions[u] += weight * decay(rating, λ[2])
                weight_sum[u] += abs(weight)
            end
        end
        # normalize by the total absolute weight plus the regularizer,
        # leaving the raw sum untouched when that denominator is zero
        denominator = weight_sum[u] + λ[3]
        if denominator != 0
            predictions[u] /= denominator
        end
    end
    return predictions
end;

In [6]:
# Predict a rating for every (user, item) row of `inference`.
#
# Arguments:
# - `training`: object with parallel `user`, `item` and `rating` fields, used
#   to build the rating matrix `R` with `maximum(training.user)` rows and
#   `num_items()` columns
# - `inference`: object with parallel `user`, `item` and `rating` fields; only
#   the length of `rating` is used, to size the output
# - `get_neighborhood`: callable `item -> (items, weights)`
# - `λ`: regularization terms forwarded to `make_prediction`; its element type
#   is also the element type of the result
#
# Returns a vector of predictions aligned with the rows of `inference`.
function collaborative_filtering(training, inference, get_neighborhood, λ)
    R = sparse(
        training.user,
        training.item,
        training.rating,
        maximum(training.user),
        num_items(),
    )

    # Every row of `inference` belongs to exactly one item, so the masks of
    # different iterations select disjoint rows and all tasks can write into a
    # single shared vector. This avoids a buffer indexed by
    # `Threads.threadid()`, which is not guaranteed to stay within
    # `1:Threads.nthreads()` nor to stay fixed for the lifetime of a task.
    preds = zeros(eltype(λ), length(inference.rating))
    @tprogress Threads.@threads for item in collect(Set(inference.item))
        mask = inference.item .== item
        preds[mask] =
            make_prediction(item, inference.user[mask], R, get_neighborhood, λ)
    end

    return preds
end;

## Item based CF

In [7]:
# List the alphas registered under "CombineSignals" whose name contains
# "ItemCF".
function get_alphas()
    registered = read_params("CombineSignals")["alphas"]
    return [name for name in registered if occursin("ItemCF", name)]
end;

10-element Vector{String}:
 "ItemCF.16"
 "ItemCF.64"
 "ItemCF.256"
 "ItemCF.1024"
 "ItemCFResid.16"
 "ItemCFResid.64"
 "ItemCFResid.256"
 "ItemCFResid.1024"
 "ItemCFRelated.strict_relations"
 "ItemCFEmbed.1024"

In [8]:
# Compute the recommendee's predictions for one alpha and write them out.
#
# The parameters saved under `alpha` supply the similarity-matrix location
# ("S"), the neighborhood size ("K"), the neighborhood type
# ("neighborhood_type"), the regularization terms ("λ") and the residuals to
# train on ("training_residuals"). Predictions are made for every item index
# in `1:num_items()` and handed to `write_recommendee_alpha` as a lookup.
function compute_alpha(alpha)
    params = read_params(alpha)
    similarity = read_similarity_matrix(params["S"])
    neighborhood_size = params["K"]
    finders = Dict("abs" => get_abs_neighborhood)
    # the finder is looked up on every call, keyed by the configured type
    function neighborhood_of(i)
        finder = finders[params["neighborhood_type"]]
        return finder(i, similarity, neighborhood_size)
    end
    regularization = params["λ"]
    ratings = get_residuals("recommendee", params["training_residuals"])
    ratings.user .= 1 # relabel ids so that recommendee -> 1
    n = num_items()
    # one inference row per item, all attributed to user 1
    targets = RatingsDataset(fill(1, n), 1:n, zeros(n))
    predictions =
        collaborative_filtering(ratings, targets, neighborhood_of, regularization)
    model(items) = predictions[items]
    return write_recommendee_alpha(model, outdir = alpha)
end;

[32mProgress: 100%|███████████████████████████| Time: 0:00:02 ( 2.69 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:01 ( 1.22 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.69 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:01 ( 1.68 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.44 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.46 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.66 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:01 ( 1.68 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:01 ( 1.25 ms/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:03 ( 2.86 ms/it)[39m


In [None]:
# Compute every ItemCF alpha in turn.
foreach(compute_alpha, get_alphas());