# Item Collaborative Filtering
* This notebook implements item-based collaborative filtering
* The prediction is $\tilde r_{ij} = \dfrac{\sum_{k \in N(j)} f(w_{kj})g(t_{ik})h(r_{ik})}{\sum_{k \in N(j)} |f(w_{kj})|g(t_{ik}) + \lambda}$
* $r_{ij}$ is the rating for user $i$ and item $j$
* $w_{kj}$ is the similarity between items $j$ and $k$
* $t_{ik}$ is a measure of how long ago user $i$ watched item $k$
* $f, g, h$ are exponential functions
* $N(j)$ is the set of the $K$ items $k \neq j$ with the largest $|w_{kj}|$

In [None]:
import LinearAlgebra: dot, norm
import Setfield: @set
import SparseArrays: sparse

## Determine the neighborhoods for each user and item

In [None]:
"""
    get_correlation_matrix_outdir(residual_alphas, name = name)

Return the output directory that holds the item-item cosine similarity matrix
computed from the training residuals of `residual_alphas`.

The directory is keyed by `name` and `hash(residual_alphas)`. When it already
exists on disk it is returned immediately; otherwise the matrix is computed,
stored under the key `"S"`, and the directory is returned.
"""
function get_correlation_matrix_outdir(residual_alphas, name = name)
    outdir = "$name/$(hash(residual_alphas))"
    # cache hit: the matrix was generated by an earlier run
    ispath(get_data_path("alphas/$outdir")) && return outdir

    @info "generating similarity matrix for $residual_alphas"
    training = RatingsDataset64(get_residuals("training", residual_alphas))
    n_items = num_items()
    R = sparse(
        training.user,
        training.item,
        training.rating,
        maximum(training.user),
        n_items,
    )
    S = zeros(eltype(R), n_items, n_items)

    # per-item column norms; a zero norm is mapped to one to prevent division by 0
    norms = map(eachslice(R, dims = 2)) do column
        magnitude = norm(column)
        magnitude == 0 ? one(magnitude) : magnitude
    end
    @tprogress Threads.@threads for i in 1:size(S)[1]
        # inner products of item i with every item, scaled by both norms
        inner_products = vec(R[:, i]' * R)
        S[:, i] = inner_products ./ norms ./ norms[i]
    end

    write_params(Dict("S" => S), outdir)
    return outdir
end;

In [None]:
"""
    get_causal_matrix_outdir(residual_alphas, name = name)

Return the output directory that holds the "causal" item-item similarity matrix
computed from the training residuals of `residual_alphas`.

Entry `S[j, i]` is the cosine similarity between the ratings of item `i` and the
ratings of item `j`, where the ratings of `j` are restricted to users whose
timestamp for `j` is strictly greater than their timestamp for `i`. Only
training rows with a positive timestamp are used.

The directory is keyed by `name` and a salted hash of `residual_alphas`. When it
already exists on disk it is returned immediately; otherwise the matrix is
computed, stored under the key `"S"`, and the directory is returned.
"""
function get_causal_matrix_outdir(residual_alphas, name = name)
    salt = hash("causal")
    outdir = "$name/$(hash(residual_alphas) + salt)"
    if ispath(get_data_path("alphas/$outdir"))
        return outdir
    end

    @info "generating causal similarity matrix for $residual_alphas"
    training = get_residuals("training", residual_alphas)
    training = filter(training, training.timestamp .> 0)
    users = convert.(Int64, training.user)
    items = convert.(Int64, training.item)
    Ts = sparse(users, items, training.timestamp, maximum(training.user), num_items())
    R = sparse(users, items, training.rating, maximum(training.user), num_items())
    S = zeros(eltype(R), num_items(), num_items())

    # norm that never returns zero, so it can be used as a divisor;
    # `one(ret)` keeps the return type independent of the value
    function safe_norm(x)
        ret = norm(x)
        return ret == 0 ? one(ret) : ret
    end

    @showprogress for i = 1:size(S)[2]
        # column i is invariant across the inner loop: slice it once
        ratings_i = R[:, i]
        timestamps_i = Ts[:, i]
        i_norm = safe_norm(ratings_i)
        Threads.@threads for j = 1:size(S)[1]
            # filter to users who watched j after watching i
            t = R[:, j] .* (Ts[:, j] .> timestamps_i)
            S[j, i] = dot(ratings_i, t) / i_norm / safe_norm(t)
        end
    end

    write_params(Dict("S" => S), outdir)
    return outdir
end;

In [None]:
# Load the parameters stored under `outdir` and return the entry saved as "S".
read_similarity_matrix(outdir) = read_params(outdir)["S"];

In [None]:
"""
    get_neighborhood_fn(neighborhood_type)

Return the neighborhood-selection function registered for `neighborhood_type`.

The only supported value is `"abs"`, which maps to `get_abs_neighborhood`.

# Throws
- `ArgumentError` if `neighborhood_type` is not a supported value.
"""
function get_neighborhood_fn(neighborhood_type)
    if neighborhood_type == "abs"
        return get_abs_neighborhood
    end
    # an explicit throw (rather than `@assert false`) cannot be compiled out,
    # so an unknown type never falls through and returns `nothing`
    throw(
        ArgumentError(
            "unsupported neighborhood_type $(repr(neighborhood_type)); expected \"abs\"",
        ),
    )
end;

In [None]:
"""
    get_abs_neighborhood(item, S, K)

Return `(neighbors, weights)` for the `K` entries of column `item` of `S` with
the largest absolute value, excluding `item` itself. `K` is capped at one less
than the column length. `S` is not modified.
"""
function get_abs_neighborhood(item, S, K)
    column = S[:, item] # indexing copies, so the caller's matrix is left untouched
    # give the item the largest possible magnitude so that it always ranks first
    # and is skipped by taking ranks 2 through k + 1
    column[item] = Inf
    k = Int(min(K, length(column) - 1))
    magnitudes = abs.(column)
    neighbors = partialsortperm(magnitudes, 2:k+1, rev = true)
    return neighbors, column[neighbors]
end;

In [None]:
# `true` unless `x` is within `eps(Float32)` of zero
function isnonzero(x)
    return !isapprox(x, 0.0; atol = eps(Float32))
end

"""
    make_prediction(item, users, R, T, get_neighborhood, λ)

Predict the rating of `item` for every user in `users`.

Each prediction is a weighted sum over the items in the neighborhood of `item`
that the user has a nonzero entry for in `R`, divided by the sum of the absolute
weights plus a regularization term.

# Arguments
- `item`: the item to predict.
- `users`: the users to predict for; one prediction is returned per entry.
- `R`: user-by-item ratings, indexed as `R[user, item]`.
- `T`: user-by-item timestamps, indexed as `T[user, item]`.
- `get_neighborhood`: called as `get_neighborhood(item)`; must return
  `(items, weights)`.
- `λ`: hyperparameters. `exp(λ[1])` is added to the denominator, `λ[2]` is passed
  to `powerdecay` for the weights, `λ[3]` is passed to `powerdecay` for the
  ratings, and `exp(λ[4])` is passed to `powerlawdecay` for `1 - T[user, item]`.

# Returns
A vector with one prediction per entry of `users`.
"""
function make_prediction(item, users, R, T, get_neighborhood, λ)
    items, weights = get_neighborhood(item)
    weights = powerdecay(weights, λ[2])
    predictions = zeros(eltype(weights), length(users))
    weight_sum = zeros(eltype(weights), length(users))

    # these depend only on λ, so compute them once instead of per (user, item)
    time_decay = exp(λ[4])
    regularizer = exp(λ[1])

    for (u, user) in enumerate(users)
        for (i, weight) in zip(items, weights)
            # a sparse lookup is a search, so read each rating a single time
            rating = R[user, i]
            if isnonzero(rating)
                t = powerlawdecay(1 - T[user, i], time_decay)
                predictions[u] += weight * powerdecay(rating, λ[3]) * t
                weight_sum[u] += abs(weight) * t
            end
        end
    end

    # normalise in place; the regularizer shrinks predictions backed by little weight
    predictions ./= weight_sum .+ regularizer
    return predictions
end;

In [None]:
"""
    collaborative_filtering(training, inference, get_neighborhood, λ)

Predict a rating for every `(user, item)` row of `inference` using the ratings
and timestamps in `training`.

# Arguments
- `training`: provides the `user`, `item`, `rating` and `timestamp` columns used
  to build the ratings and timestamp matrices; negative timestamps are clamped
  to zero.
- `inference`: provides the `user` and `item` columns to predict for.
- `get_neighborhood`: forwarded to `make_prediction`.
- `λ`: hyperparameters, forwarded to `make_prediction`; its element type is the
  element type of the result.

# Returns
A vector of predictions aligned with the rows of `inference`.
"""
function collaborative_filtering(training, inference, get_neighborhood, λ)
    R = sparse(training.user, training.item, training.rating, num_users(), num_items())
    T = sparse(
        training.user,
        training.item,
        max.(training.timestamp, 0.0),
        num_users(),
        num_items(),
    )

    # group the inference rows by item in a single pass, instead of scanning
    # every row once per distinct item to build a boolean mask
    rows_by_item = Dict{eltype(inference.item),Vector{Int}}()
    for (row, item) in pairs(inference.item)
        rows = get!(rows_by_item, item) do
            Int[]
        end
        push!(rows, row)
    end

    preds = zeros(eltype(λ), length(inference.user))
    # every item owns a disjoint set of rows, so the threads never write to the
    # same element of `preds`; the dictionary is only read inside the loop
    @tprogress Threads.@threads for item in collect(keys(rows_by_item))
        rows = rows_by_item[item]
        preds[rows] = make_prediction(item, inference.user[rows], R, T, get_neighborhood, λ)
    end
    return preds
end;

In [None]:
# Inputs and hyperparameters for one item-based collaborative filtering model.
# `optimize_model` reads every field and writes the struct back out via `to_dict`.
@with_kw struct cf_params
    name::Any # output location under "alphas/"; also where logging is redirected
    training_residuals::Any # forwarded to `get_training` to build the training data
    validation_residuals::Any # forwarded to `get_validation` and `residualized_loss`
    neighborhood_type::Any # selector for `get_neighborhood_fn`; only "abs" is handled
    S::Any # passed to `read_similarity_matrix` to load the similarity matrix
    K::Any # the neighborhood size
    λ::Vector{Float32} # starting point for the optimizer; `make_prediction` reads λ[1] to λ[4]
end;

# Convert a struct into a `Dict` that maps each field name (as a string) to its value.
function to_dict(x::T) where {T}
    return Dict(string(field) => getfield(x, field) for field in fieldnames(T))
end;

## Item based CF

In [None]:
# Build the residual dataset for `split`: the explicit ratings minus the ratings
# read for `residual_alphas`. Timestamps are carried over for the "training"
# split only; every other split gets an empty timestamp list.
function get_residuals(split, residual_alphas)
    df = get_split(split, "explicit")
    baseline = read_alpha(residual_alphas, split, "explicit", false)
    residuals = df.rating - baseline.rating
    timestamps = split == "training" ? df.timestamp : []
    return RatingsDataset(;
        user = df.user,
        item = df.item,
        rating = residuals,
        timestamp = timestamps,
    )
end

# Residual dataset for the "training" split.
get_training(residual_alphas) = get_residuals("training", residual_alphas)

# Residual dataset for the "validation" split.
get_validation(residual_alphas) = get_residuals("validation", residual_alphas);

In [None]:
"""
    optimize_model(param)

Fit the hyperparameters `λ` of the collaborative filtering model described by
`param`, then store the fitted parameters and the model's predictions.

Starting from `param.λ`, the validation loss is minimised with
`Optim.NewtonTrustRegion` using forward-mode automatic differentiation, for at
most 30 iterations. Logging, the fitted parameters and the predictions are all
written under `param.name`.

Returns the result of `write_alpha`.
"""
function optimize_model(param)
    # unpack parameters
    redirect_logging(get_data_path("alphas/$(param.name)"))
    training = get_training(param.training_residuals)
    validation = get_validation(param.validation_residuals)
    S = read_similarity_matrix(param.S)
    K = param.K
    # resolve the neighborhood function once, so an unsupported type fails here
    # rather than on the first prediction
    neighborhood_fn = get_neighborhood_fn(param.neighborhood_type)
    neighborhoods = i -> neighborhood_fn(i, S, K)

    # optimize hyperparameters
    function validation_mse(λ)
        preds = collaborative_filtering(training, validation, neighborhoods, λ)
        loss = residualized_loss(param.validation_residuals, "explicit", false, preds)
        @info "loss: $loss"
        return loss
    end
    res = Optim.optimize(
        validation_mse,
        param.λ,
        Optim.NewtonTrustRegion(),
        autodiff = :forward,
        Optim.Options(show_trace = true, extended_trace = true, iterations = 30),
    )
    # bind the fitted copy to a new name: reassigning `param` while closures
    # capture it would force the captured variable to be boxed
    fitted = @set param.λ = Optim.minimizer(res)

    function model(users, items)
        inference = RatingsDataset(user = users, item = items)
        return collaborative_filtering(training, inference, neighborhoods, fitted.λ)
    end

    # save predictions under the same name as the log and the parameters,
    # instead of depending on a global `name` being defined and in sync
    write_params(to_dict(fitted), fitted.name)
    return write_alpha(model, fitted.name; log = false)
end;