# Item-based Collaborative Filtering on Related Series
* Constructs a graph of structurally related series (sequels, prequels, etc.)
* Performs item-based collaborative filtering with that graph as the neighborhood kernel

In [None]:
# Identifier of this model. It is also interpolated as the first path component
# of the cached similarity-matrix directory in `get_similarity_matrix_outdir`.
const name = "ItemCFRelated";

In [None]:
using LinearAlgebra
using SparseArrays
import CSV
import DataFrames: DataFrame
import NBInclude: @nbinclude
@nbinclude("../Alpha.ipynb")
@nbinclude("../Explicit/ExplicitItemCFBase.ipynb");

## Compute similarity matrices

In [None]:
"""
    get_similarity_matrix(relationships)

Build the sparse adjacency matrix of the related-series graph.

Reads `processed_data/related_series.csv` (resolved through `get_data_path`),
keeps only the rows whose `relationship` value is contained in `relationships`,
and returns a `num_items() × num_items()` sparse `Float64` matrix holding `1.0`
at `(source + 1, target + 1)` for every kept row. Rows that repeat the same
`(source, target)` pair are summed by `sparse`, so such an entry can exceed 1.
"""
function get_similarity_matrix(relationships)
    csv_path = get_data_path("processed_data/related_series.csv")
    edges = filter(
        row -> row.relationship ∈ relationships,
        DataFrame(CSV.File(csv_path)),
    )
    # Shift ids by one to get valid Julia indices
    # (presumably the CSV stores 0-based item ids -- TODO confirm).
    rows = edges.source .+ 1
    cols = edges.target .+ 1
    n = num_items()
    return sparse(rows, cols, ones(length(rows)), n, n)
end;

In [None]:
"""
    all_pairs_shortest_paths(S)

Compute hop-count shortest-path distances between every pair of nodes of the
directed graph whose adjacency structure is the square sparse matrix `S`.

A stored entry `S[i, j]` is treated as an edge `i -> j` of weight 1; only the
sparsity structure is used, the stored values are ignored.

# Returns
A dense `Matrix{Float64}` `D` with `D[i, j]` equal to the minimum number of
edges on a path from `i` to `j`, `D[i, i] == 0`, and `D[i, j] == Inf` when `j`
cannot be reached from `i`.

# Throws
- `DimensionMismatch` if `S` is not square.
"""
function all_pairs_shortest_paths(S)
    n = size(S, 1)
    n == size(S, 2) || throw(
        DimensionMismatch("adjacency matrix must be square, got size $(size(S))"),
    )

    dists = fill(Inf, n, n)
    rows = rowvals(S)
    frontier = Int[]
    next_frontier = Int[]

    # One breadth-first search per target node. Column `v` of a CSC matrix
    # lists the nodes `u` with an edge `u -> v`, so walking columns explores
    # the graph backwards from `target` and fills column `target` of `dists`,
    # which is also the memory-contiguous direction of a dense Julia matrix.
    for target = 1:n
        dists[target, target] = 0
        empty!(frontier)
        push!(frontier, target)
        depth = 0
        while !isempty(frontier)
            depth += 1
            empty!(next_frontier)
            for v in frontier
                for idx in nzrange(S, v)
                    u = rows[idx]
                    # `Inf` doubles as the "not visited yet" marker; nodes
                    # reached earlier hold a value <= depth and are skipped.
                    if dists[u, target] > depth
                        dists[u, target] = depth
                        push!(next_frontier, u)
                    end
                end
            end
            frontier, next_frontier = next_frontier, frontier
        end
    end
    return dists
end;

In [None]:
"""
    get_similarity_matrix_outdir(relationships)

Return the output directory key (`name`, a slash, then `hash(relationships)`)
under which the similarity matrix for `relationships` is stored.

If `../../data/alphas/<key>` already exists the key is returned immediately.
Otherwise the matrix is rebuilt and handed to `write_params` under that key
before returning. The stored matrix `S` is the elementwise reciprocal of the
hop-count distance matrix, converted to `Float32`: `1 / d` for nodes `d` hops
apart, `0` for unreachable pairs, and `Inf` on the diagonal (distance 0).
"""
function get_similarity_matrix_outdir(relationships)
    # NOTE(review): the cache key comes from `hash(relationships)`. Base does
    # not promise that hash values are stable across Julia versions or
    # processes, and the key also depends on the order of `relationships`; a
    # changed key only triggers regeneration -- confirm that is acceptable.
    outdir = "$name/$(hash(relationships))"
    if !ispath("../../data/alphas/$outdir")
        @debug "generating similarity matrix for relationships $relationships"
        hops = all_pairs_shortest_paths(get_similarity_matrix(relationships))
        S = Float32.(1 ./ hops)
        write_params(Dict("S" => S), outdir)
    end
    return outdir
end;

In [None]:
# Relationship labels that count as edges of the related-series graph.
# Rows of the relations CSV are kept only when their `relationship` value is in
# this list; it is passed to `get_similarity_matrix_outdir` and
# `max_neighborhood_size` when the model parameters are built.
const strict_relations = [
    "side_story",
    "summary",
    "parent_story",
    "sequel",
    "prequel",
    "alternative_version",
    "spin_off",
    "alternative_setting",
    "full_story",
];

In [None]:
"""
    max_neighborhood_size(relations)

Return the size of the largest nonzero neighborhood for `relations`.

Loads the matrix `S` stored for `relations` (generating it first if needed),
inverts it elementwise, and returns the maximum over columns of the number of
entries that are neither `0` nor `Inf`. Because `S` is stored as reciprocal
hop counts, those entries are the items reachable from the column's item,
excluding the item itself. Returns `0` for an empty matrix.
"""
function max_neighborhood_size(relations)
    stored = read_params(get_similarity_matrix_outdir(relations))["S"]
    # Undo the reciprocal: the diagonal (stored as Inf) becomes 0 and
    # unreachable pairs (stored as 0) become Inf; both are excluded below.
    hops = 1 ./ stored
    largest = 0
    for column in eachcol(hops)
        largest = max(largest, count(d -> d != 0 && d != Inf, column))
    end
    neighborhood_size = largest
    @debug "using neighborhood size $neighborhood_size"
    return neighborhood_size
end;

In [None]:
# Model parameters: no residual inputs, "abs" neighborhoods, the related-series
# similarity matrix as the kernel, and K set to the largest neighborhood found
# in that matrix.
const param = let relations = strict_relations
    # Resolve the similarity matrix first; this generates it on a cache miss.
    similarity_outdir = get_similarity_matrix_outdir(relations)
    largest_neighborhood = max_neighborhood_size(relations)
    cf_params(
        name = "ItemCFRelated",
        training_residuals = String[],
        validation_residuals = String[],
        neighborhood_type = "abs",
        S = similarity_outdir,
        K = largest_neighborhood,
        λ = [1.0, 0.0, 0.0],
    )
end

In [None]:
# Hand the parameter dictionary to `write_params` under the model's name
# (presumably persisting it alongside the cached matrices -- verify in Alpha.ipynb).
write_params(to_dict(param), param.name)