# Related Series
* Constructs a graph of structurally related series (sequels, prequels, etc.)

In [None]:
# Identifier of the alpha produced by this notebook; the same string is passed
# as the output name when the related-series matrix is generated further down.
# NOTE(review): `name` is not referenced again in this notebook — presumably it
# is read by helpers pulled in from ../Alpha.ipynb; confirm before renaming.
const name = "all/RelatedSeries";

In [None]:
using LinearAlgebra
using SparseArrays
import CSV
import DataFrames: DataFrame
import NBInclude: @nbinclude
@nbinclude("../Alpha.ipynb")

## Compute similarity matrices

In [None]:
"""
    get_similarity_matrix(relationships)

Build the sparse adjacency matrix of the related-series graph.

Reads `processed_data/related_series.csv`, keeps only the rows whose
`relationship` value is contained in `relationships`, and returns a
`num_items()` × `num_items()` sparse `Float64` matrix with a `1.0` contributed
at `(source + 1, target + 1)` for every kept row.

Note that `sparse` adds the values of repeated `(row, column)` pairs, so a pair
that appears in several kept rows ends up with a value greater than `1.0`.
"""
function get_similarity_matrix(relationships)
    table = DataFrame(CSV.File(get_data_path("processed_data/related_series.csv")))
    # `Ref` keeps the collection of relationships from being broadcast over
    keep = in.(table.relationship, Ref(relationships))
    # ids in the file are shifted by one to form matrix indices
    rows = table.source[keep] .+ 1
    cols = table.target[keep] .+ 1
    weights = ones(Float64, length(rows))
    return sparse(rows, cols, weights, num_items(), num_items())
end;

In [None]:
"""
    all_pairs_shortest_paths(S)

Return a dense `Float32` matrix `dists` with the same shape as `S`, where
`dists[i, j]` is the smallest `k` for which the `k`-th power of `S` has a stored
entry at `(i, j)`. Treating every stored entry of `S` as an edge of weight 1,
this is the number of hops on a shortest path from `i` to `j`.

The diagonal is `0` and pairs that are never reached keep the value `Inf32`.
The search stops at the first power that does not improve any distance.
"""
function all_pairs_shortest_paths(S)
    n = size(S, 1)
    dists = fill(Inf32, size(S)...)
    for v in 1:n
        dists[v, v] = 0
    end

    # after `epoch` multiplications, `walks` equals S^epoch; an entry stored at
    # (i, j) means j can be reached from i in exactly `epoch` steps
    walks = I(n)
    @showprogress for epoch in 1:n
        walks = walks * S
        rows, cols, _ = findnz(walks)
        improved = false
        for (i, j) in zip(rows, cols)
            # the first epoch that reaches a pair is its shortest distance
            if dists[i, j] > epoch
                dists[i, j] = epoch
                improved = true
            end
        end
        # no pair got closer, so longer walks cannot discover anything new
        improved || break
    end
    return dists
end;

In [None]:
"""
    get_similarity_matrix_outdir(name, relationships, expand)

Return the output directory `"\$name/similarity_matrix"`, generating the
similarity matrix first when nothing exists at `../../data/alphas/` under that
directory.

# Arguments
- `name`: prefix of the output directory.
- `relationships`: collection of relationship labels forwarded to
  `get_similarity_matrix`.
- `expand`: when `true`, the matrix is replaced by a reachability matrix: `1`
  wherever `all_pairs_shortest_paths` reports a finite distance (this includes
  the diagonal) and `0` elsewhere.

The matrix is converted to a dense `Float32` array and handed to `write_params`
under the key `"S"`.
"""
function get_similarity_matrix_outdir(name, relationships, expand)
    outdir = "$name/similarity_matrix"
    # a previous run already produced this matrix: nothing to regenerate
    ispath("../../data/alphas/$outdir") && return outdir

    @debug "generating similarity matrix for relationships $relationships"
    adjacency = get_similarity_matrix(relationships)
    S = if expand
        # finite distance => connected (1); infinite distance => unrelated (0)
        Float32.(isfinite.(all_pairs_shortest_paths(adjacency)))
    else
        Float32.(collect(adjacency))
    end
    write_params(Dict("S" => S), outdir)
    return outdir
end;

In [None]:
# definitions for the relationships can be found at https://myanimelist.net/info.php?go=relationinfo
# Each relationship label that may occur in related_series.csv belongs to
# exactly one of the groups below; `all_relations` is their union.
const strict_relations = Set(("sequel", "prequel", "parent_story", "side_story"))
const recap_relations = Set(("alternative_version", "summary", "full_story"))
const loose_relations = Set(("alternative_setting", "spin_off"))
const no_relations = Set(("adaptation", "character", "other"))
const all_relations =
    union(strict_relations, recap_relations, loose_relations, no_relations)
df = DataFrame(CSV.File(get_data_path("processed_data/related_series.csv")));
# Fail loudly if the data contains a relationship that is not classified above.
# An explicit check is used instead of `@assert` because assertions may be
# disabled at some optimization levels, and the error names the offending
# labels. The `let` block keeps the temporary out of the notebook's globals.
let unknown = setdiff(unique(df.relationship), all_relations)
    if !isempty(unknown)
        error("unclassified relationships in related_series.csv: $(unknown)")
    end
end

In [None]:
# Generate (or reuse) one similarity matrix per entry:
# (output name, relationships to include, expand via shortest paths?)
for (alpha, relations, expand) in (
    (
        "all/RelatedSeries",
        union(strict_relations, recap_relations, loose_relations),
        true,
    ),
    ("all/RecapSeries", recap_relations, false),
)
    get_similarity_matrix_outdir(alpha, relations, expand)
end