# Item Collaborative Filtering
* See `ItemCollaborativeFilteringBase.ipynb` for algorithm details
* The weight between two series is the number of MyAnimeList (MAL) user recommendations linking them (see https://myanimelist.net/recommendations.php?s=recentrecs&t=anime)

In [1]:
# Identifier for this notebook's model family. It is the first path component of
# the cached similarity-matrix directory built in `get_rec_matrix_outdir`.
# NOTE(review): it is assigned before the base notebook is included, so that
# notebook presumably reads it as well -- confirm before renaming.
name = "MalRecommendations";

In [2]:
using NBInclude
@nbinclude("ItemCollaborativeFilteringBase.ipynb");

In [3]:
using DataFrames
using SparseArrays

## Compute similarity matrices

In [4]:
"""
    get_rec_matrix()

Read the recommendation counts from `mal_recs.csv` and return them as a square
sparse `Float64` matrix.

The entry at `(source + 1, target + 1)` holds `num_recommendations` for that
row of the CSV. The side length of the matrix is the largest `item` value in
the training split.
"""
function get_rec_matrix()
    recs = DataFrame(CSV.File("../../data/processed_data/mal_recs.csv"))
    side = maximum(get_split("training").item)
    # The ids in the CSV are shifted up by one before being used as indices
    # (presumably 0-based ids -> Julia's 1-based indexing; confirm with the
    # script that produces the CSV).
    row_idx = recs.source .+ 1
    col_idx = recs.target .+ 1
    counts = convert.(Float64, recs.num_recommendations)
    return sparse(row_idx, col_idx, counts, side, side)
end

"""
    to_stochastic_matrix!(S)

Turn `S` into a left stochastic matrix in place and return it.

The diagonal is set to zero first, then every column is divided by its sum so
that it adds up to one. Columns whose sum is not positive are left unscaled.

The generic method works on any mutable matrix. The `SparseMatrixCSC` method
produces the same values but only visits stored entries, so its cost is
proportional to the number of stored entries rather than to `rows * cols`, and
it never changes the sparsity structure.
"""
function to_stochastic_matrix!(S)
    # Only indices present in both dimensions have a diagonal entry.
    for i in 1:min(size(S, 1), size(S, 2))
        S[i, i] = 0
    end
    col_weights = sum(S, dims = 1)
    # Column-outer, row-inner order follows Julia's column-major layout.
    for j in axes(S, 2)
        weight = col_weights[j]
        weight > 0 || continue
        for i in axes(S, 1)
            S[i, j] /= weight
        end
    end
    return S
end

function to_stochastic_matrix!(S::SparseMatrixCSC)
    rows = rowvals(S)
    vals = nonzeros(S)
    # Zero the stored diagonal entries; unstored ones are already zero.
    for j in axes(S, 2), k in nzrange(S, j)
        if rows[k] == j
            vals[k] = 0
        end
    end
    col_weights = sum(S, dims = 1)
    for j in axes(S, 2)
        weight = col_weights[j]
        weight > 0 || continue
        # Unstored entries stay zero after division, so scaling the stored
        # values is enough.
        for k in nzrange(S, j)
            vals[k] /= weight
        end
    end
    return S
end;

In [5]:
"""
    get_rec_matrix_outdir(power)

Return the output directory (relative to `../../data/alphas`) that holds the
similarity matrix for the given `power`, generating it first if that path does
not exist yet.

When generated, the matrix is the sum of the first `power` powers of the
column-normalized recommendation matrix, converted to a dense array with
`collect` and written via `write_params` under the key `"S"`.

NOTE(review): the directory name comes from `hash(power)`. Julia's manual does
not appear to promise that hash values stay the same across versions, so
existing caches may stop being found after an upgrade -- confirm this is
acceptable.
"""
function get_rec_matrix_outdir(power)
    outdir = "$name/$(hash(power))"
    # Anything already at the target path counts as a valid cache entry.
    if !ispath("../../data/alphas/$outdir")
        @debug "generating similarity matrix for exponent $power"
        transition = to_stochastic_matrix!(get_rec_matrix())
        similarity = collect(sum(transition^n for n = 1:power))
        write_params(Dict("S" => similarity), outdir = outdir)
    end
    return outdir
end;

## Setup hyperparameters

In [6]:
# Return `Int(x)` when `x` is integer-valued, otherwise `x` unchanged.
function downcast_to_int(x)
    if isinteger(x)
        return Int(x)
    end
    return x
end

# Names of the ItemCF alphas that are added to the validation residuals below.
item_alphas = map(K -> "ItemCF.$K", downcast_to_int.([2^4, 2^6, 2^8, 2^10]))

# One `cf_params` entry per (K, power) pair, with K varying slowest.
# `get_rec_matrix_outdir` is called while this list is built, so the similarity
# matrix may be generated here.
params = vcat([
    cf_params(
        name = "MalRec.$power.$K",
        training_residuals = ["UserItemBiases"],
        validation_residuals = vcat("UserItemBiases", item_alphas),
        neighborhood_type = "abs",
        S = get_rec_matrix_outdir(power),
        K = K,
        λ = [1.0, 1.0, 0.0],
    ) for K in downcast_to_int.([256]) for power in [2]
]);

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211230 14:00:14 generating similarity matrix for exponent 2


## Train models

In [None]:
# Run `optimize_model` on each parameter set, in order.
foreach(optimize_model, params)

[32mProgress: 100%|███████████████████████████| Time: 0:08:33 ( 0.13  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211230 14:09:38 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.270084729151381,0.0008338065090826656,0.00016287391382079126,-0.026405306047335944) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(0.0785799295501598,-0.05083759709044963,-0.060524213773980734,1.2159393675817618): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neigh

Iter     Function value   Gradient norm 
     0     1.270085e+00     2.640531e-02
 * Current step size: 1.0
 * time: 0.02389383316040039
 * g(x): [0.0008338065090826656, 0.00016287391382079126, -0.026405306047335944]
 * x: [1.0, 1.0, 0.0]


[32mProgress: 100%|███████████████████████████| Time: 0:08:10 ( 0.12  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211230 14:17:54 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2699688140331429,0.0009752382252436996,0.00022558040712057883,-0.0023408369119160707) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(0.08875141669807847,-0.04116366094060482,-0.06836570516305569,0.28697220409467583): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_ne