# Item Collaborative Filtering
* See `ItemCollaborativeFilteringBase.ipynb` for algorithm details
* The weights here are the reciprocal of the Euclidean distance (the l2-norm of the difference) between pairs of item embeddings

In [1]:
# Identifier of this model; `get_embed_matrix_outdir` interpolates it into the
# output directory under which the similarity matrix is stored.
name = "ItemCFEmbed";

In [2]:
using NBInclude
@nbinclude("ItemCollaborativeFilteringBase.ipynb");

## Compute inverse-distance similarities

In [3]:
"""
    normalize_rows(X)

Standardize every row of the matrix `X`: subtract the row mean and divide by
the row standard deviation (both taken along `dims = 2`).

A row whose standard deviation is exactly zero (a constant row) is mapped to
all zeros instead of being divided by zero, which would otherwise turn the
whole row into `NaN`.

# Returns
A matrix with the same shape as `X`.
"""
function normalize_rows(X)
    μ = mean(X, dims = 2)
    σ = std(X, dims = 2)
    # For a constant row `X .- μ` is already zero, so dividing by one keeps the
    # row at zero rather than producing 0 / 0 = NaN.
    safe_σ = ifelse.(iszero.(σ), one(eltype(σ)), σ)
    return (X .- μ) ./ safe_σ
end

"""
    get_embed_matrix_outdir(power)

Return the output directory (relative to `../../data/alphas/`) that holds the
item-item similarity matrix built with the `power`-norm.

If that directory already exists it is returned immediately. Otherwise the
matrix is generated and handed to `write_params` under the key `"S"` before the
directory name is returned.

The matrix is built from the `"A"` parameters of the `MatrixFactorization`
models with K = 10, 20 and 40: each is transposed, the results are stacked
vertically and passed through `normalize_rows`. Entry `(i, j)` is the
reciprocal of the `power`-norm of the difference between columns `i` and `j`
of that stacked matrix.
"""
function get_embed_matrix_outdir(power)
    outdir = "$name/$(hash(power))"
    # Cached on disk already: nothing to recompute.
    ispath("../../data/alphas/$outdir") && return outdir

    @debug "generating similarity matrix for $power-norm"
    factor_blocks =
        [collect(read_params("MatrixFactorization.$K")["A"]') for K in [10, 20, 40]]
    kernel = normalize_rows(vcat(factor_blocks...))

    # The similarity matrix is square, sized by the largest item id in training.
    # NOTE(review): assumes `kernel` has exactly that many columns — confirm.
    training = get_residuals("training", residual_alphas)
    n_items = maximum(training.item)
    S = zeros(n_items, n_items)

    @tprogress Threads.@threads for j = 1:size(S, 1)
        # Offsets of every column from column j; each thread fills its own column of S.
        offsets = kernel .- kernel[:, j]
        S[:, j] = 1 ./ [norm(col, power) for col in eachcol(offsets)]
    end

    # remove Inf (a column is at distance zero from itself, so its reciprocal is Inf)
    clamp!(S, -1e9, 1e9)
    write_params(Dict("S" => Float32.(S)), outdir = outdir)
    return outdir
end;

## Setup hyperparameters

In [4]:
# Convert integer-valued numbers to `Int`; return every other value unchanged.
function downcast_to_int(x)
    if isinteger(x)
        return Int(x)
    end
    return x
end
# Strings "ItemCF.K" for K = 2^4, 2^6, 2^8 and 2^10.
# NOTE(review): `item_alphas` is not referenced by any other visible cell —
# confirm whether included code still needs it.
item_alphas = map(K -> "ItemCF.$K", downcast_to_int.(2 .^ [4, 6, 8, 10]))

# One configuration per neighborhood size K (currently only 2^10). All of them
# use the same 2-norm similarity matrix, so its directory is resolved once.
item_cf_params = let similarity_outdir = get_embed_matrix_outdir(2)
    map(downcast_to_int.([2^10])) do K
        cf_params(
            name = "ItemCFEmbed.$K",
            training_residuals = ["UserItemBiases"],
            validation_residuals = ["UserItemBiases"],
            neighborhood_type = "abs",
            S = similarity_outdir,
            K = K,
            λ = [2.4182964143573114, 1.0364205793963885, 0.000497907758177069],
        )
    end
end;

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220106 06:29:04 generating similarity matrix for 2-norm
[32mProgress: 100%|███████████████████████████| Time: 0:00:10 (10.19 ms/it)[39m


## Train models

In [None]:
# Run `optimize_model` on every configuration in `item_cf_params`, in order.
foreach(optimize_model, item_cf_params)

[32mProgress: 100%|███████████████████████████| Time: 0:04:21 ( 0.26  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220106 06:35:18 loss: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(1.3795801446545115,-0.041928096172602694,0.002072050082606681,-15.35486390120541) β: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(0.8385115856434988,0.09899820476597299,-0.5700249503908593,106.45393281308934): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood

Iter     Function value   Gradient norm 
     0     1.379580e+00     1.535486e+01
 * Current step size: 1.0
 * time: 0.025140047073364258
 * g(x): [-0.041928096172602694, 0.002072050082606681, -15.35486390120541]
 * x: [2.4182964143573114, 1.0364205793963885, 0.000497907758177069]


[32mProgress: 100%|███████████████████████████| Time: 0:04:35 ( 0.28  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220106 06:40:44 loss: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(1.6861788549867316,0.019945064070168758,-0.0017525014717441696,0.0006810994706517988) β: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(4.011404468993965,-2.203320144702396,-1.841520499344239,-0.027247039139095375): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighbor

     1     1.371352e+00     1.299797e+00
 * Current step size: 0.0003376851487172068
 * time: 7777.389698028564
 * g(x): [0.016136936169235457, -0.003589407466564016, 1.2997968213488595]
 * x: [2.418310572852703, 1.0364198796958481, 0.0056830172581880875]


[32mProgress: 100%|███████████████████████████| Time: 0:04:33 ( 0.28  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220106 08:50:30 loss: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(1.370844061502943,0.012778422589867843,-0.0034557909645783055,1.2062073886707956) β: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(1.069398696296752,0.26119486903243155,-0.69121336382819,29.630023598533835): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)},

     2     1.369259e+00     1.784792e-01
 * Current step size: 5.89689916862407
 * time: 10095.340451002121
 * g(x): [-0.006803318976086309, -0.002491903291629999, 0.178479203497622]
 * x: [2.4182827509835043, 1.0364259802518256, 0.0032967550211813025]


[32mProgress: 100%|███████████████████████████| Time: 0:04:46 ( 0.29  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220106 09:29:09 loss: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(1.369265813631855,-0.011282186077518793,-0.002213234594251102,-0.23466117348558388) β: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(0.987433833821948,0.21608959008347448,-0.6471350293969289,41.68065185548802): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood

     3     1.369243e+00     1.397650e-02
 * Current step size: 0.43308715103750667
 * time: 10759.756019115448
 * g(x): [-0.00871028759625301, -0.002376508537641774, 0.01397649639170103]
 * x: [2.418290464475559, 1.0364285362526344, 0.0031321720512949664]


[32mProgress: 100%|███████████████████████████| Time: 0:04:36 ( 0.28  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220106 09:40:06 loss: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(1.369242705569328,-0.008879885815890717,-0.002365504928532769,-0.0014439595624115315) β: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(0.9956467058130594,0.22082702830028747,-0.6515032299257606,40.103396561580695): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighbor

     4     1.369243e+00     8.932180e-03
 * Current step size: 1.307874124035611
 * time: 11747.191003084183
 * g(x): [-0.0089321801771347, -0.002362104588369521, -0.006225741152135959]
 * x: [2.4183030264819374, 1.0364319432519922, 0.003113157941385315]


[32mProgress: 100%|███████████████████████████| Time: 0:04:26 ( 0.27  s/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220106 09:56:32 loss: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(1.3692425808740003,-0.008933139550443435,-0.0023613514249380975,-0.006326464159707899) β: Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}}(0.9954588855457661,0.22071902132695548,-0.6514066176331489,40.13869978136758): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float32}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#20"{var"#17#19"{cf_params, Dict{String, typeof(get_abs_neighbor