# Item Collaborative Filtering
* See `ItemCollaborativeFilteringBase.ipynb` for algorithm details
* The weights here are the cosine similarities between pairs of items, computed on the residual ratings

In [1]:
# model name; get_correlation_matrix_outdir uses it as the parent directory of the
# cached similarity matrix
name = "ItemCFResid";

In [2]:
using NBInclude
@nbinclude("ItemCollaborativeFilteringBase.ipynb");

## Compute cosine correlations

In [3]:
"""
    get_correlation_matrix_outdir(residual_alphas)

Return the output directory name of the item-item cosine similarity matrix built from
the training residuals of `residual_alphas`, generating and storing the matrix first
if it is not already on disk.

The directory name is the global `name` joined with the hash of `residual_alphas`.
If that directory already exists under `../../data/alphas/`, it is returned without
recomputing anything. Otherwise a dense matrix `S` is computed, where `S[j, i]` is the
dot product of the rating columns of items `j` and `i` divided by both column norms,
and handed to `write_params` under the key `"S"`.
"""
function get_correlation_matrix_outdir(residual_alphas)
    outdir = "$name/$(hash(residual_alphas))"
    # cache hit: the matrix for these alphas has already been generated
    if ispath("../../data/alphas/$outdir")
        return outdir
    end

    @debug "generating similarity matrix for $residual_alphas"
    training = get_residuals("training", residual_alphas)
    # matrix dimensions come from the largest ids present in the training split;
    # compute each maximum once instead of rescanning the columns
    n_users = maximum(training.user)
    n_items = maximum(training.item)
    R = sparse(training.user, training.item, training.rating, n_users, n_items)
    S = zeros(n_items, n_items)

    norms = map(norm, eachslice(R, dims = 2))
    norms[norms.==0] .= 1 # prevent division by 0
    # each iteration writes only column i of S, so iterations are independent
    @tprogress Threads.@threads for i = 1:n_items
        S[:, i] = vec(R[:, i]' * R) ./ norms ./ norms[i]
    end

    write_params(Dict("S" => S), outdir = outdir)
    return outdir
end;

## Setup hyperparameters

In [4]:
# return `Int(x)` when `x` is integer-valued, otherwise `x` unchanged
downcast_to_int(x) = isinteger(x) ? Int(x) : x

# alphas that, together with "UserItemBiases", are passed as the residual inputs below
item_alphas = ["ItemCF.$K" for K in downcast_to_int.([2^4, 2^6, 2^8, 2^10])]

item_cf_params = let residual_alphas = ["UserItemBiases"; item_alphas]
    # the similarity matrix depends only on the residual alphas, not on K, so
    # resolve (or generate) it once instead of once per parameter set
    S_outdir = get_correlation_matrix_outdir(residual_alphas)
    [
        cf_params(
            name = "ItemCFResid.$K",
            # each field gets its own copy so the parameter sets do not share storage
            training_residuals = copy(residual_alphas),
            validation_residuals = copy(residual_alphas),
            neighborhood_type = "abs",
            S = S_outdir,
            K = K,
            λ = [1.0, 1.0, 0.0],
        ) for K in downcast_to_int.([2^8])
    ]
end;

## Train models

In [None]:
# run the optimizer on every parameter set, in order
foreach(optimize_model, item_cf_params)

[32mProgress: 100%|███████████████████████████| Time: 0:03:39 (55.30 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 14:44:51 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2561244923782138,-0.00500613411669654,0.0007931270595019888,-7.081951990093505e-5) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2012661970345693,2.858771488249437,-0.6184841084904319,0.05904739184038273): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborho

Iter     Function value   Gradient norm 
     0     1.256124e+00     5.006134e-03
 * Current step size: 1.0
 * time: 0.022655010223388672
 * g(x): [-0.00500613411669654, 0.0007931270595019888, -7.081951990093505e-5]
 * x: [0.1, 0.9, 11.443905449241216]


[32mProgress: 100%|███████████████████████████| Time: 0:03:38 (54.87 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 14:48:36 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2560991864514852,-0.004854938345107371,0.0007633597868912047,-6.915344722977888e-5) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2161823890348296,2.902052844225679,-0.6234861037754464,0.060057626464899504): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighbor

     1     1.255744e+00     1.219119e-04
 * Current step size: 27.786448094414588
 * time: 1089.0223190784454
 * g(x): [-0.00010840053744305696, -0.00012191193170185788, -2.693972180505159e-5]
 * x: [0.23910268578726646, 0.8779618161288724, 11.445873272155014]


[32mProgress: 100%|███████████████████████████| Time: 0:03:12 (48.32 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 15:06:18 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2557436267124915,3.273722999074925e-6,-7.649809594825173e-5,-2.604615406580052e-5) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.7207405629179902,4.351626546318109,-0.7845144900117014,0.09644581000202314): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborho

     2     1.255744e+00     6.312409e-05
 * Current step size: 1.533584822850357
 * time: 1691.9368119239807
 * g(x): [6.312408571097121e-5, -5.231332261189141e-5, -2.5572518673444802e-5]
 * x: [0.2439902260764146, 0.8833216463243835, 11.447062916013047]


[32mProgress: 100%|███████████████████████████| Time: 0:03:11 (48.23 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 15:16:20 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2557434151937636,1.2247790307004192e-5,-5.613037125367123e-6,-2.58466604974516e-5) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.720496051351539,4.353811497567284,-0.7949016957865892,0.09649689809988904): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhoo

     3     1.255743e+00     2.593596e-05
 * Current step size: 1.3254161929989923
 * time: 2288.00953912735
 * g(x): [-4.282662751870223e-6, 9.57146712158662e-6, -2.5935955587361987e-5]
 * x: [0.24266290188659695, 0.8866084758002167, 11.448486859692467]


[32mProgress: 100%|███████████████████████████| Time: 0:03:12 (48.37 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 15:26:17 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2557433794488295,-4.124210244314427e-6,9.567279843142977e-6,-2.592857004860284e-5) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.7185561219436802,4.348951324606702,-0.7961357542647028,0.09635211387533363): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborho

     4     1.255651e+00     6.513596e-04
 * Current step size: 14209.18562020763
 * time: 4030.296732902527
 * g(x): [0.0006513596437316209, -8.2827328878744e-5, 3.0974551186853886e-6]
 * x: [0.19422036153642955, 0.8718309562791031, 26.241425873883358]


[32mProgress: 100%|███████████████████████████| Time: 0:03:10 (47.92 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 15:55:17 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2556436740477044,0.0001986877384755228,-2.8195504862274e-6,1.6582853216536753e-6) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(2.52944259830953,7.456715792031507,-1.1138593069465192,0.06865838491240409): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)

     5     1.255643e+00     3.461729e-05
 * Current step size: 1.4756559511646348
 * time: 4607.749154090881
 * g(x): [-1.3992854061771862e-5, 3.461729210389886e-5, 9.288778029335517e-7]
 * x: [0.17211574114347003, 0.8730006978517514, 25.956978481295813]


[32mProgress: 100%|███████████████████████████| Time: 0:03:22 (50.94 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 16:05:07 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2556424120657173,-1.3379053931340093e-5,5.075420208167593e-6,6.799055653542158e-7) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(2.447186357827272,7.161673420666973,-1.0783976611665118,0.0674800697933921): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood

     6     1.255642e+00     5.063411e-05
 * Current step size: 2.882168539394575
 * time: 5241.8150289058685
 * g(x): [-1.5533124301701187e-5, -5.063410518484095e-5, 1.4073890149929195e-7]
 * x: [0.17688229179859216, 0.868812440074006, 24.381447988598666]


[32mProgress: 100%|███████████████████████████| Time: 0:03:11 (48.29 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 16:15:31 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2556418642872167,-3.875278309849474e-6,-1.140800693433778e-5,-9.32581377454454e-9) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(2.3866165671852078,6.9187588413311945,-1.0519793164072344,0.0693385631421962): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborho

     7     1.255642e+00     4.550249e-06
 * Current step size: 1.174925467786604
 * time: 5850.077039003372
 * g(x): [-1.8699928942415805e-6, -4.550248704860085e-6, -3.691688759780716e-8]
 * x: [0.1795606040716145, 0.8720751537235332, 23.89850233393179]


[32mProgress: 100%|███████████████████████████| Time: 0:03:49 (57.65 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 16:26:17 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.2556418595336216,4.844741610142694e-7,3.5327656764912834e-7,5.94570903869958e-9) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(2.3868798188657223,6.919922331299344,-1.0545864236169569,0.06940549530521468): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood

     8     1.255642e+00     2.277294e-07
 * Current step size: 0.8905409876179644
 * time: 6313.7614760398865
 * g(x): [2.277293708036575e-7, -1.834167610184601e-7, 1.2737367689518493e-9]
 * x: [0.17949008219636026, 0.8723256369460205, 23.947899457325477]


[32mProgress: 100%|███████████████████████████| Time: 0:04:31 (68.43 ms/it)[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20211231 16:34:45 loss: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(1.255641859510327,-7.078817758538532e-9,4.369208227196094e-9,-2.8662635676562452e-11) β: Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}}(2.38644990133269,6.918266938453817,-1.054338038118929,0.0694100821400346): λ ForwardDiff.Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)}, Int64, Matrix{Float64}}, RatingsDataset, RatingsDataset}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#validation_mse#17"{var"#14#16"{cf_params, Dict{String, typeof(get_abs_neighborhood)

     9     1.255642e+00     1.439361e-09
 * Current step size: 0.9759831704135143
 * time: 6864.404228925705
 * g(x): [-1.439360998211004e-9, -1.408056525475718e-10, 2.619259759706045e-12]
 * x: [0.17948731395513118, 0.8723354575709694, 23.946905176074917]


[32mProgress: 100%|███████████████████████████| Time: 0:38:02 ( 0.54  s/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20211231 17:19:14 training set: RMSE 1.0772049186715105 MAE 0.779596724209766 R2 0.06668462869001401
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20211231 17:19:21 validation set: RMSE 1.1205542644202133 MAE 0.81166477002724 R2 0.013576840927245537
