In [1]:
# NOTE(review): `name` is not referenced in the visible cells; presumably the
# included Alpha notebook reads it as this notebook's output location — confirm.
name = "../../data/recommendations/ItemSimilarity"
# Alphas handed to `get_residuals` when the rating splits are loaded.
residual_alphas = ["UserItemBiases"];

In [2]:
using NBInclude
@nbinclude("../TrainingAlphas/Alpha.ipynb");

In [3]:
using SparseArrays
using DataFrames

In [4]:
# Load the anime_id -> uid mapping and the raw anime metadata, then keep only
# the rows whose `anime_id` appears in both tables.
anime_to_uid = CSV.File("../../data/processed_data/anime_to_uid.csv") |> DataFrame;
anime = CSV.File("../../data/raw_data/anime.csv") |> DataFrame;
anime_to_uid = innerjoin(anime_to_uid, anime; on = "anime_id");

## Rating correlation

In [5]:
# Norm of every column of `R`, with zero norms replaced by 1 so callers can
# divide by the result safely. Wrapped in `@memoize`, so repeated calls with
# the same matrix reuse the cached vector.
@memoize function get_norms(R)
    column_norms = map(norm, eachcol(R))
    for idx in eachindex(column_norms)
        # prevent division by 0
        if column_norms[idx] == 0
            column_norms[idx] = 1
        end
    end
    return column_norms
end;

"""
    get_correlation_neighborhood(item, R, K)

Return the `K` columns of `R` that are most similar to column `item`.

The similarity between two columns is their dot product divided by both column
norms (cosine similarity). Norms come from `get_norms`, which substitutes 1 for
zero norms, so an all-zero column gets similarity 0 instead of `NaN`.

# Arguments
- `item`: column index of the query item in `R`.
- `R`: ratings matrix with one column per item.
- `K`: number of neighbors requested; if it exceeds the number of columns,
  every column is returned.

# Returns
A tuple `(items, weights)`: `items` holds column indices ordered from most to
least similar and `weights` holds the matching similarities.
"""
function get_correlation_neighborhood(item, R, K)
    norms = get_norms(R)
    weights = vec(R[:, item]' * R) ./ norms ./ norms[item]
    # Clamp so that asking for more neighbors than there are items does not
    # raise a BoundsError.
    order = sortperm(weights, rev = true)[1:min(K, length(weights))]
    return order, weights[order]
end

# Load each split through `get_residuals`.
# NOTE(review): presumably these are ratings with the `residual_alphas`
# predictions subtracted — confirm in the included Alpha notebook.
const training = get_residuals("training", residual_alphas);
const validation = get_residuals("validation", residual_alphas);
const test = get_residuals("test", residual_alphas);

# Concatenate the three splits field by field into a single dataset.
const allsplits = RatingsDataset(
    vcat(training.user, validation.user, test.user),
    vcat(training.item, validation.item, test.item),
    vcat(training.rating, validation.rating, test.rating),
)

# Sparse matrix indexed as R[user, item], sized by the largest index seen.
R = sparse(
    allsplits.user,
    allsplits.item,
    allsplits.rating,
    maximum(allsplits.user),
    maximum(allsplits.item),
);

452577×16980 SparseMatrixCSC{Float64, Int64} with 85391078 stored entries:
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇
⣿⡇

## Embedding distance

In [6]:
"""
    get_embedding_neighborhood(item, kernel, K, p)

Return the `K` columns of `kernel` that are closest to column `item` under the
`p`-norm.

# Arguments
- `item`: column index of the query item in `kernel`.
- `kernel`: matrix holding one embedding per column.
- `K`: number of neighbors requested; if it exceeds the number of columns,
  every column is returned.
- `p`: order of the norm, forwarded to `norm`.

# Returns
A tuple `(items, weights)`: `items` holds column indices ordered from nearest
to farthest (the query column itself is included, at distance zero) and
`weights` holds the negated distances, so a larger weight means a closer item.
"""
function get_embedding_neighborhood(item, kernel, K, p)
    target = kernel[:, item]
    # Measure one column at a time instead of materializing the whole
    # `kernel .- target` difference matrix.
    dists = map(col -> norm(col .- target, p), eachslice(kernel, dims = 2))
    weights = -dists
    # Clamp so that asking for more neighbors than there are items does not
    # raise a BoundsError.
    order = sortperm(dists)[1:min(K, length(dists))]
    return order, weights[order]
end


# Stack the transposed "A" parameters of the three factorization models on top
# of each other, so every item is one column.
item_embeddings = vcat(
    collect(read_params("MatrixFactorization10")["A"]'),
    collect(read_params("MatrixFactorization20")["A"]'),
    collect(read_params("MatrixFactorization40")["A"]'),
);

70×16980 Matrix{Float64}:
  4.45117     -5.27797    -3.11874    …   2.38678    -0.512491   -2.97931
 -1.53218      1.99659    -3.09534       -0.972563   -1.04417    -0.0300188
 -0.689801    11.8814     -2.27086       -1.61779    -5.36215    -3.00539
 -2.92707     -1.52531     4.66393       -0.715207    0.488593   -0.454993
  2.88846      5.30894     3.03145        3.43452     3.43271     0.0886305
  2.16354     -0.533594   -3.59314    …   3.3231      2.73315     2.55765
 -2.82664     -0.919777    5.02297       -1.99147    -2.25105    -0.943974
  2.65113      1.33519     4.31841        2.78762     0.140085    0.42208
  2.68568      2.64292     5.8812        -1.03283    -0.533891    1.99329
 -0.192814     5.2517     -2.06104        2.1356      1.16085     1.44298
  0.63419     -3.187      -1.02936    …   0.528929   -0.812027   -0.717707
  0.621811     5.53095    -0.994116       0.166121    0.787742    0.455445
  1.2595       8.21105     1.23921       -0.345008   -0.263606    0.617078
  ⋮

# Compute Neighborhoods

In [7]:
# uid of the query series (anime_id 26); `only` throws unless exactly one row
# matches.
uid = only(anime_to_uid[anime_to_uid.anime_id.==26, :uid]);

In [8]:
# Top 30 neighbors by rating similarity. `uid + 1` turns the uid into a column
# index of `R` (uids appear to be 0-based; `get_series` maps back with `.- 1`).
items, weights = get_correlation_neighborhood(uid + 1, R, 30);
corr_df = DataFrame(anime = items, weight = weights);

In [9]:
# Top 30 neighbors by Euclidean (p = 2) distance between item embeddings.
# Note that this overwrites the `items` and `weights` globals.
items, weights = get_embedding_neighborhood(uid + 1, item_embeddings, 30, 2);
embed_df = DataFrame(anime = items, weight = weights);

# Display Similar Series

In [10]:
# Attach series metadata from `anime_to_uid` to a neighborhood table `df`
# (columns `anime` and `weight`) and return it ordered by descending weight.
# The input table is left untouched.
function get_series(df)
    with_uid = copy(df)
    # `anime` holds column indices; subtracting 1 yields the uid join key.
    with_uid.uid = with_uid.anime .- 1
    matched = innerjoin(with_uid, anime_to_uid; on = "uid")
    return sort(matched, :weight; rev = true)
end;

In [11]:
# Show the rating-similarity neighbors with their series metadata.
get_series(corr_df)

Unnamed: 0_level_0,anime,weight,uid,anime_id,title
Unnamed: 0_level_1,Int64,Float64,Int64,Int64,String
1,25,1.0,24,26,Texhnolyze
2,336,0.192323,335,339,Serial Experiments Lain
3,386,0.172929,385,387,Haibane Renmei
4,873,0.151092,872,885,Tenshi no Tamago
5,7393,0.14446,7392,7785,Yojouhan Shinwa Taikei
6,3640,0.139111,3639,3701,Kaiba
7,568,0.13849,567,570,Jin-Rou
8,366,0.136902,365,369,Boogiepop wa Warawanai
9,811,0.133121,810,820,Ginga Eiyuu Densetsu
10,434,0.128573,433,437,Perfect Blue


In [12]:
# Show the embedding-distance neighbors with their series metadata.
get_series(embed_df)

Unnamed: 0_level_0,anime,weight,uid,anime_id,title,genres
Unnamed: 0_level_1,Int64,Float64,Int64,Int64,String,String?
1,25,-0.0,24,26,Texhnolyze,"['Action', 'Drama', 'Psychological', 'Sci-Fi']"
2,366,-25.7911,365,369,Boogiepop wa Warawanai,"['Avant Garde', 'Drama', 'Horror', 'Mystery', 'Psychological', 'Supernatural']"
3,3640,-27.0785,3639,3701,Kaiba,"['Adventure', 'Mystery', 'Romance', 'Sci-Fi']"
4,7393,-28.4352,7392,7785,Yojouhan Shinwa Taikei,"['Comedy', 'Mystery', 'Psychological', 'Romance']"
5,873,-29.7006,872,885,Tenshi no Tamago,"['Avant Garde', 'Drama', 'Fantasy']"
6,6488,-30.7963,6487,6774,Kuuchuu Buranko,"['Comedy', 'Drama', 'Psychological', 'Seinen']"
7,1084,-31.0797,1083,1096,Kidou Keisatsu Patlabor 2 the Movie,"['Drama', 'Mecha', 'Military', 'Mystery', 'Police', 'Sci-Fi']"
8,2200,-31.115,2199,2216,Shigurui,"['Action', 'Drama', 'Historical', 'Martial Arts', 'Psychological', 'Samurai', 'Seinen']"
9,2229,-31.2368,2228,2246,Mononoke,"['Demons', 'Fantasy', 'Historical', 'Horror', 'Mystery', 'Psychological', 'Seinen', 'Supernatural']"
10,336,-31.2379,335,339,Serial Experiments Lain,"['Avant Garde', 'Drama', 'Mystery', 'Psychological', 'Sci-Fi', 'Supernatural']"
