In [1]:
# NOTE(review): `name` is not referenced in the visible cells; presumably it is consumed by
# the helpers pulled in from Alpha.ipynb — confirm.
name = "../../data/recommendations/ItemSimilarity"
# Alpha names passed to `get_residuals` when the ratings dataset is built below.
residual_alphas = ["UserItemBiases"];

In [2]:
import BSON

In [3]:
using NBInclude
@nbinclude("../TrainingAlphas/Alpha.ipynb");

In [4]:
# Read the uid <-> anime_id mapping and the raw anime metadata table.
anime_to_uid = CSV.File("../../data/processed_data/anime_to_uid.csv") |> DataFrame;
anime = CSV.File("../../data/raw_data/anime.csv", ntasks = 1) |> DataFrame;
# Inner join on anime_id: only anime present in both tables are kept.
anime_to_uid = innerjoin(anime_to_uid, anime; on = "anime_id");

## Rating correlation

In [5]:
# Return the norm of every column of `R`, with zero norms replaced by 1 so that callers can
# divide by the result. The function is wrapped in `@memoize`.
@memoize function get_norms(R)
    column_norms = map(norm, eachcol(R))
    for idx in eachindex(column_norms)
        # prevent division by 0
        iszero(column_norms[idx]) && (column_norms[idx] = 1)
    end
    return column_norms
end;

"""
    get_correlation_neighborhood(item, R, K)

Return the (at most) `K` columns of `R` with the largest similarity to column `item`.

The weight of column `j` is the dot product of columns `item` and `j` of `R`, divided by
both column norms as returned by `get_norms` (which substitutes 1 for zero norms, so
all-zero columns get weight 0 instead of `NaN`).

# Arguments
- `item`: column index into `R` of the query item.
- `R`: matrix whose columns are compared with each other.
- `K`: number of neighbors requested; clamped to the number of columns of `R`.

# Returns
A tuple `(indices, weights)`: the selected column indices ordered by decreasing weight and
their weights. The query column is not excluded from the result.
"""
function get_correlation_neighborhood(item, R, K)
    norms = get_norms(R)
    weights = vec(R[:, item]' * R) ./ norms ./ norms[item]
    # Clamp so that requesting more neighbors than there are columns does not throw.
    k = min(K, length(weights))
    order = sortperm(weights, rev = true)[1:k]
    return order, weights[order]
end

# Concatenate the residuals of the training, validation and test splits into one dataset.
const allsplits = let splits = ("training", "validation", "test")
    RatingsDataset64(reduce(cat, [get_residuals(s, residual_alphas) for s in splits]))
end

# Sparse matrix built from (user, item, rating) triples, sized by the largest user and item
# index that occurs in the data.
R = let users = allsplits.user, items = allsplits.item
    sparse(users, items, allsplits.rating, maximum(users), maximum(items))
end;

## Embedding distance

In [6]:
# function get_embedding_neighborhood(item, kernel, K, p)
#     dists = map(x -> norm(x, p), eachslice(kernel .- kernel[:, item], dims = 2))
#     weights = -dists
#     order = sortperm(dists)[1:K]
#     order[1:K], weights[order[1:K]]
# end

"""
    get_embedding_neighborhood(item, kernel, K, p)

Return the (at most) `K` columns of `kernel` with the largest dot product against column
`item`.

# Arguments
- `item`: column index into `kernel` of the query item.
- `kernel`: matrix with one embedding per column.
- `K`: number of neighbors requested; clamped to the number of columns of `kernel`.
- `p`: unused; kept so that existing callers keep working.

# Returns
A tuple `(indices, weights)`: the selected column indices ordered by decreasing dot product
and the dot products themselves. The query column is not excluded from the result.
"""
function get_embedding_neighborhood(item, kernel, K, p)
    # Score against the matrix that was passed in rather than any global state.
    weights = kernel' * kernel[:, item]
    # Clamp so that requesting more neighbors than there are columns does not throw.
    k = min(K, length(weights))
    order = sortperm(weights, rev = true)[1:k]
    return order, weights[order]
end

"""
    normalize_rows(X)

Center every row of `X` to zero mean and scale it to unit Euclidean norm.

Each centered row is divided by `std * sqrt(n - 1)` with `n = size(X, 2)`, which equals the
Euclidean norm of the centered row. Rows with zero standard deviation (constant rows) use a
divisor of 1 instead, so they come out as all zeros rather than `NaN`.

With fewer than two columns the standard deviation itself is `NaN`, and so is the result.

# Returns
A new array of the same shape as `X`; `X` is not modified.
"""
function normalize_rows(X)
    μ = mean(X, dims = 2)
    # prevent division by 0 for constant rows
    σ = map(s -> iszero(s) ? one(s) : s, std(X, dims = 2))
    return (X .- μ) ./ σ / sqrt(size(X, 2) - 1)
end

# Load the binding `m` from the value stored under "model" in the "GNN.1" params
# (presumably a path to a BSON file holding the trained model — confirm).
BSON.@load read_params("GNN.1")["model"] m
# Take one weight matrix out of the model, drop its last row, normalize every remaining row
# and transpose, so that each column of `item_embeddings` is one normalized row of W.
# NOTE(review): presumably the rows of W are items and the dropped last row is not a real
# item — confirm against the GNN definition.
item_embeddings = collect(normalize_rows(m[end].paths[2][end].W[1:end-1, :])')
#item_embeddings = collect(m[end].paths[2][end].W[1:end-1, :]')



# item_embeddings = normalize_rows(
#     vcat(
#         [
#             collect(read_params("MatrixFactorization.10")["A"]')
#             collect(read_params("MatrixFactorization.20")["A"]')
#             collect(read_params("MatrixFactorization.40")["A"]')
#         ],
#     ),
# );

# Compute Neighborhoods

In [9]:
# uid of the anime whose anime_id is 3326; `only` throws unless exactly one row matches.
uid = only(anime_to_uid[anime_to_uid.anime_id.==3326, :uid])

2954

In [10]:
# Top 30 neighbors by rating correlation. Item indices are uid + 1 (`get_series` subtracts 1
# again to map them back to uids).
items, weights = get_correlation_neighborhood(uid + 1, R, 30);
corr_df = DataFrame(anime = items, weight = weights);

In [11]:
# Top 30 neighbors by embedding dot product. Item indices are uid + 1 (`get_series` subtracts
# 1 again). The last argument (`p`) is not used by `get_embedding_neighborhood`.
items, weights = get_embedding_neighborhood(uid + 1, item_embeddings, 30, 2);
embed_df = DataFrame(anime = items, weight = weights);

# Display Similar Series

In [12]:
"""
    get_series(df)

Attach anime metadata to a neighborhood table.

`df` must have an `anime` column of item indices and a `weight` column. A `uid` column equal
to `anime - 1` is added to a copy of `df`, the copy is inner-joined with the global
`anime_to_uid` table on `uid`, and the result is returned sorted by decreasing `weight`.
`df` itself is left unchanged.
"""
function get_series(df)
    with_uid = copy(df)
    with_uid.uid = with_uid.anime .- 1
    joined = innerjoin(with_uid, anime_to_uid, on = "uid")
    return sort(joined, :weight, rev = true)
end;

In [13]:
# Set the LINES environment variable to 30; presumably this controls how many rows are shown
# when a DataFrame is displayed — confirm.
ENV["LINES"] = 30;

In [14]:
# Display the rating-correlation neighbors joined with their anime metadata.
get_series(corr_df)

Unnamed: 0_level_0,anime,weight,uid,anime_id,title,alternative_titles
Unnamed: 0_level_1,Int64,Float32,Int64,Int64,String,String
1,2955,0.999997,2954,3326,Inaka Isha,"{'synonyms': ['Kafka Inaka Isha', 'Ein Landarzt'], 'en': ""Franz Kafka's A Country Doctor"", 'ja': 'カフカ 田舎医者'}"
2,1240,0.19679,1239,1370,Atama Yama,"{'synonyms': ['Atama-yama'], 'en': 'Mt. Head', 'ja': '頭山'}"
3,3641,0.128019,3640,4458,Bavel no Hon,"{'synonyms': [], 'en': ""Bavel's Book"", 'ja': 'バベルの本'}"
4,1099,0.126001,1098,1205,Aru Tabibito no Nikki,"{'synonyms': ['The Diary of Tortov Roddle', ""A Traveller's Diary""], 'en': '', 'ja': '或る旅人の日記'}"
5,565,0.12368,564,601,Nekojiru-sou,"{'synonyms': [], 'en': 'Cat Soup', 'ja': 'ねこぢる草'}"
6,4817,0.123217,4816,6935,Enkinhou no Hako: Hakase no Sagashimono,"{'synonyms': ['Perspektivenbox'], 'en': ""The Researcher's Search - Perspectivenbox"", 'ja': '遠近法の箱-博士のさがしもの'}"
7,3487,0.119318,3486,4095,Chuumon no Ooi Ryouriten (1991),"{'synonyms': ['A Well-Ordered Restaurant', 'The Restaurant of Many Orders'], 'en': '', 'ja': '注文の多い料理店'}"
8,6311,0.118962,6310,10688,Muybridge no Ito,"{'synonyms': [""Muybridge's Strings""], 'en': '', 'ja': 'マイブリッジの糸'}"
9,2878,0.114922,2877,3215,Fuyu no Hi,"{'synonyms': [], 'en': 'Winter Days', 'ja': '冬の日'}"
10,4930,0.110809,4929,7243,Suisei,"{'synonyms': [], 'en': 'Aquatic', 'ja': '水棲'}"


In [15]:
# Display the embedding neighbors joined with their anime metadata.
get_series(embed_df)

Unnamed: 0_level_0,anime,weight,uid,anime_id,title,alternative_titles
Unnamed: 0_level_1,Int64,Float64,Int64,Int64,String,String
1,2955,1.0,2954,3326,Inaka Isha,"{'synonyms': ['Kafka Inaka Isha', 'Ein Landarzt'], 'en': ""Franz Kafka's A Country Doctor"", 'ja': 'カフカ 田舎医者'}"
2,1240,0.663072,1239,1370,Atama Yama,"{'synonyms': ['Atama-yama'], 'en': 'Mt. Head', 'ja': '頭山'}"
3,3643,0.644736,3642,4460,Kikumana,"{'synonyms': [], 'en': '', 'ja': 'キクマナ'}"
4,565,0.637754,564,601,Nekojiru-sou,"{'synonyms': [], 'en': 'Cat Soup', 'ja': 'ねこぢる草'}"
5,1099,0.633924,1098,1205,Aru Tabibito no Nikki,"{'synonyms': ['The Diary of Tortov Roddle', ""A Traveller's Diary""], 'en': '', 'ja': '或る旅人の日記'}"
6,975,0.597691,974,1072,Sweat Punch,"{'synonyms': ['Deep Imagination', 'Dan Petory Kyoushu no Yuutsu', ""Professor Dan Petry's Blues"", 'End of the World', 'Kigeki', 'Comedy', 'Higan', 'Beyond', 'Garakuta no Machi', 'Junk Town'], 'en': 'Sweat Punch', 'ja': 'スウェットパンチ'}"
7,2601,0.581913,2600,2832,Ani*Kuri15,"{'synonyms': ['Ani*Cre15', 'Ani Kuri 15', 'Ani-Kuri 15', 'Anikuri 15', 'Neko no Shuukai'], 'en': '', 'ja': 'アニ＊クリ15'}"
8,3080,0.579666,3079,3508,Genius Party,"{'synonyms': ['Shanghai Dragon', 'Deathtic 4', 'Doorbell', 'Limit Cycle', 'Happy Machine', 'Baby Blue'], 'en': '', 'ja': 'ジーニアスパーティー'}"
9,4742,0.570622,4741,6795,Genius Party Beyond,"{'synonyms': ['Genius Party 2', 'Gala', 'Moondrive', 'Wanwa the Puppy', 'Tojin Kit', 'Dimension Bomb'], 'en': '', 'ja': 'ジーニアスパーティービヨンド'}"
10,3983,0.56256,3982,5158,Noisy Birth,"{'synonyms': [], 'en': '', 'ja': '我ハ機ナリ'}"
