In [1]:
# Path associated with this notebook's outputs.
# NOTE(review): not referenced in the visible cells; presumably consumed by the code
# pulled in from Alpha.ipynb — confirm.
name = "../../data/recommendations/ItemSimilarity"
# Alpha names handed to `get_residuals` when each data split is loaded below.
residual_alphas = ["UserItemBiases"];

In [2]:
import BSON

In [3]:
using NBInclude
@nbinclude("../TrainingAlphas/Alpha.ipynb");

In [4]:
# Load the anime_id -> uid table and the raw anime metadata (parsed with a single task),
# then keep only the anime that appear in both, matched on `anime_id`.
anime_to_uid = CSV.File("../../data/processed_data/anime_to_uid.csv") |> DataFrame;
anime = CSV.File("../../data/raw_data/anime.csv", ntasks = 1) |> DataFrame;
anime_to_uid = innerjoin(anime_to_uid, anime; on = "anime_id");

## Rating correlation

In [5]:
# Norm of every column of `R` (memoized per `R`). Columns whose norm is zero report a
# norm of 1 instead, so that callers can divide by the result without producing NaN/Inf.
@memoize function get_norms(R)
    column_norms = map(norm, eachslice(R, dims = 2))
    for idx in eachindex(column_norms)
        if column_norms[idx] == 0
            column_norms[idx] = 1 # prevent division by 0
        end
    end
    column_norms
end;

"""
    get_correlation_neighborhood(item, R, K)

Return the `K` columns of `R` most similar to column `item`, together with their
similarity weights, in descending order of weight.

The weight of column `j` is the inner product of columns `item` and `j` of `R`, divided
by the norms of both columns as reported by `get_norms`.

# Returns
A tuple `(indices, weights)` of two length-`K` vectors.

# Throws
`BoundsError` if `K` exceeds the number of columns of `R`.
"""
function get_correlation_neighborhood(item, R, K)
    column_norms = get_norms(R)
    dots = vec(R[:, item]' * R)
    similarity = dots ./ column_norms ./ column_norms[item]
    ranked = sortperm(similarity, rev = true)
    top = ranked[1:K]
    return top, similarity[top]
end

# Residuals (with respect to `residual_alphas`) of the training, validation and test
# splits, combined with `cat` in that order and wrapped in a `RatingsDataset64`.
const allsplits = RatingsDataset64(
    reduce(
        cat,
        [
            get_residuals(split_name, residual_alphas) for
            split_name in ["training", "validation", "test"]
        ],
    ),
)

# Sparse matrix of the combined ratings: rows are indexed by user, columns by item, and
# the dimensions are the largest user and item indices present in `allsplits`.
R = let users = allsplits.user, items = allsplits.item
    sparse(users, items, allsplits.rating, maximum(users), maximum(items))
end;

## Embedding distance

In [33]:
# function get_embedding_neighborhood(item, kernel, K, p)
#     dists = map(x -> norm(x, p), eachslice(kernel .- kernel[:, item], dims = 2))
#     weights = -dists
#     order = sortperm(dists)[1:K]
#     order[1:K], weights[order[1:K]]
# end

"""
    get_embedding_neighborhood(item, kernel, K, p)

Return the `K` columns of `kernel` with the largest inner product against column
`item`, together with those inner products, in descending order.

# Arguments
- `item`: column index of the query item in `kernel`.
- `kernel`: matrix whose columns are the item embeddings.
- `K`: number of neighbors to return.
- `p`: unused by the inner-product similarity; kept so existing callers keep working.

# Returns
A tuple `(indices, weights)` of two length-`K` vectors.

# Throws
`BoundsError` if `K` exceeds the number of columns of `kernel`.
"""
function get_embedding_neighborhood(item, kernel, K, p)
    # Score against the `kernel` argument rather than a global embedding matrix, so the
    # function honors whatever embeddings the caller passes in.
    weights = kernel' * kernel[:, item]
    order = sortperm(weights, rev = true)[1:K]
    return order, weights[order]
end

"""
    normalize_rows(X)

Center every row of `X` at zero mean and rescale it to unit Euclidean norm.

Each row is divided by its sample standard deviation and by `sqrt(size(X, 2) - 1)`,
which together equal the Euclidean norm of the centered row.

Rows with zero standard deviation (constant rows) are returned as all zeros instead of
NaN, so they cannot contaminate downstream similarity scores.
"""
function normalize_rows(X)
    μ = mean(X, dims = 2)
    # Treat a zero standard deviation as 1: the centered row is all zeros anyway, and
    # this avoids the 0 / 0 = NaN entries the plain division would produce.
    σ = map(s -> iszero(s) ? one(s) : s, std(X, dims = 2))
    return (X .- μ) ./ σ / sqrt(size(X, 2) - 1)
end

# Load the saved model `m` from the path stored under "model" in the GNN.1 parameters.
BSON.@load read_params("GNN.1")["model"] m
# Take the weight matrix `W` from the model, drop its last row, normalize each remaining
# row, and transpose so that each column of `item_embeddings` is one normalized row.
# NOTE(review): assumes the rows of `W` (except the last) are per-item embeddings — confirm.
item_embeddings = let W = m[end].paths[2][end].W
    collect(normalize_rows(W[1:end-1, :])')
end
#item_embeddings = collect(m[end].paths[2][end].W[1:end-1, :]')



# item_embeddings = normalize_rows(
#     vcat(
#         [
#             collect(read_params("MatrixFactorization.10")["A"]')
#             collect(read_params("MatrixFactorization.20")["A"]')
#             collect(read_params("MatrixFactorization.40")["A"]')
#         ],
#     ),
# );

In [34]:
# Sanity check: squared Euclidean norm of the first embedding column.
sum(abs2, item_embeddings[:, 1])

0.9999999489956629

In [35]:
# Display the embedding matrix for inspection.
item_embeddings

512×18952 Matrix{Float64}:
  0.0106984     0.0248302   -0.0297881    …  -0.0687551   -0.0989704
  0.0276485     0.0265447    0.0273609       -0.0809831    0.0057281
  0.017943      0.0241626    0.0225752       -0.0467197   -0.0126306
 -0.00839203   -0.00366141   0.00450299      -0.015713     0.019321
  0.0544781     0.0778828    0.0634758        0.114164    -0.0138424
  0.0241217     0.0322068   -0.000396235  …  -0.00387985   0.0193752
 -0.0122532    -0.0178925   -0.0111085        0.0261117    0.015431
  0.015314      0.0144015    0.0055518       -0.0261875    0.00188437
 -0.0271665    -0.0148956   -0.0172454        0.0279557   -0.0742629
  0.0248413     0.0367377    0.00520447       0.0378188    0.0256004
 -0.0415802    -0.0324428    0.0182154    …  -0.00470098  -0.00864064
  0.000615042   0.0148209    0.0118555        0.0364203    0.0296078
 -0.04047      -0.0335678   -0.0486003       -0.00776298   0.00271118
  ⋮                                       ⋱   ⋮           
 -0.089114     -

# Compute Neighborhoods

In [36]:
# uid of the anime whose anime_id is 189; `only` throws unless exactly one row matches.
uid = only(anime_to_uid.uid[anime_to_uid.anime_id .== 189])

166

In [37]:
# 30 nearest neighbors by rating correlation. The matrix column for an anime is its
# uid + 1 (`get_series` subtracts 1 again to recover the uid).
items, weights = get_correlation_neighborhood(uid + 1, R, 30);
corr_df = DataFrame(:anime => items, :weight => weights);

In [38]:
# 30 nearest neighbors by embedding inner product. The embedding column for an anime is
# its uid + 1 (`get_series` subtracts 1 again to recover the uid).
items, weights = get_embedding_neighborhood(uid + 1, item_embeddings, 30, 2);
embed_df = DataFrame(:anime => items, :weight => weights);

# Display Similar Series

In [39]:
"""
    get_series(df)

Attach anime metadata to a neighborhood table and sort it by descending `weight`.

`df` must have an `anime` column of item indices and a `weight` column. A `uid` column
equal to `anime - 1` is added to a copy of `df`, which is then inner-joined with the
global `anime_to_uid` table on `uid`. The input `df` is not modified.
"""
function get_series(df)
    with_uid = copy(df)
    with_uid.uid = with_uid.anime .- 1
    joined = innerjoin(with_uid, anime_to_uid, on = "uid")
    return sort(joined, :weight, rev = true)
end;

In [40]:
# Set the LINES environment variable for this session.
# NOTE(review): presumably done so the tables displayed below show up to 30 rows — confirm.
ENV["LINES"] = 30;

In [41]:
# Show the rating-correlation neighbors with their metadata, highest weight first.
get_series(corr_df)

Unnamed: 0_level_0,anime,weight,uid,anime_id,title
Unnamed: 0_level_1,Int64,Float32,Int64,Int64,String
1,167,0.999997,166,189,Love Hina
2,168,0.3949,167,190,Love Hina Again
3,169,0.32214,168,191,Love Hina Christmas Special: Silent Eve
4,170,0.309668,169,192,Love Hina Haru Special: Kimi Sakura Chiru Nakare!!
5,869,0.255753,868,963,"Love Hina: Motoko no Sentaku, Koi ka Ken... ""Naku na"""
6,2898,0.192223,2897,3247,Love Hina Final Selection
7,173,0.0830307,172,195,Onegai☆Teacher
8,41,0.0784482,40,59,Chobits
9,136,0.0775308,135,157,Mahou Sensei Negima!
10,32,0.0717814,31,50,Aa! Megami-sama! (TV)


In [42]:
# Show the embedding neighbors with their metadata, highest weight first.
get_series(embed_df)

Unnamed: 0_level_0,anime,weight,uid,anime_id,title
Unnamed: 0_level_1,Int64,Float64,Int64,Int64,String
1,167,1.0,166,189,Love Hina
2,168,0.840407,167,190,Love Hina Again
3,169,0.763562,168,191,Love Hina Christmas Special: Silent Eve
4,170,0.745847,169,192,Love Hina Haru Special: Kimi Sakura Chiru Nakare!!
5,869,0.695854,868,963,"Love Hina: Motoko no Sentaku, Koi ka Ken... ""Naku na"""
6,41,0.694563,40,59,Chobits
7,2898,0.678576,2897,3247,Love Hina Final Selection
8,32,0.672177,31,50,Aa! Megami-sama! (TV)
9,15,0.646117,14,24,School Rumble
10,225,0.626733,224,248,Ichigo 100%
