In [1]:
# Name of this notebook.
# NOTE(review): presumably consumed by the helpers pulled in from Alpha.ipynb
# (e.g. to label logs or outputs) — confirm against that notebook.
source = "Recommendations";

In [2]:
using NBInclude
@nbinclude("../ProductionAlphas/Alpha.ipynb");

In [3]:
# COLUMNS / LINES are the environment variables Julia consults for the display
# size; a very large COLUMNS keeps wide tables from being truncated.
ENV["COLUMNS"] = 999999;
ENV["LINES"] = 100;
# When false, items with a nonzero strict-relations signal are excluded from the
# recommendations (see the `if !recommend_related_series` cell).
recommend_related_series = false;

In [4]:
# Load the raw anime metadata and the anime_id/uid table, then attach the metadata
# to the uid table. The inner join keeps only anime_ids present in both files.
anime = CSV.File("../../data/raw_data/anime.csv"; ntasks = 1) |> DataFrame
anime_to_uid = innerjoin(
    CSV.File("../../data/processed_data/anime_to_uid.csv") |> DataFrame,
    anime;
    on = "anime_id",
);

In [5]:
# One row per item, built from the recommendee's alpha outputs:
#   rating - the "CombineSignals" prediction
#   alpha  - that prediction minus the "UserItemBiases" output
#   std    - the "PredictedErrors" output
#   p      - the "ANN" output
# uid is zero-based, so row i describes uid i - 1.
predictions = read_recommendee_alpha("CombineSignals").rating
rating_df = let baseline = read_recommendee_alpha("UserItemBiases").rating
    DataFrame(
        "uid" => 0:(length(predictions) - 1),
        "rating" => predictions,
        "alpha" => predictions - baseline,
        "std" => read_recommendee_alpha("PredictedErrors").rating,
        "p" => read_recommendee_alpha("ANN").rating,
    )
end;
# Penalize items for uncertainty, which can come from 1) model error or
# 2) insufficient training data. The factor 1 is the penalty weight.
rating_df[!, :score] = rating_df.rating .- 1 .* rating_df.std;

In [6]:
# Evaluate the predictions in-sample: compare the recommendee's own ratings with
# the predictions for those same items, logging RMSE, MAE and R^2 in that order.
df = get_recommendee_list()
for metric in (rmse, mae, r2)
    @debug metric(df.rating, predictions[df.item])
end

[ Debug: 20220124 06:00:22 1.2921525637618538
[ Debug: 20220124 06:00:22 0.9872614997567055
[ Debug: 20220124 06:00:22 0.4451926269017402


In [7]:
# Mark the rows of `df` selected by `mask` as not recommendable, in place.
# `mask` is anything usable as a vector index (integer positions or a boolean
# mask). Selected rows get rating = 0 and p = 0, while alpha and score are set to
# -Inf so that they fall below every finite value.
# Returns the view of `df.p` that was overwritten.
function dont_recommend!(df, mask)
    fill!(view(df.rating, mask), 0)
    fill!(view(df.alpha, mask), -Inf)
    fill!(view(df.score, mask), -Inf)
    return fill!(view(df.p, mask), 0)
end;

# Don't recommend shows that the user has already seen before: first the
# recommendee list, then the implicit list.
for seen_list in (get_recommendee_list, get_implicit_list)
    dont_recommend!(rating_df, seen_list().item)
end

# Don't recommend shows related to shows they have seen before, i.e. every item
# whose strict-relations signal is nonzero.
if !recommend_related_series
    related_series =
        get_alpha("ItemCFRelated.strict_relations", "recommendee_inference").rating .!= 0
    dont_recommend!(rating_df, related_series)
end;

# Renormalize p so that it sums to one again after the exclusions zeroed entries.
rating_df.p = rating_df.p / sum(rating_df.p);

In [8]:
# Attach the anime metadata to the per-item scores and keep only the columns
# used for display, in display order.
rec_df = select(
    innerjoin(anime_to_uid, rating_df; on = "uid"),
    [
        "anime_id", "uid", "title", "genres", "medium",
        "rating", "alpha", "std", "score", "p",
    ],
);

In [9]:
# Combined number of entries in the recommendee list and the implicit list.
sum(length(seen_list().item) for seen_list in (get_recommendee_list, get_implicit_list))

420

In [10]:
# Spread of p over all items: median, 75th percentile and 90th percentile.
(median(rec_df.p), quantile(rec_df.p, [0.75, 0.9])...)

(1.5763948237367375e-7, 2.1463280446115106e-5, 0.00013660620714230212)

In [11]:
# The model predicts ratings conditional on the user watching the series, but we
# want ratings for items the user has not already decided to watch. To mitigate
# that sampling bias, first restrict to series the user is likely to be interested
# in. The thresholds are the 75th percentile of alpha and the 80th percentile of p
# among TV series.
min_alpha, min_p = let tv = filter(row -> row.medium == "tv", rec_df)
    quantile(tv.alpha, 0.75), quantile(tv.p, 0.8)
end;

In [12]:
# Show the quantile-derived thresholds computed in the previous cell.
min_alpha, min_p

(0.19866622948507517, 0.00014881216963895372)

In [13]:
# Manual override: discard the quantile-derived min_alpha and use a fixed
# threshold instead (the recorded run had computed ~0.199).
min_alpha = 0.1

0.1

In [14]:
# Keep the rows of `df` whose alpha is strictly above the global `min_alpha` and
# whose p is at least the global `min_p`. Returns a new table; `df` is untouched.
function sampling_filter(df)
    return filter(row -> row.alpha > min_alpha && row.p >= min_p, df)
end

sampling_filter (generic function with 1 method)

In [15]:
# Spot check: show the row(s) for the title "Monster".
rec_df[rec_df.title .== "Monster", :]

Unnamed: 0_level_0,anime_id,uid,title,genres,medium,rating,alpha,std,score,p
Unnamed: 0_level_1,Int64,Int64,String,String,String7,Float64,Float64,Float64,Float64,Float64
1,19,17,Monster,"['Drama', 'Horror', 'Mystery', 'Police', 'Psychological', 'Seinen', 'Suspense']",tv,6.31506,-1.37899,1.22866,5.0864,0.00324212


In [16]:
# Return a copy of `df` ordered from highest to lowest `score`, with the `score`
# column itself dropped from the result.
pretty_display(df) = select(sort(df, :score; rev = true), Not(:score));

In [17]:
# Shortlist the (up to) 100 best-scoring TV series that pass the sampling filter,
# then list the titles of the (up to) 10 with the highest p among them.
# `first(x, n)` replaces the hard-coded `[1:100, :]` / `[1:10]` slices, which throw
# a BoundsError whenever fewer rows survive the filters. `first` on a data frame
# returns a fresh table, so the in-place `sort!` never touches `rec_df`.
first(
    sort!(
        first(
            filter(x -> x.medium == "tv", rec_df |> sampling_filter) |> pretty_display,
            100,
        ),
        :p,
        rev = true,
    ).title,
    10,
)

10-element Vector{String}:
 "Hibike! Euphonium"
 "Shirobako"
 "Girls & Panzer"
 "Hibike! Euphonium 2"
 "Kanon (2006)"
 "Air"
 "The iDOLM@STER"
 "Chobits"
 "Rozen Maiden"
 "Kemono Friends"

In [18]:
# TV series that pass the sampling filter, best score first.
pretty_display(filter(row -> row.medium == "tv", sampling_filter(rec_df)))

Unnamed: 0_level_0,anime_id,uid,title,genres,medium,rating,alpha,std,p
Unnamed: 0_level_1,Int64,Int64,String,String,String7,Float64,Float64,Float64,Float64
1,31988,16650,Hibike! Euphonium 2,"['Drama', 'Music', 'School']",tv,7.31783,0.276794,0.860187,0.00227862
2,34914,2128,New Game!!,"['Comedy', 'Game', 'Slice of Life']",tv,7.01229,0.713709,0.633501,0.00114507
3,3750,3684,Maria-sama ga Miteru 4th,"['Girls Love', 'School', 'Shoujo', 'Slice of Life']",tv,7.17142,0.75794,0.80201,0.00015453
4,1530,1514,Kanon (2006),"['Drama', 'Romance', 'Slice of Life', 'Supernatural']",tv,7.22907,0.732846,0.889388,0.00197499
5,38993,5956,Karakai Jouzu no Takagi-san 2,"['Comedy', 'Romance', 'School', 'Shounen', 'Slice of Life']",tv,6.89854,0.300659,0.71322,0.000212542
6,488,484,Ichigo Mashimaro,"['Comedy', 'Slice of Life']",tv,6.90632,0.652097,0.725876,0.0015532
7,27989,15108,Hibike! Euphonium,"['Drama', 'Music', 'School']",tv,6.96863,0.296272,0.821908,0.00314564
8,1858,1841,Gakuen Utopia Manabi Straight!,"['Comedy', 'School', 'Slice of Life']",tv,6.89827,0.970434,0.808837,0.000309187
9,593,589,Mugen no Ryvius,"['Drama', 'Mecha', 'Military', 'Psychological', 'Sci-Fi', 'Space']",tv,7.15226,0.717948,1.09881,0.000212193
10,877,864,Nana,"['Comedy', 'Drama', 'Music', 'Romance', 'Shoujo', 'Slice of Life']",tv,7.2808,0.268505,1.23625,0.00128622


In [19]:
# sort!(
#     filter(
#         x -> x.p >= min_p,
#         filter(x -> x.alpha .> min_alpha, filter(x -> x.medium == "tv", rec_df)),
#     ),
#     :score,
#     rev = true,
# ).title[1:50]

In [20]:
# Every medium: all items that pass the sampling filter, best score first.
pretty_display(sampling_filter(rec_df))

Unnamed: 0_level_0,anime_id,uid,title,genres,medium,rating,alpha,std,p
Unnamed: 0_level_1,Int64,Int64,String,String,String7,Float64,Float64,Float64,Float64
1,42984,9352,Gotcha!,"['Fantasy', 'Music']",music,7.81981,0.641094,1.19288,0.000159476
2,31988,16650,Hibike! Euphonium 2,"['Drama', 'Music', 'School']",tv,7.31783,0.276794,0.860187,0.00227862
3,34914,2128,New Game!!,"['Comedy', 'Game', 'Slice of Life']",tv,7.01229,0.713709,0.633501,0.00114507
4,3750,3684,Maria-sama ga Miteru 4th,"['Girls Love', 'School', 'Shoujo', 'Slice of Life']",tv,7.17142,0.75794,0.80201,0.00015453
5,33970,1190,Girls & Panzer: Saishuushou Part 1,"['Military', 'School']",movie,7.21071,0.633273,0.867454,0.000155275
6,1729,1713,Maria-sama ga Miteru 3rd,"['Drama', 'Girls Love', 'School', 'Slice of Life']",ova,7.21715,0.748611,0.875576,0.00033268
7,1530,1514,Kanon (2006),"['Drama', 'Romance', 'Slice of Life', 'Supernatural']",tv,7.22907,0.732846,0.889388,0.00197499
8,32153,16711,Mahou Shoujo Madoka★Magica: Concept Movie,"['Drama', 'Psychological', 'Suspense']",movie,7.13749,1.37538,0.823932,0.000211705
9,38993,5956,Karakai Jouzu no Takagi-san 2,"['Comedy', 'Romance', 'School', 'Shounen', 'Slice of Life']",tv,6.89854,0.300659,0.71322,0.000212542
10,488,484,Ichigo Mashimaro,"['Comedy', 'Slice of Life']",tv,6.90632,0.652097,0.725876,0.0015532
