In [1]:
# NOTE(review): `source` is not referenced again in the visible cells of this notebook;
# presumably it is consumed by the code pulled in via @nbinclude below — confirm
source = "Recommendations";

In [2]:
using NBInclude
@nbinclude("../ProductionAlphas/Alpha.ipynb");

In [3]:
# environment overrides: a very wide COLUMNS value and 100 LINES
# (presumably so that displayed tables are not truncated — confirm)
for (key, value) in ("COLUMNS" => 999999, "LINES" => 100)
    ENV[key] = value
end
# when false, items flagged as related to already-seen shows are masked out later on
recommend_related_series = false;

In [4]:
# raw anime metadata, read with a single parsing task
anime = CSV.File("../../data/raw_data/anime.csv", ntasks = 1) |> DataFrame
# attach the metadata to the anime_id <-> uid mapping
anime_to_uid = innerjoin(
    CSV.File("../../data/processed_data/anime_to_uid.csv") |> DataFrame,
    anime,
    on = "anime_id",
);

In [5]:
predictions = read_recommendee_alpha("CombineSignals").rating
rating_df = DataFrame(
    # uids are zero-based: one per entry of `predictions`
    "uid" => range(0; length = length(predictions)),
    "rating" => predictions,
    # the part of the prediction that is left after subtracting the UserItemBiases alpha
    "alpha" => predictions .- read_recommendee_alpha("UserItemBiases").rating,
    "std" => read_recommendee_alpha("PredictedErrors").rating,
    "p" => read_recommendee_alpha("ANN").rating,
);
# Penalize items for uncertainty, which can come from
# 1) model error or 2) insufficient training data.
rating_df[!, "score"] = rating_df.rating .- 1 .* rating_df.std;

In [6]:
# Log in-sample error metrics of the predictions against the recommendee's own ratings.
df = get_recommendee_list()
for metric in (rmse, mae, r2)
    @debug metric(df.rating, predictions[df.item])
end

[ Debug: 20220123 23:37:09 0.8897539856867506
[ Debug: 20220123 23:37:09 0.6647985557678926
[ Debug: 20220123 23:37:09 0.5940442402636654


In [7]:
# Exclude the entries of `df` selected by `mask` from recommendation, in place.
#
# `df` must expose `rating`, `alpha`, `score` and `p` as mutable vectors; `mask` is
# anything usable as a vector index (integer positions or a boolean mask).
#
# `alpha` and `score` are set to -Inf so that the masked entries fail every finite
# `alpha` threshold and sort last by `score`; `rating` and `p` are zeroed.
# Resetting `alpha` to 0 and leaving `p` untouched (as this definition used to do)
# lets masked entries survive threshold filters whenever the `alpha` cutoff is negative.
function dont_recommend!(df, mask)
    df.rating[mask] .= 0
    df.alpha[mask] .= -Inf
    df.score[mask] .= -Inf
    df.p[mask] .= 0
end;

# items on the recommendee's list have been seen before, so exclude them
let watched = get_recommendee_list().item
    dont_recommend!(rating_df, watched)
end;

In [8]:
# Exclude the entries of `df` selected by `mask` from recommendation, in place:
# `rating` and `p` become 0, `alpha` and `score` become -Inf.
# `mask` may be anything usable as a vector index (positions or a boolean mask).
function dont_recommend!(df, mask)
    for (column, fill_value) in ((:rating, 0), (:alpha, -Inf), (:score, -Inf))
        getproperty(df, column)[mask] .= fill_value
    end
    df.p[mask] .= 0
end;

# exclude every item that appears on the recommendee list or on the implicit list
for fetch_list in (get_recommendee_list, get_implicit_list)
    dont_recommend!(rating_df, fetch_list().item)
end

# unless related series are allowed, also exclude every item whose
# strict-relations alpha is nonzero
if !recommend_related_series
    related_series = .!iszero.(
        get_alpha("ItemCFRelated.strict_relations", "recommendee_inference").rating,
    )
    dont_recommend!(rating_df, related_series)
end;

In [9]:
# join the per-item predictions onto the anime metadata and keep only the
# columns that are displayed or filtered on below
rec_df = select(
    innerjoin(anime_to_uid, rating_df, on = "uid"),
    [
        "anime_id",
        "uid",
        "title",
        "genres",
        "medium",
        "rating",
        "alpha",
        "std",
        "score",
        "p",
    ],
);

In [10]:
# combined number of entries on the recommendee list and the implicit list
sum(length(getter().item) for getter in (get_recommendee_list, get_implicit_list))

621

In [11]:
# spread of `p` over all candidates: median, 75th and 90th percentile
let p = rec_df.p
    (median(p), quantile(p, 0.75), quantile(p, 0.9))
end

(1.1855039616648355e-7, 5.043072292210127e-6, 5.259483368718069e-5)

In [12]:
# 1000 divided by the number of rows whose medium is "tv"
1000 / count(==("tv"), rec_df.medium)

0.19146084625694046

In [13]:
# spot check: show the row for anime_id 26
filter(:anime_id => ==(26), rec_df)

Unnamed: 0_level_0,anime_id,uid,title,genres,medium,rating,alpha,std,score,p
Unnamed: 0_level_1,Int64,Int64,String,String,String7,Float64,Float64,Float64,Float64,Float64
1,26,24,Texhnolyze,"['Action', 'Drama', 'Psychological', 'Sci-Fi']",tv,8.1478,-0.322322,1.10292,7.04488,9.3912e-05


In [14]:
# The model predicts ratings conditional on the user watching the series, but we
# want ratings for items the user has not already decided to watch. To mitigate
# this sampling bias, we first restrict to series the user is likely to be
# interested in, using cutoffs taken from the distribution over TV series.
min_alpha, min_p = let tv = rec_df[rec_df.medium .== "tv", :]
    quantile(tv.alpha, 0.75), quantile(tv.p, 0.8)
end;

In [15]:
# inspect the two quantile-derived cutoffs
(min_alpha, min_p)

(0.06329309495422208, 5.5020628497004574e-5)

In [16]:
# manual override: replaces the quantile-derived `min_alpha` computed above with a fixed cutoff
min_alpha = 0.1

0.1

In [17]:
# Keep only the rows of `df` with `alpha` strictly above `min_alpha` and `p` at
# least `min_p`.
#
# Both thresholds default to the globals of the same name, looked up at call time,
# so `sampling_filter(df)` behaves as before; pass the keywords to try other
# cutoffs without reassigning the globals.
#
# Both conditions are checked in a single pass, so no intermediate table is built.
function sampling_filter(df; min_alpha = min_alpha, min_p = min_p)
    return filter(row -> row.alpha > min_alpha && row.p >= min_p, df)
end

sampling_filter (generic function with 1 method)

In [18]:
# Order `df` by descending `score`, then hide the `score` column from the result.
# Returns a new table; `df` itself is not modified.
function pretty_display(df)
    ranked = sort(df, :score; rev = true)
    return select(ranked, Not(:score))
end;

In [19]:
# ranked recommendations restricted to TV series
pretty_display(filter(:medium => ==("tv"), sampling_filter(rec_df)))

Unnamed: 0_level_0,anime_id,uid,title,genres,medium,rating,alpha,std,p
Unnamed: 0_level_1,Int64,Int64,String,String,String7,Float64,Float64,Float64,Float64
1,19,17,Monster,"['Drama', 'Horror', 'Mystery', 'Police', 'Psychological', 'Seinen', 'Suspense']",tv,9.57911,0.179291,0.783126,0.000450352
2,877,864,Nana,"['Comedy', 'Drama', 'Music', 'Romance', 'Shoujo', 'Slice of Life']",tv,9.42157,0.703496,0.69414,0.000161927
3,12431,10569,Uchuu Kyoudai,"['Comedy', 'Sci-Fi', 'Seinen', 'Slice of Life', 'Space']",tv,9.37633,0.404755,0.691916,0.000164613
4,12531,10615,Sakamichi no Apollon,"['Drama', 'Josei', 'Music', 'Romance', 'School']",tv,9.13093,0.374252,0.626664,0.00022669
5,5941,5678,Cross Game,"['Comedy', 'Drama', 'Romance', 'School', 'Shounen', 'Sports']",tv,9.23264,0.394292,0.736427,8.54747e-5
6,57,55,Beck,"['Comedy', 'Drama', 'Music', 'Shounen', 'Slice of Life']",tv,9.04127,0.471829,0.586103,0.000148104
7,12365,10541,Bakuman. 3rd Season,"['Comedy', 'Drama', 'Romance', 'Shounen']",tv,9.09822,0.182425,0.645067,0.000497861
8,239,235,Gankutsuou,"['Drama', 'Mystery', 'Sci-Fi', 'Supernatural', 'Suspense']",tv,9.07008,0.303079,0.633981,9.18222e-5
9,329,327,Planetes,"['Drama', 'Romance', 'Sci-Fi', 'Seinen', 'Space']",tv,9.05609,0.194288,0.646657,0.000155614
10,37965,5019,Kaze ga Tsuyoku Fuiteiru,"['Drama', 'Sports']",tv,8.94232,0.114608,0.590573,0.000372341


In [20]:
# sort!(
#     filter(
#         x -> x.p >= min_p,
#         filter(x -> x.alpha .> min_alpha, filter(x -> x.medium == "tv", rec_df)),
#     ),
#     :score,
#     rev = true,
# ).title[1:50]

In [21]:
# ranked recommendations across every medium
pretty_display(sampling_filter(rec_df))

Unnamed: 0_level_0,anime_id,uid,title,genres,medium,rating,alpha,std,p
Unnamed: 0_level_1,Int64,Int64,String,String,String7,Float64,Float64,Float64,Float64
1,19,17,Monster,"['Drama', 'Horror', 'Mystery', 'Police', 'Psychological', 'Seinen', 'Suspense']",tv,9.57911,0.179291,0.783126,0.000450352
2,877,864,Nana,"['Comedy', 'Drama', 'Music', 'Romance', 'Shoujo', 'Slice of Life']",tv,9.42157,0.703496,0.69414,0.000161927
3,12431,10569,Uchuu Kyoudai,"['Comedy', 'Sci-Fi', 'Seinen', 'Slice of Life', 'Space']",tv,9.37633,0.404755,0.691916,0.000164613
4,12531,10615,Sakamichi no Apollon,"['Drama', 'Josei', 'Music', 'Romance', 'School']",tv,9.13093,0.374252,0.626664,0.00022669
5,5941,5678,Cross Game,"['Comedy', 'Drama', 'Romance', 'School', 'Shounen', 'Sports']",tv,9.23264,0.394292,0.736427,8.54747e-5
6,12355,10536,Ookami Kodomo no Ame to Yuki,"['Fantasy', 'Slice of Life']",movie,9.21048,0.247287,0.734339,0.000677486
7,57,55,Beck,"['Comedy', 'Drama', 'Music', 'Shounen', 'Slice of Life']",tv,9.04127,0.471829,0.586103,0.000148104
8,12365,10541,Bakuman. 3rd Season,"['Comedy', 'Drama', 'Romance', 'Shounen']",tv,9.09822,0.182425,0.645067,0.000497861
9,239,235,Gankutsuou,"['Drama', 'Mystery', 'Sci-Fi', 'Supernatural', 'Suspense']",tv,9.07008,0.303079,0.633981,9.18222e-5
10,329,327,Planetes,"['Drama', 'Romance', 'Sci-Fi', 'Seinen', 'Space']",tv,9.05609,0.194288,0.646657,0.000155614
