In [1]:
source = "Recommendations";

In [2]:
using NBInclude
@nbinclude("../ProductionAlphas/Alpha.ipynb");

In [3]:
ENV["COLUMNS"] = 999999;
ENV["LINES"] = 200;
recommend_related_series = false;

In [4]:
anime = DataFrame(CSV.File("../../data/processed_data/anime.csv", ntasks = 1))
anime_to_uid = DataFrame(CSV.File("../../data/processed_data/anime_to_uid.csv"))
anime_to_uid = innerjoin(anime_to_uid, anime, on = "anime_id");

In [5]:
predictions = read_recommendee_alpha("CombineSignals").rating

rating_df = DataFrame(
    "uid" => 0:length(predictions)-1,
    "rating" => predictions,
    "alpha" => predictions - read_recommendee_alpha("UserItemBiases").rating,
    "std" => read_recommendee_alpha("PredictedErrors").rating,
    "p" => read_recommendee_alpha("CombineImplicitSignals").rating,
);
# penalize items for uncertainty
rating_df.score = rating_df.rating - 1 * rating_df.std; # TODO optimize this

In [6]:
# evaluate our insample predictions
df = get_recommendee_list()
@debug rmse(df.rating, rating_df.rating[df.item])
@debug mae(df.rating, rating_df.rating[df.item])
@debug r2(df.rating, rating_df.rating[df.item])
@debug sparse_crossentropy(rating_df.p[df.item])

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220405 05:49:45 1.3159882
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220405 05:49:46 0.996133
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220405 05:49:46 0.42453164
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220405 05:49:46 7.506049406902885


In [7]:
function dont_recommend!(df, mask)
    df.rating[mask] .= 0
    df.alpha[mask] .= -Inf
    df.score[mask] .= -Inf
    df.p[mask] .= 0
end;

# don't recommend shows that the user has already seen before
dont_recommend!(rating_df, get_implicit_list().item)

# don't recommend shows related to shows they have seen before
if !recommend_related_series
    related_series =
        get_alpha("ItemCF.Related.strict_relations", "recommendee_inference").rating .!= 0
    dont_recommend!(rating_df, related_series)
end;

# renormalize p
rating_df.p = rating_df.p ./ sum(rating_df.p);

In [8]:
rec_df = innerjoin(anime_to_uid, rating_df, on = "uid");
rec_df = rec_df[
    :,
    [
        "anime_id",
        "uid",
        "title",
        "genres",
        "tags",
        "medium",
        "rating",
        "alpha",
        "std",
        "score",
        "p",
        "nsfw",
    ],
];

In [9]:
length(get_recommendee_list().item), length(get_implicit_list().item)

(425, 428)

In [10]:
median(rec_df.p), quantile(rec_df.p, 0.75), quantile(rec_df.p, 0.9)

(4.0430183f-7, 6.002851023367839e-6, 7.098949572537077e-5)

In [11]:
# TODO multiple panels
# TODO optmize for diversity

In [12]:
# the model predicts ratings conditional on the user watching the series
# we want to predict ratings for items the user has not already decided to watch
# to mitigate the sampling bias, we first filter to series that the user is likely to be interested in
min_p = quantile(filter(x -> x.medium == "tv", rec_df).p, 0.9);
min_alpha = quantile(filter(x -> x.medium == "tv", rec_df).alpha, 0.75)
min_p, min_alpha

(0.000330962822772563, 0.2035200595855713)

In [17]:
function sampling_filter(df)
    filter(x -> x.p >= min_p, filter(x -> x.alpha .> min_alpha, df))
end;

In [18]:
function pretty_display(df)
    df = sort(df, :score, rev = true)
    select(df, Not(:score))
end;

In [21]:
filter(x -> x.medium == "tv", rec_df |> sampling_filter) |> pretty_display

Unnamed: 0_level_0,anime_id,uid,title,genres,tags,medium,rating,alpha,std,p,nsfw
Unnamed: 0_level_1,Int64,Int64,String,String,String,String7,Float32,Float32,Float32,Float32,String7
1,5941,4377,Cross Game,"['Comedy', 'Drama', 'Romance', 'School', 'Shounen', 'Sports']","['asia', 'baseball', 'based on a manga', 'childhood friends', 'comedy', 'coming of age', 'daily life', 'death of a loved one', 'drama', 'female protagonist', 'ganbatte', 'hero of strong character', 'heterosexual', 'high school', 'japan', 'love triangle', 'male protagonist', 'manga', 'present', 'primarily teen cast', 'romance', 'school', 'school club', 'school clubs', 'school life', 'shounen', 'slice of life', 'sports', 'time skip', 'tomboy', 'tragedy', 'tsundere']",tv,7.4277,0.337576,0.885456,0.00229137,white
2,31988,11173,Hibike! Euphonium 2,"['Drama', 'Music', 'School']","['angst', 'asia', 'band', 'based on a novel', 'coming of age', 'cute girls doing cute things', 'drama', 'earth', 'ensemble cast', 'family life', 'female protagonist', 'heterosexual', 'high school', 'japan', 'kuudere', 'music', 'novel', 'present', 'primarily female cast', 'primarily teen cast', 'school', 'school club', 'school clubs', 'school life', 'slice of life', 'slice of life drama', 'the arts']",tv,7.32109,0.285182,0.890535,0.00340291,white
3,1065,967,Touch,"['Drama', 'Romance', 'School', 'Shounen', 'Slice of Life', 'Sports']","['action', 'asia', 'baseball', 'based on a manga', 'boxing', 'childhood friends', 'comedy', 'coming of age', 'daily life', 'drama', 'earth', 'ganbatte', 'gymnastics', 'heterosexual', 'high school', 'japan', 'love polygon', 'love triangle', 'male protagonist', 'manga', 'neighbors', 'present', 'romance', 'rural', 'school', 'school club', 'school clubs', 'school life', 'shounen', 'siblings', 'slice of life', 'sports', 'tragedy', 'twins']",tv,7.36855,0.476731,1.00349,0.000645495,white
4,39570,15596,High Score Girl II,"['Comedy', 'Game', 'Romance', 'School', 'Seinen']","['achronological order', 'based on a manga', 'brain games and gambling', 'cg animation', 'comedy', 'coming of age', 'drama', 'full cgi', 'game', 'heterosexual', 'historical', 'love polygon', 'love triangle', 'manga', 'middle school', 'mopeds', 'otaku', 'present', 'primarily child cast', 'primarily teen cast', 'romance', 'romantic comedy', 'school', 'school life', 'seinen', 'slapstick', 'slice of life', 'tsundere', 'urban', 'video game industry', 'video games', 'violent retribution for accidental infringement']",tv,7.19783,0.492701,0.855596,0.0012943,white
5,32843,11538,Senki Zesshou Symphogear XV,"['Action', 'Music', 'Sci-Fi']","['action', 'aliens', 'bionic powers', 'drama', 'drugs', 'ensemble cast', 'fantasy', 'female protagonist', 'gods', 'guns', 'henshin', 'idol', 'kemonomimi', 'lost civilization', 'magical girl', 'mahou shoujo', 'monster', 'monster girl', 'motorcycles', 'music', 'mythology', 'new', 'original work', 'philosophy', 'power suit', 'primarily female cast', 'sci fi', 'sci-fi', 'science fiction', 'science-fiction', 'superpowers', 'swordplay', 'the arts', 'tragedy', 'violence', 'yuri']",tv,7.43084,0.557417,1.12153,0.000331769,white
6,25835,9373,Shirobako,"['Comedy', 'Drama']","['acting', 'anime & movie', 'anime industry', 'asia', 'cars', 'comedy', 'coming of age', 'coworkers', 'cute girls doing cute things', 'daily life', 'drama', 'drawing', 'earth', 'educational', 'ensemble cast', 'female protagonist', 'japan', 'kuudere', 'meta', 'new', 'office lady', 'original work', 'otaku culture', 'parody', 'plot continuity', 'present', 'primarily adult cast', 'primarily female cast', 'primarily teen cast', 'seinen', 'slice of life', 'the arts', 'voice acting', 'work', 'work life', 'working life']",tv,7.21804,0.229051,0.910562,0.00380729,white
7,135,114,Hikaru no Go,"['Comedy', 'Game', 'Shounen', 'Supernatural']","['asia', 'based on a manga', 'board games', 'body sharing', 'brain games and gambling', 'comedy', 'coming of age', 'drama', 'foreign', 'game', 'ganbatte', 'ghost', 'ghosts', 'go', 'japan', 'male protagonist', 'manga', 'plot continuity', 'present', 'psychological', 'school club', 'shounen', 'sports', 'supernatural', 'tournaments']",tv,7.1032,0.497078,0.898467,0.00151005,white
8,38993,15192,Karakai Jouzu no Takagi-san 2,"['Comedy', 'Romance', 'School', 'Shounen', 'Slice of Life']","['based on a manga', 'comedy', 'episodic', 'hero of weak character', 'heroine of strong character', 'heterosexual', 'iyashikei', 'manga', 'middle school', 'present', 'primarily child cast', 'romance', 'romantic comedy', 'romantic subtext', 'rural', 'school', 'school life', 'shounen', 'slapstick', 'slice of life', 'verbal comedy']",tv,6.92408,0.304835,0.749921,0.000695622,white
9,1530,1388,Kanon (2006),"['Drama', 'Romance', 'Slice of Life', 'Supernatural']","['age regression', 'amnesia', 'asia', 'based on a visual novel', 'comedy', 'coming of age', 'contemporary fantasy', 'demons', 'drama', 'earth', 'erotic game', 'family life', 'fantasy', 'female harem', 'game', 'ghost', 'harem', 'heterosexual', 'high school', 'japan', 'kuudere', 'male protagonist', 'moe', 'present', 'primarily female cast', 'romance', 'school', 'school life', 'seinen', 'sentimental drama', 'slice of life', 'stereotypes', 'suicide', 'supernatural', 'supernatural drama', 'tragedy', 'transfer students', 'tsundere', 'twisted story', 'visual novel']",tv,7.11519,0.662182,0.971808,0.00263822,white
10,27989,9666,Hibike! Euphonium,"['Drama', 'Music', 'School']","['angst', 'asia', 'band', 'based on a novel', 'coming of age', 'cute girls doing cute things', 'drama', 'earth', 'ensemble cast', 'female protagonist', 'high school', 'japan', 'music', 'novel', 'present', 'primarily female cast', 'primarily teen cast', 'school', 'school club', 'school clubs', 'school life', 'slice of life', 'slice of life drama', 'the arts']",tv,7.14327,0.471976,1.04464,0.00314484,white


In [19]:
rec_df |> sampling_filter) |> pretty_display

ErrorException: Parsing error for input occurred on line 1, offset: 29

In [20]:
# 	anime_id	uid	title	genres	medium	rating	alpha	std	p	nsfw
# Int64	Int64	String	String?	String7	Float32	Float32	Float32	Float32	String7
# 1	31988	11173	Hibike! Euphonium 2	['Drama', 'Music', 'School']	tv	7.38253	0.356698	0.792334	0.00168306	white
# 2	1065	967	Touch	['Drama', 'Romance', 'School', 'Shounen', 'Slice of Life', 'Sports']	tv	7.34929	0.467546	0.908755	0.00076882	white
# 3	27989	9666	Hibike! Euphonium	['Drama', 'Music', 'School']	tv	7.23239	0.571174	0.868754	0.00258483	white
# 4	1858	1694	Gakuen Utopia Manabi Straight!	['Comedy', 'School', 'Slice of Life']	tv	7.09079	1.20158	0.729596	0.00062308	white
# 5	25835	9373	Shirobako	['Comedy', 'Drama']	tv	7.14048	0.161561	0.805839	0.00261632	white
# 6	5941	4377	Cross Game	['Comedy', 'Drama', 'Romance', 'School', 'Shounen', 'Sports']	tv	7.23503	0.154975	0.9538	0.00104641	white
# 7	39570	15596	High Score Girl II	['Comedy', 'Game', 'Romance', 'School', 'Seinen']	tv	7.01997	0.324915	0.878117	0.000592566	white
# 8	1530	1388	Kanon (2006)	['Drama', 'Romance', 'Slice of Life', 'Supernatural']	tv	7.16325	0.720318	1.03834	0.00227492	white
# 9	488	458	Ichigo Mashimaro	['Comedy', 'Slice of Life']	tv	6.9334	0.724677	0.852129	0.00104676	white
# 10	122	101	Full Moon wo Sagashite	['Comedy', 'Drama', 'Music', 'Romance', 'Shoujo', 'Supernatural']	tv	6.94031	0.727181	0.869193	0.000667557	white
# 11	21877	8534	High Score Girl	['Comedy', 'Game', 'Romance', 'School', 'Seinen']	tv	6.88657	0.471743	0.941942	0.000968524	white
# 12	38993	15192	Karakai Jouzu no Takagi-san 2	['Comedy', 'Romance', 'School', 'Shounen', 'Slice of Life']	tv	6.78872	0.179552	0.993493	0.000348786	white
# 13	532	499	Bishoujo Senshi Sailor Moon S	['Drama', 'Romance', 'Shoujo']	tv	6.63147	0.41036	0.84895	0.000437212	white
# 14	135	114	Hikaru no Go	['Comedy', 'Game', 'Shounen', 'Supernatural']	tv	6.74995	0.153899	1.02637	0.000676344	white
# 15	30727	10611	Saenai Heroine no Sodatekata ♭	['Comedy', 'Ecchi', 'Harem', 'Romance', 'School']	tv	6.59874	0.364116	0.876918	0.000742145	white
# 16	593	556	Mugen no Ryvius	['Drama', 'Mecha', 'Military', 'Psychological', 'Sci-Fi', 'Space']	tv	6.68717	0.304934	0.972565	0.00101426	white
# 17	34902	12650	Tsurezure Children	['Comedy', 'Romance', 'School', 'Shounen']	tv	6.51234	0.364974	0.807823	0.00102669	white
# 18	59	40	Chobits	['Comedy', 'Drama', 'Ecchi', 'Romance', 'Sci-Fi', 'Seinen']	tv	6.6161	0.883723	0.948174	0.00179486	white
# 19	38753	15022	Araburu Kisetsu no Otome-domo yo.	['Comedy', 'Drama', 'Romance', 'School', 'Shounen']	tv	6.52573	0.495667	0.864833	0.000905996	white
# 20	31771	11089	Amanchu!	['Comedy', 'School', 'Shounen', 'Slice of Life']	tv	6.58394	0.685901	0.926502	0.000584655	white
# 21	35860	13252	Karakai Jouzu no Takagi-san	['Comedy', 'Romance', 'School', 'Shounen', 'Slice of Life']	tv	6.59876	0.432538	0.977115	0.000852553	white
# 22	427	400	Kaleido Star	['Comedy', 'Drama', 'Fantasy', 'Shoujo', 'Sports']	tv	6.57368	0.344165	0.97538	0.000534053	white
# 23	40530	16120	Jaku-Chara Tomozaki-kun	['Drama', 'Romance', 'School']	tv	6.47727	0.923203	0.894883	0.000440828	white
# 24	874	789	Digimon Tamers	['Adventure', 'Comedy', 'Drama', 'Fantasy', 'Shounen']	tv	6.52593	0.48561	0.980111	0.000362535	white
# 25	35240	12889	Princess Principal	['Action', 'Historical', 'Mystery']	tv	6.5926	0.377174	1.06195	0.000912882	white
# 26	42361	16907	Ijiranaide, Nagatoro-san	['Comedy', 'Romance', 'Slice of Life']	tv	6.60228	0.83992	1.08517	0.000529261	white
# 27	18897	7968	Nisekoi	['Comedy', 'Harem', 'Romance', 'School', 'Shounen']	tv	6.31667	0.286932	0.808886	0.00141164	white
# 28	1486	1348	Kodomo no Omocha (TV)	['Comedy', 'Drama', 'Romance', 'Shoujo']	tv	6.56859	0.231561	1.06944	0.000533452	white
# 29	35639	13136	Just Because!	['Drama', 'Romance', 'School', 'Slice of Life']	tv	6.46695	0.763056	0.99794	0.000684533	white
# 30	1222	1114	Bokura ga Ita	['Drama', 'Romance', 'Shoujo', 'Slice of Life']	tv	6.70307	1.16039	1.23905	0.000494813	white
# 31	41619	16653	Munou na Nana	['Psychological', 'Shounen', 'Super Power', 'Supernatural', 'Suspense']	tv	6.41985	0.698886	0.958501	0.000440847	white
# 32	14131	7087	Girls & Panzer	['Action', 'Military', 'School', 'Sports']	tv	6.55237	0.586552	1.13453	0.00168731	white
# 33	552	517	Digimon Adventure	['Action', 'Adventure', 'Comedy', 'Fantasy', 'Kids']	tv	6.55298	0.365117	1.13609	0.000609451	white
# 34	40685	16205	Super Cub	['School', 'Slice of Life']	tv	6.35695	0.364119	0.944397	0.000580612	white
# 35	37890	14482	Oshi ga Budoukan Ittekuretara Shinu	['Comedy', 'Girls Love', 'Music', 'Seinen', 'Slice of Life']	tv	6.20357	0.206216	0.801772	0.000379987	white
# 36	34984	12704	Koi wa Ameagari no You ni	['Drama', 'Romance', 'Seinen', 'Slice of Life']	tv	6.56196	0.419388	1.16207	0.00116559	white
# 37	3958	3399	Kannagi	['Comedy', 'School', 'Shounen', 'Supernatural']	tv	6.24875	0.494657	0.866859	0.00120494	white
# 38	31859	11120	Hai to Gensou no Grimgar	['Action', 'Adventure', 'Drama', 'Fantasy']	tv	6.24894	0.166416	0.876223	0.000963634	white
# 39	13333	6942	Tari Tari	['Music', 'School', 'Slice of Life']	tv	6.24726	0.466783	0.878362	0.00116383	white
# 40	587	550	Hanbun no Tsuki ga Noboru Sora	['Comedy', 'Drama', 'Romance']	tv	6.30107	0.38663	0.960221	0.00128816	white
# 41	8861	5539	Yosuga no Sora: In Solitude, Where We Are Least Alone.	['Drama', 'Ecchi', 'Harem', 'Romance']	tv	7.46844	3.02749	2.15606	0.00104767	gray
# 42	2986	2716	Bamboo Blade	['Comedy', 'School', 'Seinen', 'Sports']	tv	6.17986	0.452029	0.868882	0.000749638	white
# 43	35756	13204	Comic Girls	['Comedy', 'Slice of Life']	tv	6.22388	0.350058	0.919891	0.000430328	white
# 44	1087	987	Kimagure Orange☆Road	['Comedy', 'Drama', 'Romance', 'School', 'Shounen', 'Slice of Life', 'Super Power']	tv	6.38255	0.190579	1.07896	0.000837759	white
# 45	12149	6691	AKB0048	['Music', 'Sci-Fi']	tv	6.26354	0.61283	0.960399	0.00053292	white
# 46	330	306	Midori no Hibi	['Comedy', 'Drama', 'Ecchi', 'Romance', 'Shounen']	tv	6.17071	0.533026	0.875367	0.00092522	white
# 47	46093	17949	Shiroi Suna no Aquatope	['Drama', 'Slice of Life']	tv	6.36729	0.377252	1.0729	0.000547827	white
# 48	5150	3975	Hatsukoi Limited.	['Comedy', 'Romance', 'School', 'Shounen']	tv	6.09554	0.397329	0.811726	0.000535075	white
# 49	740	677	Bishoujo Senshi Sailor Moon R	['Demons', 'Romance', 'Shoujo']	tv	6.29016	0.24701	1.00655	0.000616277	white
# 50	37982	14550	Domestic na Kanojo	['Drama', 'Romance', 'School', 

In [21]:
rec_df |> sampling_filter |> pretty_display

Unnamed: 0_level_0,anime_id,uid,title,genres,medium,rating,alpha,std,p,nsfw
Unnamed: 0_level_1,Int64,Int64,String,String?,String7,Float32,Float32,Float32,Float32,String7
1,18617,7913,Girls & Panzer Movie,"['Military', 'School', 'Sports']",movie,7.59087,0.785378,0.967688,0.00234118,white
2,38081,14641,Girls & Panzer: Saishuushou Part 2,"['Military', 'School']",movie,7.39121,0.732039,0.824473,0.000470433,white
3,5941,4377,Cross Game,"['Comedy', 'Drama', 'Romance', 'School', 'Shounen', 'Sports']",tv,7.4277,0.337576,0.885456,0.00229137,white
4,38422,14831,High Score Girl: Extra Stage,"['Comedy', 'Game', 'Romance', 'School', 'Seinen']",ova,7.34944,0.677087,0.849232,0.00159978,white
5,31988,11173,Hibike! Euphonium 2,"['Drama', 'Music', 'School']",tv,7.32109,0.285182,0.890535,0.00340291,white
6,2454,2249,Kimagure Orange☆Road: Ano Hi ni Kaeritai,"['Comedy', 'Drama', 'Romance', 'Shounen', 'Slice of Life']",movie,7.24166,0.771602,0.847692,0.000486727,white
7,1065,967,Touch,"['Drama', 'Romance', 'School', 'Shounen', 'Slice of Life', 'Sports']",tv,7.36855,0.476731,1.00349,0.000645495,white
8,33970,12158,Girls & Panzer: Saishuushou Part 1,"['Military', 'School']",movie,7.23999,0.700789,0.877145,0.000920878,white
9,39570,15596,High Score Girl II,"['Comedy', 'Game', 'Romance', 'School', 'Seinen']",tv,7.19783,0.492701,0.855596,0.0012943,white
10,35678,13160,Hibike! Euphonium Movie 3: Chikai no Finale,"['Drama', 'Music', 'School']",movie,7.11935,0.559372,0.777997,0.0013574,white
