In [None]:
# Label identifying this notebook. NOTE(review): presumably read by the notebooks
# included further down (Alpha.ipynb / SuppressImplicit.ipynb) — confirm there.
source = "Recommendations";

In [None]:
using DataFrames
import CSV
import NBInclude: @nbinclude
import Statistics: mean, quantile
@nbinclude("../InferenceAlphas/Alpha.ipynb")
@nbinclude("../InferenceAlphas/Ensemble/SuppressImplicit.ipynb");;

In [None]:
# Refuse to run for the wrong user: `username` must equal the value reported by
# `get_recommendee_username()`.
# NOTE(review): `username` is not defined in the visible part of this notebook —
# presumably it is injected as a notebook parameter; confirm.
# An explicit check is used instead of `@assert` because this validates an externally
# supplied value, and the error message names both values to ease debugging.
let actual = username, expected = get_recommendee_username()
    if actual != expected
        throw(
            ArgumentError(
                "username $(repr(actual)) does not match recommendee $(repr(expected))",
            ),
        )
    end
end

In [None]:
# Override the terminal size environment variables so that wide/long tables are not
# truncated when displayed (Julia's `displaysize` consults COLUMNS and LINES).
for (key, value) in ("COLUMNS" => "999999", "LINES" => "200")
    ENV[key] = value
end

In [None]:
# Anime metadata table, parsed with a single task (`ntasks = 1`).
const anime = CSV.File("../../data/processed_data/anime.csv"; ntasks = 1) |> DataFrame
# Rows of anime_to_uid.csv combined with the metadata of the matching anime
# (inner join on "anime_id", so rows without a match on either side are dropped).
anime_to_uid = innerjoin(
    CSV.File("../../data/processed_data/anime_to_uid.csv") |> DataFrame,
    anime,
    on = "anime_id",
);

In [None]:
# Collect the recommendee's per-item model outputs into one table. The "uid" column
# runs over 0:num_items()-1, so there is one row per item.
rating_df = let
    # `.rating` of a regular alpha, read with the "all" argument
    alpha(name) = read_recommendee_alpha(name, "all").rating
    # `.rating` of a suppressed alpha, read with the arguments ("all", true)
    suppressed(name) = read_recommendee_suppressed_alpha(name, "all", true).rating

    DataFrame(
        "uid" => 0:num_items()-1,
        "ranking" => alpha("BPRT"),
        "explicit" => alpha("Explicit"),
        "implicit" => suppressed("LinearImplicit"),
        "error_explicit" => alpha("ErrorExplicit"),
        "error_implicit" => alpha("ErrorImplicit"),
        "baseline_explicit" => alpha("ExplicitUserItemBiases"),
        "baseline_implicit" => suppressed("NeuralImplicitUserItemBiases"),
    )
end
# The score used for ordering starts out as a copy of the "ranking" column.
rating_df[:, "score"] = rating_df[:, "ranking"];

In [None]:
# Measure how well the explicit predictions fit the split returned by
# `get_recommendee_split(false)`: residual vs. total sum of squares.
df = get_recommendee_split(false)
rss = sum(abs2, df.rating .- rating_df.explicit[df.item])
tss = sum(abs2, df.rating .- mean(df.rating))
@info "Insample RMSE: $(sqrt(rss / length(df.rating)))"
@info "Insample R2: $(1 - rss / tss)"

In [None]:
# Flag every item that occurs in either recommendee split, so that shows the user has
# already seen can be excluded from the recommendations.
rating_df[:, "seen"] .= false
seen_items = [get_recommendee_split(true).item; get_recommendee_split(false).item]
rating_df[seen_items, "seen"] .= true

# Flag every item with a nonzero "ItemCFRelated" rating, so that shows related to ones
# the user has already seen can be excluded as well.
rating_df[:, "related"] .= false
related_series = .!iszero.(read_recommendee_alpha("ItemCFRelated", "all").rating)
rating_df[related_series, "related"] .= true;

In [None]:
# Column order of the display table: the identifying/metadata columns first, followed
# by every rating_df column that is not already listed.
keepcols = union(["anime_id", "uid", "title", "genres", "medium"], names(rating_df))
# Attach the anime metadata to the per-item ratings and keep only those columns.
rec_df = innerjoin(anime_to_uid, rating_df, on = "uid")[:, keepcols];

In [None]:
# Number of items in the `false` split and in the `true` split, in that order.
map(flag -> length(get_recommendee_split(flag).item), (false, true))

In [None]:
"""
    unseen_display(df)

Return the rows of `df` whose `seen` flag is false, sorted by `:score` in ascending
order.
"""
function unseen_display(df)
    unseen = filter(row -> !row.seen, df)
    return sort(unseen, :score)
end

"""
    unrelated_display(df)

Return the rows of `df` whose `related` flag is false.
"""
function unrelated_display(df)
    return filter(df) do row
        !row.related
    end
end

"""
    related_display(df)

Return the rows of `df` that are flagged `related` but not `seen`, sorted by `:score`
in ascending order.
"""
function related_display(df)
    unseen_related = filter(df) do row
        # seen rows are rejected without looking at the related flag
        row.seen ? false : row.related
    end
    return sort(unseen_related, :score)
end

"""
    explicit_display(df)

Return the rows of `df` whose `explicit` value is strictly greater than their
`baseline_explicit` value.
"""
function explicit_display(df)
    return filter(df) do row
        row.explicit > row.baseline_explicit
    end
end

"""
    implicit_display(df)

Return the rows of `df` whose `implicit` value is strictly greater than their
`baseline_implicit` value.
"""
function implicit_display(df)
    return filter(df) do row
        row.implicit > row.baseline_implicit
    end
end

"""
    inv_explicit_display(df)

Return the rows of `df` whose `explicit` value is less than or equal to their
`baseline_explicit` value.
"""
function inv_explicit_display(df)
    return filter(df) do row
        row.explicit <= row.baseline_explicit
    end
end

"""
    inv_implicit_display(df)

Return the rows of `df` whose `implicit` value is less than or equal to their
`baseline_implicit` value.
"""
function inv_implicit_display(df)
    return filter(df) do row
        row.implicit <= row.baseline_implicit
    end
end

"""
    tv_display(df)

Return the rows of `df` whose `medium` equals `"tv"`.
"""
function tv_display(df)
    return filter(df) do row
        row.medium == "tv"
    end
end

"""
    inv_tv_display(df)

Return the rows of `df` whose `medium` is anything other than `"tv"`.
"""
function inv_tv_display(df)
    return filter(df) do row
        row.medium != "tv"
    end
end;