In [None]:
# Name under which this notebook identifies itself.
# NOTE(review): presumably read by the helpers pulled in from Alpha.ipynb — confirm.
const source_name = "Recommendations";

In [None]:
using DataFrames
import CSV
import Dates
import NBInclude: @nbinclude
import Statistics: mean, quantile, cor
@nbinclude("../InferenceAlphas/Alpha.ipynb");

In [None]:
# Raise the DataFrames display limits: both environment variables are set to 100
# (per the DataFrames.jl docs they cap the columns/rows rendered in notebook output).
for setting in ("DATAFRAMES_COLUMNS", "DATAFRAMES_ROWS")
    ENV[setting] = 100
end

In [None]:
"""
    get_anime()

Read `../../data/processed_data/anime.csv` and return its contents as a `DataFrame`.

The file is parsed with `ntasks = 1` and `stringtype = String`, i.e. with a single
parsing task and with string columns requested as plain `String` values.
"""
function get_anime()
    path = "../../data/processed_data/anime.csv"
    table = CSV.File(path; ntasks = 1, stringtype = String)
    return DataFrame(table)
end;

In [None]:
# Anime metadata table, loaded once.
const anime = get_anime()

# Attach the metadata to the animeid -> uid mapping; the mapping's `animeid` column
# is matched against the metadata's `anime_id` column, and only matches are kept.
anime_to_uid = innerjoin(
    DataFrame(CSV.File("../../data/processed_data/anime_to_uid.csv")),
    anime;
    on = "animeid" => "anime_id",
);

In [None]:
# Return the `rating` field of the alpha named `x`, read for the "all" split.
function get_alpha(x)
    return read_recommendee_alpha(x, "all").rating
end

# One row per item: `uid` runs from 0 to num_items() - 1 and every other column
# holds the `rating` vector of one alpha.
rating_df = DataFrame(
    "uid" => 0:(num_items() - 1),
    "explicit" => get_alpha("Explicit"),
    "implicit" => get_alpha("LinearImplicit"),
    "ptw" => get_alpha("LinearPtw"),
    "baseline_explicit" => get_alpha("ExplicitUserItemBiases"),
    "baseline_implicit" => get_alpha("NeuralImplicitItemBiases"),
    "baseline_ptw" => get_alpha("NeuralPtwItemBiases"),
    # Stored negated; NOTE(review): presumably so that an ascending sort on `score`
    # lists the strongest BPR predictions first — confirm.
    "score" => -get_alpha("BPR.Neural.Test"),
);

In [None]:
# Compare the explicit predictions against the recommendee's own explicit ratings
# (in-sample) and log RMSE, R^2 and the correlation.
df = get_recommendee_split("explicit")
rss = sum(abs2, df.rating - rating_df.explicit[df.item])  # residual sum of squares
tss = sum(abs2, df.rating .- mean(df.rating))             # total sum of squares
let n = length(df.rating), predicted = rating_df.explicit[df.item]
    @info "RMSE: $(sqrt(rss / n))"
    @info "R2: $(1 - rss / tss)"
    @info "Correlation: $(cor(df.rating, predicted))"
end

In [None]:
# `seen`: true for every item listed in the recommendee's implicit split, so that
# shows the user has already seen can be excluded from the recommendations.
seen_items = get_recommendee_split("implicit").item
rating_df[:, "seen"] .= false
rating_df[seen_items, "seen"] .= true

# `related`: true wherever the ItemCFRelated alpha is nonzero, so that shows related
# to already-seen shows can be excluded as well.
related_series = read_recommendee_alpha("ItemCFRelated", "all").rating .!= 0
rating_df[:, "related"] .= false
rating_df[related_series, "related"] .= true;

In [None]:
# Column order for the recommendation table: the descriptive anime columns first,
# followed by every `rating_df` column that is not already listed.
keepcols = union(
    ["animeid", "uid", "title", "genres", "medium", "num_episodes", "start_date"],
    names(rating_df),
)

# Join the anime metadata with the per-item predictions on `uid`, keeping only the
# selected columns.
rec_df = innerjoin(anime_to_uid, rating_df; on = "uid")[:, keepcols];

In [None]:
# Log how many items the recommendee has in each of the three splits.
foreach(("explicit", "implicit", "ptw")) do content
    n = length(get_recommendee_split(content).item)
    @info "$n $content items"
end

In [None]:
"""
    is_recent(x; today = Dates.today(), window = Dates.Month(6))

Return `true` when `x` lies strictly between `today - window` and `today`.

A `missing` value is never recent. Both bounds are exclusive, so `today` itself and
the date exactly `window` before it are not recent.

# Keywords
- `today`: reference date. Defaults to the current date, which is read only once per
  call so that both bounds are computed from the same day.
- `window`: length of the look-back period. Defaults to six months.
"""
function is_recent(x; today = Dates.today(), window = Dates.Month(6))
    ismissing(x) && return false
    return today - window < x < today
end

In [None]:
"""
    unseen_display(df)

Return the rows of `df` whose `seen` flag is false, sorted by `score` in ascending
order.
"""
function unseen_display(df)
    unseen = filter(row -> !row.seen, df)
    return sort(unseen, :score)
end

"""
    unrelated_display(df)

Return the rows of `df` whose `related` flag is false.
"""
function unrelated_display(df)
    return filter(df) do row
        !row.related
    end
end

"""
    related_display(df)

Return the rows of `df` that are flagged `related` but not `seen`, sorted by `score`
in ascending order.
"""
function related_display(df)
    unseen_related = filter(row -> row.related && !row.seen, df)
    return sort(unseen_related, :score)
end

"""
    explicit_display(df)

Return the rows of `df` whose `explicit` value is strictly greater than their
`baseline_explicit` value.
"""
function explicit_display(df)
    return filter(df) do row
        row.explicit > row.baseline_explicit
    end
end

"""
    implicit_display(df)

Return the rows of `df` whose `implicit` value is strictly greater than their
`baseline_implicit` value.
"""
function implicit_display(df)
    return filter(df) do row
        row.implicit > row.baseline_implicit
    end
end

"""
    inv_explicit_display(df)

Return the rows of `df` whose `explicit` value is less than or equal to their
`baseline_explicit` value (the complement of `explicit_display`'s condition).
"""
function inv_explicit_display(df)
    return filter(df) do row
        row.explicit <= row.baseline_explicit
    end
end

"""
    inv_implicit_display(df)

Return the rows of `df` whose `implicit` value is less than or equal to their
`baseline_implicit` value (the complement of `implicit_display`'s condition).
"""
function inv_implicit_display(df)
    return filter(df) do row
        row.implicit <= row.baseline_implicit
    end
end

"""
    tv_display(df)

Return the rows of `df` whose `medium` is `"tv"` or `"ona"`.
"""
function tv_display(df)
    return filter(df) do row
        row.medium in ("tv", "ona")
    end
end

"""
    inv_tv_display(df)

Return the rows of `df` whose `medium` is neither `"tv"` nor `"ona"`.
"""
function inv_tv_display(df)
    return filter(df) do row
        row.medium ∉ ("tv", "ona")
    end
end

"""
    recent_display(df)

Return the rows of `df` for which `is_recent` holds on the row's `start_date`.
"""
function recent_display(df)
    return filter(df) do row
        # `is_recent` is applied through `broadcast`.
        # NOTE(review): assumes `start_date` is a single value per row (a date or
        # `missing`), so the broadcast yields one Bool — confirm.
        broadcast(is_recent, row.start_date)
    end
end;