In [None]:
# Notebook parameters.
# `medium` is used by the cells below when loading and filtering recommendations.
# `username` and `source` are not referenced by the cells shown here; presumably
# they are read by the code pulled in from RecommendationsBase.ipynb — confirm there.
username = "Fro116"
source = "MAL"
medium = "anime";

In [None]:
import NBInclude: @nbinclude
@nbinclude("RecommendationsBase.ipynb");
# TODO ptw series

In [None]:
# Recommendation table for the "temporal" source.
# `get_rec_df` is not defined in this notebook (presumably it comes from
# RecommendationsBase.ipynb); the meaning of the ".2" argument should be checked there.
a = get_rec_df("temporal", medium, ".2");

In [None]:
# Pipe `a` through a chain of stages, left to right, and show the result.
# All stages (`inv`, `seen`, `recap`, `dependent`, `crossrelated`, `released`, `top`,
# `display`) are defined outside this notebook; judging by their names they
# presumably drop already-seen / recap / dependent / cross-related entries, keep
# released titles, and render the 40 best rows — confirm against their definitions.
a |>
inv(seen) |>
inv(recap) |>
inv(dependent) |>
inv(crossrelated) |>
released(medium) |>
top(40) |>
display(medium)

In [None]:
# Recommendation table for the "temporal_causal" source.
# `get_rec_df` is not defined in this notebook (presumably it comes from
# RecommendationsBase.ipynb); the meaning of the ".2" argument should be checked there.
b = get_rec_df("temporal_causal", medium, ".2");

In [None]:
# Same stage chain as used for `a`, applied to `b`, with the result shown.
# The stages are defined outside this notebook; judging by their names they
# presumably drop already-seen / recap / dependent / cross-related entries, keep
# released titles, and render the 40 best rows — confirm against their definitions.
b |>
inv(seen) |>
inv(recap) |>
inv(dependent) |>
inv(crossrelated) |>
released(medium) |>
top(40) |>
display(medium)

# RERANKING

In [None]:
# Combine the stored similarity matrices for `medium` into one weighted average.
#
# Arguments:
#   medium  - media type; "anime" additionally includes the tag similarity
#   weights - one weight per similarity source, in the order
#             (WatchSimilarity, GenreSimilarity[, TagSimilarity]);
#             `nothing` weighs all sources equally
#
# Returns the weighted sum of the "S" entries loaded via `read_params`, with the
# weights normalized to sum to one. Results are memoized per argument tuple.
@memoize function get_similarity_metric(medium::String, weights = nothing)
    alphas = ["$medium/all/WatchSimilarity", "$medium/all/GenreSimilarity"]
    if medium == "anime"
        push!(alphas, "$medium/all/TagSimilarity")
    end
    if isnothing(weights)
        weights = ones(Float32, length(alphas))
    end
    # `./` allocates a new array, so the normalized weights have to be bound to a
    # name; a bare `weights ./ sum(weights)` statement would have no effect
    normalized_weights = weights ./ sum(weights)
    sum([read_params(x)["S"] for x in alphas] .* normalized_weights)
end;

In [None]:
# Call the memoized function once so the "anime" result is cached for later cells;
# the return value itself is discarded.
get_similarity_metric("anime");

In [None]:
# Call the memoized function once so the "manga" result is cached for later cells;
# the return value itself is discarded.
get_similarity_metric("manga");

In [None]:
# Pick `list_size` items by (approximately) solving the binary quadratic program
#
#     minimize    x' * similarity_metric * x - relevance_scores' * x
#     subject to  sum(x) == list_size
#                 x[i] in {0, 1}
#                 attributes[i]' * x <= ceil(relative_constraints[i] * list_size)
#
# Arguments:
#   list_size            - number of items to select
#   similarity_metric    - N x N matrix of pairwise similarity penalties
#   relevance_scores     - length-N vector of per-item scores
#   attributes           - collection of length-N vectors, one per constraint
#   relative_constraints - per-constraint upper bounds, as a fraction of list_size
#
# Returns the indices (within 1:N) of the selected items.
#
# The input arrays are never modified. If no binary solution is found before the
# non-binary penalty exceeds 2^10, one constraint is dropped and the original
# (unscaled) problem is solved again; this fails with an assertion error when
# there is no constraint left to drop.
function solve_quadratic_program(
    list_size,
    similarity_metric,
    relevance_scores,
    attributes,
    relative_constraints,
)
    N = size(similarity_metric)[1]

    # nothing to select: the relaxation loop below would not execute for an empty
    # selection anyway, and returning early avoids dividing by zero when scaling
    if list_size == 0
        return Int[]
    end

    # scale problem by list_size
    # (out of place, so that neither the caller's arrays nor the inputs of the
    # retry below end up being rescaled more than once)
    scaled_similarity = similarity_metric ./ list_size^2
    scaled_relevance = relevance_scores ./ list_size
    constraints = [ceil(x * list_size) for x in relative_constraints]

    # make the initial optimization problem convex
    # this will not change the solution to the minimization problem:
    # for binary x with sum(x) == list_size the added diagonal contributes
    # the constant posdef_penalty * list_size
    posdef_penalty = 0
    while !LinearAlgebra.isposdef(scaled_similarity)
        scaled_similarity -= posdef_penalty * LinearAlgebra.I(N)
        posdef_penalty = max(1, posdef_penalty * 2)
        scaled_similarity += posdef_penalty * LinearAlgebra.I(N)
    end

    # solve the mixed-integer quadratic problem by
    # iteratively solving the QP with an increasing penalty
    # for non-binary solutions
    nonbinary_penalty = 0
    model = nothing
    warm_start = zeros(N)
    epsilon = 0.01
    while sum(warm_start .> epsilon) != list_size
        # model
        model = Model(
            optimizer_with_attributes(Ipopt.Optimizer, "print_level" => 0, "sb" => "yes"),
        )
        @variable(model, 0 <= x[1:N] <= 1)
        JuMP.set_start_value.(x, warm_start)

        # constraints
        @constraint(model, sum(x) == list_size)
        for i in eachindex(constraints)
            @constraint(model, attributes[i]' * x <= constraints[i])
        end

        # objective
        @expression(model, relevance, scaled_relevance' * x)
        @expression(model, similarity, x' * scaled_similarity * x)
        @expression(model, penalty, nonbinary_penalty * x' * (1 .- x))
        @objective(model, Min, similarity - relevance + penalty)

        # solve
        optimize!(model)
        nonbinary_penalty = max(1, nonbinary_penalty * 2)
        if nonbinary_penalty > 2^10
            @info "could not solve miqp $list_size. dropping constraint"
            @assert length(constraints) > 0
            skip = (list_size % length(constraints)) + 1
            # retry from the original, unscaled inputs with one constraint removed
            return solve_quadratic_program(
                list_size,
                similarity_metric,
                relevance_scores,
                [attributes[i] for i in eachindex(attributes) if i != skip],
                [relative_constraints[i] for i in eachindex(relative_constraints) if i != skip],
            )
        end
        warm_start = value.(x)
    end

    # entries above epsilon are treated as selected
    findall(>(epsilon), warm_start)
end;

In [None]:
# Choose `list_size` items and order them by repeated elimination.
#
# Arguments are forwarded to `solve_quadratic_program`; `constraints` are the
# relative (per-list-size) bounds matching `attributes`.
#
# Returns a vector of item indices, best first: starting from the selected
# subset, the item left out of the best subset that is one element smaller is
# moved to the front of the not-yet-ordered tail, until no candidates remain.
function get_reranking_order(
    similarity_metric,
    list_size,
    relevance_scores,
    attributes,
    constraints,
)
    # get a subset of the list_size best recs
    candidates = solve_quadratic_program(
        list_size,
        similarity_metric,
        relevance_scores,
        attributes,
        constraints,
    )

    # rerank the recs such that the top N items
    # are an optimal subset for any 1 <= N <= list_size
    # (typed like `candidates`, so the result is a concrete index vector)
    order = similar(candidates, 0)
    while !isempty(candidates)
        # solve on the candidate sub-problem; the solver returns positions
        # within `candidates`, which are mapped back to the original indices
        survivors = candidates[solve_quadratic_program(
            length(candidates) - 1,
            similarity_metric[candidates, candidates],
            relevance_scores[candidates],
            [x[candidates] for x in attributes],
            constraints,
        )]
        worst_candidate = [x for x in candidates if x ∉ survivors][1]
        candidates = filter(!=(worst_candidate), candidates)
        pushfirst!(order, worst_candidate)
    end
    order
end;

In [None]:
# Return the rows of `df` in the order chosen by `get_reranking_order`.
#
# The pairwise similarity of the rows is looked up in the `medium` similarity
# matrix at `df.uid .+ 1` and multiplied by `similarity_penalty`; `df.score`
# provides the relevance scores. No attribute constraints are passed.
function rerank(df, medium::String, list_size; similarity_penalty)
    matrix_rows = df.uid .+ 1
    pairwise = get_similarity_metric(medium)[matrix_rows, matrix_rows]
    # the scores are copied so the dataframe column is never modified by the solver
    relevance = copy(df.score) # exp.(df.score),
    # attribute constraints are currently disabled; previously considered:
    # [df.long_runner, df.related],
    # [long_runner_constraint, related_constraint],
    order = get_reranking_order(similarity_penalty * pairwise, list_size, relevance, [], [])
    df[order, :]
end;

In [None]:
# Candidate set for the reranking demo below: `b` piped through the same stage
# chain as above, but keeping only 5 rows (`top(5)`). The stages are defined
# outside this notebook; see their definitions for the exact filtering rules.
c =
    b |>
    inv(seen) |>
    inv(recap) |>
    inv(dependent) |>
    inv(crossrelated) |>
    released(medium) |>
    # startdate(2023, 01) |>
    top(5);

In [None]:
# TODO bucketwise randomization

In [None]:
# Reorder all rows of `c` with a large similarity penalty (the similarity matrix
# is multiplied by 1000 inside `rerank`), so similarity dominates the objective.
# Uses the configured `medium` rather than a hard-coded literal, matching how
# `c` was built.
rerank(c, medium, length(c.title); similarity_penalty = 1000) |> display(medium)

In [None]:
# Reorder all rows of `c` with a small similarity penalty (the similarity matrix
# is multiplied by 0.1 inside `rerank`), so the relevance scores dominate.
# Uses the configured `medium` rather than a hard-coded literal, matching how
# `c` was built.
rerank(c, medium, length(c.title); similarity_penalty = 0.1) |> display(medium)