# TrainingServingSkew
* Monitors whether the alphas generated for inference are identical
    to the alphas generated during training

In [1]:
import NBInclude: @nbinclude
import Statistics: mean
@nbinclude("Alpha.ipynb");

In [2]:
"""
    read_training_alpha(alpha, split)

Return the rows of the training-time `alpha` for `split` that belong to the
recommendee, repackaged as a `RatingsDataset` whose `user` entries are all `1`.

The recommendee's user id is taken to be the integer parsed from
`get_recommendee_username()` plus one.
"""
function read_training_alpha(alpha, split)
    # NOTE(review): the `+ 1` presumably maps a 0-based username to a 1-based
    # user index — confirm against the dataset conventions.
    recommendee = parse(Int, get_recommendee_username()) + 1
    # NOTE(review): the meaning of the trailing `true` flag is defined by
    # `read_alpha` (outside this notebook) — confirm before changing it.
    full = read_alpha(alpha, split, true)
    keep = full.user .== recommendee
    subset = RatingsDataset(full.user[keep], full.item[keep], full.rating[keep])
    # Relabel every remaining row as user 1.
    fill!(subset.user, 1)
    return subset
end;

In [3]:
"""
    average_errors(alpha, split)

Return the element-wise absolute difference between the training-time ratings
of `alpha` on `split` and the serving-time ratings of the same items.

Despite the name, the result is one absolute error per training row, not an
average; see `average_abs_error` for the mean.
"""
function average_errors(alpha, split)
    served = read_recommendee_alpha(alpha, "all")
    trained = read_training_alpha(alpha, split)
    # Look up the serving rating for each item that appears in the training rows.
    served_ratings = served.rating[trained.item]
    return abs.(trained.rating .- served_ratings)
end

"""
    relative_errors(alpha, split)

Return, for each training row of `alpha` on `split`, how far the ratio
`|training rating| / (|serving rating| + eps(Float64))` is from `1`.

Only magnitudes are compared, so a pure sign flip between training and serving
is not reported by this measure.
"""
function relative_errors(alpha, split)
    served = read_recommendee_alpha(alpha, "all")
    trained = read_training_alpha(alpha, split)
    # Small offset that keeps the denominator non-zero when a serving rating is 0.
    guard = eps(Float64)
    magnitude_ratio = abs.(trained.rating) ./ (abs.(served.rating[trained.item]) .+ guard)
    return abs.(magnitude_ratio .- 1)
end;

In [4]:
"""
    maximum_error(alpha, split)

Return the largest value found among both the absolute errors
(`average_errors`) and the relative errors (`relative_errors`) of `alpha` on
`split`.
"""
function maximum_error(alpha, split)
    absolute = average_errors(alpha, split)
    relative = relative_errors(alpha, split)
    # Concatenate both error vectors and take a single maximum over all of them.
    return maximum([absolute; relative])
end

"""
    average_abs_error(alpha, split)

Return the mean of the per-row absolute errors of `alpha` on `split`.
"""
average_abs_error(alpha, split) = mean(average_errors(alpha, split))

"""
    average_rel_error(alpha, split)

Return the mean of the per-row relative errors of `alpha` on `split`.
"""
average_rel_error(alpha, split) = mean(relative_errors(alpha, split));

In [5]:
"""
    display_errors(alphas, split)

Log (via `@info`) a table with one row per entry of `alphas`, showing the mean
absolute error, the mean relative error and the maximum error between the
training-time and serving-time versions of that alpha on `split`.

# Arguments
- `alphas`: collection of alpha names; may be empty, in which case only the
  header row is logged.
- `split`: name of the split passed through to the error functions.

# Returns
`nothing`.
"""
function display_errors(alphas, split)
    alpha_header = "Alpha"
    # Never narrower than the header itself, and well-defined for empty `alphas`.
    alpha_padding = mapreduce(length, max, alphas; init = length(alpha_header))
    # Wide enough for the longest printed Float64, so the columns stay aligned.
    number_padding = 24

    header = join(
        (
            rpad(alpha_header, alpha_padding),
            rpad("Avg Abs Error", number_padding),
            rpad("Avg Rel Error", number_padding),
            rpad("Max Error", number_padding),
        ),
        " ",
    )
    @info header

    for alpha in alphas
        # Compute each error vector once per alpha; deriving the three summary
        # statistics through separate helper calls would re-read the alphas
        # from storage for every statistic.
        abs_errors = average_errors(alpha, split)
        rel_errors = relative_errors(alpha, split)

        row = join(
            (
                rpad(alpha, alpha_padding),
                rpad(mean(abs_errors), number_padding),
                rpad(mean(rel_errors), number_padding),
                rpad(maximum(vcat(abs_errors, rel_errors)), number_padding),
            ),
            " ",
        )
        @info row
    end
    return nothing
end;

In [6]:
# Every entry under the recommendee alpha base path is a candidate; only those
# that actually contain an `alpha.jld2` file are kept.
potential_alphas = readdir(recommendee_alpha_basepath())
alphas = sort(
    filter(
        x -> ispath("$(recommendee_alpha_basepath())/$(x)/alpha.jld2"),
        potential_alphas,
    ),
)
ensemble_alphas = sort(["ErrorExplicit", "ErrorImplicit", "Explicit", "NonlinearExplicit"])
inference_only_alphas = sort(["ItemCFRelated", "BPR"])
# Standard alphas are whatever is left after removing the two special groups.
standard_alphas = filter(
    x -> !(x in ensemble_alphas || x in inference_only_alphas),
    alphas,
);

In [None]:
# Report training/serving skew on the training split for the standard alphas only:
display_errors(standard_alphas, "training")
# ensemble alphas do not save training splits, so they are not checked here

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220704 16:58:15 Alpha                             Avg Abs Error    Avg Rel Error    Max Error       
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220704 16:58:23 ExplicitItemCF                    9.706281e-7      0.05265420916290078 1.0             
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220704 16:58:27 ExplicitUserItemBiases            1.9073486e-6     2.3695841768580756e-7 1.9073486328125e-6
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220704 16:58:32 LinearExplicit                    0.0008248028     9.900572676129147e-5 0.0020475387573242188
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220704 16:58:37 LinearImplicit                    5.483985e-6      0.0015382511445828945 0.009135707927428571
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220704 16:58:42 NeuralExplicitAutoencoderUntuned  0.00057415804    0.0034894848548399113 0.04958087969790004
[38;5;6m[1m[ [22m[

In [None]:
# Report skew on the validation split: standard alphas first, then the
# ensemble alphas, separated by a blank line.
let split_name = "validation"
    display_errors(standard_alphas, split_name)
    println()
    display_errors(ensemble_alphas, split_name)
end

In [None]:
# Report skew on the test split: standard alphas first, then the ensemble
# alphas, separated by a blank line.
let split_name = "test"
    display_errors(standard_alphas, split_name)
    println()
    display_errors(ensemble_alphas, split_name)
end