# Training Serving Skew
* Checks that the alphas generated for inference match the alphas generated during training by reporting the maximum and mean absolute difference for each alpha

In [None]:
# Notebook-level parameters, read as globals by the functions below. They default
# to empty strings here; presumably they are overridden by whatever runs this
# notebook for a concrete user (TODO confirm).
# `username` and `source` are joined into the "source@username" key that is looked
# up in the username-to-uid table.
username = ""
source = ""
# `medium` is forwarded to get_raw_split and used as the prefix of every alpha name.
medium = ""

In [None]:
import NBInclude: @nbinclude
import Statistics: mean
@nbinclude("../TrainingAlphas/Alpha.ipynb");

In [None]:
"""
    get_fields()

Return the field names of `RatingsDataset`, leaving out `:alpha`, `:metric` and
`:userid`.
"""
function get_fields()
    excluded = (:alpha, :metric, :userid)
    return [name for name in fieldnames(RatingsDataset) if !(name in excluded)]
end;

## Check that the raw data is the same

In [None]:
"""
    get_rec_split(medium)

Load the "rec_training" split for `medium` through `get_raw_split`, requesting the
fields listed by `get_fields()` and passing `nothing` as the final argument.
"""
function get_rec_split(medium)
    fields = get_fields()
    return get_raw_split("rec_training", medium, fields, nothing)
end;

In [None]:
# Look up the `uid` recorded for the account "source@username" in
# processed_data/username_to_uid.csv.
#
# Returns the `uid` of the first row whose `userid` equals the key, or `nothing`
# when no row matches. The function is wrapped in `@memoize`.
@memoize function get_training_uid(source, username)
    mapping_path = get_data_path("processed_data/username_to_uid.csv")
    username_to_uid = DataFrame(CSV.File(mapping_path))
    key = "$source@$username"
    row = findfirst(==(key), username_to_uid.userid)
    return isnothing(row) ? nothing : username_to_uid.uid[row]
end;

In [None]:
"""
    get_training_split(medium, split = "training")

Load `split` for `medium` (with `:userid` plus the fields from `get_fields()`) and
keep only the rows whose `userid` equals the uid of the notebook-level
`source`/`username` account.
"""
function get_training_split(medium, split = "training")
    uid = get_training_uid(source, username)
    requested = vcat([:userid], get_fields())
    data = get_raw_split(split, medium, requested, nothing)
    belongs_to_user = data.userid .== uid
    return filter(data, belongs_to_user)
end;

In [None]:
"""
    check_raw_splits(medium)

Assert that every field listed by `get_fields()` is elementwise equal between the
split returned by `get_rec_split(medium)` and the one returned by
`get_training_split(medium)`.

Throws an `AssertionError` naming the offending field when the two sides differ in
shape or in any element.
"""
function check_raw_splits(medium)
    rec = get_rec_split(medium)
    training = get_training_split(medium)
    for k in get_fields()
        a = getfield(rec, k)
        b = getfield(training, k)
        # `.==` broadcasts, so a length-1 array would silently "match" a longer
        # constant array; compare shapes explicitly before comparing elements.
        if a isa AbstractArray && b isa AbstractArray
            @assert size(a) == size(b) "field $k: size $(size(a)) != $(size(b))"
        end
        @assert all(a .== b) "field $k differs between inference and training splits"
    end
end;

## Check that the computed alphas are the same

In [None]:
"""
    get_training_alpha(alpha)

Load `alpha` for the notebook-level `medium` from the "test" and "negative" splits,
keep only the rows of the notebook-level `source`/`username` account in each, and
combine the two filtered pieces with `cat` (in that order).
"""
function get_training_alpha(alpha)
    uid = get_training_uid(source, username)
    pieces = map(["test", "negative"]) do split
        loaded = get_raw_split(split, medium, [:userid, :itemid], alpha)
        filter(loaded, loaded.userid .== uid)
    end
    return reduce(cat, pieces)
end;

In [None]:
"""
    get_rec_alpha(alpha)

Load `alpha` for the notebook-level `medium` from the "rec_inference" split,
requesting the `:userid` and `:itemid` fields.
"""
function get_rec_alpha(alpha)
    requested = [:userid, :itemid]
    return get_raw_split("rec_inference", medium, requested, alpha)
end;

In [None]:
"""
    check_raw_alpha(alpha)

Compare the training-time and inference-time values of `alpha`.

Returns the tuple `(maximum, mean)` of the absolute differences between the
inference alpha, looked up at each training row's item, and the training alpha.
"""
function check_raw_alpha(alpha)
    x = get_training_alpha(alpha)
    y = get_rec_alpha(alpha)
    # The inference alpha is indexed by `itemid + 1`; presumably item ids are
    # zero-based and the inference vector holds one entry per item (TODO confirm).
    d = y.alpha[x.itemid.+1] - x.alpha
    absdiffs = abs.(d)
    return maximum(absdiffs), mean(absdiffs)
end;

In [None]:
"""
    check_skew()

Log, for every alpha of the notebook-level `medium`, the result of
`check_raw_alpha`.

The alphas are the baseline rating plus one entry per metric in `ALL_METRICS` for
each of the BagOfWords, Transformer and Linear models. Nothing is logged when the
account has no training uid or when its "test" split is empty.
"""
function check_skew()
    model_prefixes = ("BagOfWords/v1", "Transformer/v1", "Linear")
    alphas = ["$medium/Baseline/rating"]
    for prefix in model_prefixes, metric in ALL_METRICS
        push!(alphas, "$medium/$prefix/$metric")
    end

    # Skip accounts that were not part of the training data.
    isnothing(get_training_uid(source, username)) && return
    # Skip accounts with no rows in the "test" split.
    isempty(get_training_split(medium, "test").userid) && return

    for name in alphas
        @info (name, check_raw_alpha(name)...)
    end
end

In [None]:
# Run the skew check for the account configured at the top of the notebook.
check_skew()