# LinearModel

In [1]:
import SparseArrays: sparse
import NBInclude: @nbinclude
@nbinclude("../Alpha.ipynb")
@nbinclude("EnsembleInputs.ipynb");

## Suppress seen shows
* Zero out any items the user has already seen and rescale the probability distribution

In [2]:
"""
    get_seen_probabilities(alpha::String)

Return a `Float32` vector of length `num_users()` whose `u`-th entry is the sum of the
training-split ratings that `alpha` assigns to user `u`.

`alpha` must belong to `implicit_raw_alphas` or `ptw_raw_alphas`; any other value raises
an `AssertionError`.
"""
function get_seen_probabilities(alpha::String)
    content = if alpha in implicit_raw_alphas
        "implicit"
    elseif alpha in ptw_raw_alphas
        "ptw"
    else
        # Raised explicitly so the failure names the offending alpha.
        throw(AssertionError("$alpha is neither an implicit nor a ptw raw alpha"))
    end
    df = read_alpha(alpha, "training", content)
    # assumes df.user holds 1-based user indices in 1:num_users() — TODO confirm
    n = length(df.user)
    # One accumulator column per chunk of rows. Each loop iteration owns exactly one
    # column, so the result does not depend on which thread runs it; indexing by
    # `Threads.threadid()` is unsafe because tasks may migrate between threads.
    nchunks = max(1, min(Threads.nthreads(), n))
    p_seen = zeros(Float32, num_users(), nchunks)
    Threads.@threads for c = 1:nchunks
        lo = div((c - 1) * n, nchunks) + 1
        hi = div(c * n, nchunks)
        for i = lo:hi
            p_seen[df.user[i], c] += df.rating[i]
        end
    end
    # Collapse the per-chunk partial sums into a single per-user total.
    vec(sum(p_seen, dims = 2))
end

"""
    read_raw_alpha(alpha::String, split::String, content::String)

Read the raw predictions of `alpha` for `split`/`content`.

For the `"training"` split, or for an `alpha` that is in neither `implicit_raw_alphas`
nor `ptw_raw_alphas`, the result of `read_raw_alpha_impl` is returned untouched.
Otherwise each rating is divided by `1 - p_seen[user]`, where `p_seen` comes from
`get_seen_probabilities(alpha)`, and the rescaled ratings are wrapped in a
`RatingsDataset`.
"""
function read_raw_alpha(alpha::String, split::String, content::String)
    needs_rescale =
        split != "training" && (alpha in implicit_raw_alphas || alpha in ptw_raw_alphas)
    needs_rescale || return read_raw_alpha_impl(alpha, split, content)

    tol = sqrt(eps(Float32))
    users = get_raw_split(split, content; fields = [:user]).user
    preds = read_raw_alpha_impl(alpha, split, content).rating
    seen_mass = get_seen_probabilities(alpha)
    # Renormalise by the probability mass left after removing already-seen items.
    # NOTE(review): presumably the raw split itself only contains unseen items — confirm.
    Threads.@threads for k = 1:length(preds)
        unseen_mass = 1 - seen_mass[users[k]]
        # Leave the rating as-is when (almost) all mass is on seen items, to avoid
        # dividing by a value at or near zero.
        if unseen_mass > tol
            preds[k] /= unseen_mass
        end
    end
    return RatingsDataset(rating = preds)
end;

## Save predictions

In [3]:
"""
    save_linear_model(alphas::Vector{String}, content::String, outdir::String)

Fit a linear combination of `alphas` for `content` and save its predictions.

The coefficients `β` come from `regress`; they are logged and stored with `write_params`
under `outdir`. A prediction closure is then handed to `write_alpha`. `content` must be
`"explicit"`, `"implicit"` or `"ptw"`; any other value fails an assertion.
"""
function save_linear_model(alphas::Vector{String}, content::String, outdir::String)
    # "implicit" and "ptw" are both handled as implicit models.
    implicit = if content == "explicit"
        false
    elseif content in ["implicit", "ptw"]
        true
    else
        @assert false
    end

    set_logging_outdir(outdir)
    _, β = regress(alphas, content, implicit)
    @info "alphas: $alphas"
    @info "coefficients: $β"
    write_params(Dict("β" => β, "alphas" => alphas), outdir)

    # Prediction closure: one column per alpha (plus a constant column for implicit
    # models), multiplied by the fitted coefficients.
    function model(split::String, content::String; raw_splits::Bool)
        loader = raw_splits ? read_raw_alpha : read_alpha
        columns = map(name -> loader(name, split, content).rating, alphas)
        if implicit
            # Constant column with value 1 / num_items().
            baseline = fill(1.0f0 / num_items(), length(columns[1]))
            push!(columns, baseline)
        end
        design = hcat(columns...)
        return vec(design * β)
    end

    return write_alpha(
        model,
        outdir;
        by_split = true,
        log = true,
        log_alphas = String[],
        log_content = content,
        log_splits = ["validation", "test"],
    )
end;

In [4]:
# Fit and save the linear model over `explicit_raw_alphas` for "explicit" content,
# using "LinearExplicit" as the output directory name.
save_linear_model(explicit_raw_alphas, "explicit", "LinearExplicit");

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:49:48 alphas: ["ExplicitUserItemBiases", "NeuralExplicitAutoencoderUntuned"]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:49:48 coefficients: Float32[1.0496258, 0.7263953]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:49:52 validation loss: 1.7381309, β: Float32[1.000004]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:49:53 test loss: 1.741148, β: Float32[1.000004]


In [5]:
# Fit and save the linear model over `implicit_raw_alphas` for "implicit" content,
# using "LinearImplicit" as the output directory name.
save_linear_model(implicit_raw_alphas, "implicit", "LinearImplicit");

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:51:31 alphas: ["NeuralImplicitItemBiases", "NeuralImplicitAutoencoderUntuned"]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:51:31 coefficients: Float32[0.14596194, 0.8540367, 1.4226254f-6]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:52:24 validation loss: 6.586966, β: Float32[8.50817f-7, 0.99999917]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:53:03 test loss: 6.585931, β: Float32[8.50817f-7, 0.99999917]


In [6]:
# Fit and save the linear model over `ptw_raw_alphas` for "ptw" content,
# using "LinearPtw" as the output directory name.
save_linear_model(ptw_raw_alphas, "ptw", "LinearPtw");

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:56:19 alphas: ["NeuralPtwItemBiases", "NeuralPtwAutoencoderUntuned"]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:56:19 coefficients: Float32[0.3110793, 0.68877864, 0.00014203173]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:56:27 validation loss: 7.356245, β: Float32[1.9048655f-5, 0.9999809]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220828 19:56:32 test loss: 7.357166, β: Float32[1.9048655f-5, 0.9999809]
