# LinearModel

In [None]:
# Notebook-level settings. Both start out as empty strings and are read as globals
# by the cells below (e.g. when building the output directory names).
# NOTE(review): presumably these are filled in by whatever drives this notebook
# before the cells run — confirm.
medium = ""
task = ""

In [None]:
import SparseArrays: sparse
import NBInclude: @nbinclude
@nbinclude("../Alpha.ipynb")
@nbinclude("EnsembleInputs.ipynb");

## Suppress seen shows
* Zero out any items the user has already seen and rescale the probability distribution

In [None]:
"""
    get_seen_probabilities(alpha::String)

Return a vector with one entry per user holding the sum of the ratings that `alpha`
stores for that user in the "training" split.

`alpha` must be one of `implicit_raw_alphas(task, medium)`; otherwise an
`AssertionError` is raised. Note that `task` and `medium` are read from the
notebook-level globals, not passed in.
"""
function get_seen_probabilities(alpha::String)
    @assert alpha in implicit_raw_alphas(task, medium) "alpha $alpha is not an implicit raw alpha"
    content = "implicit"
    df = read_alpha(alpha, "training", task, content, medium)
    users = df.user
    ratings = df.rating
    n = length(users)

    # Each chunk of rows accumulates into its own column. Indexing the scratch
    # buffer by `Threads.threadid()` is not safe here: with the default dynamic
    # scheduler a task may migrate between threads mid-update, and thread ids are
    # not guaranteed to stay within `1:Threads.nthreads()`.
    nchunks = clamp(n, 1, Threads.nthreads())
    p_seen = zeros(Float32, num_users(medium), nchunks)
    Threads.@threads for chunk = 1:nchunks
        first_row = div((chunk - 1) * n, nchunks) + 1
        last_row = div(chunk * n, nchunks)
        for i = first_row:last_row
            p_seen[users[i], chunk] += ratings[i]
        end
    end
    # Collapse the per-chunk partial sums into a single per-user total.
    return vec(sum(p_seen, dims = 2))
end

"""
    read_raw_alpha(alpha, split, task, content, medium)

Read the raw predictions of `alpha` for `split`.

For the "training" split, or when `alpha` is not one of
`implicit_raw_alphas(task, medium)`, the result of `read_raw_alpha_impl` is returned
unchanged. Otherwise every rating is divided by `1 - p_seen[user]`, where `p_seen`
comes from `get_seen_probabilities(alpha)`, and the rescaled ratings are wrapped in a
new `RatingsDataset`. Ratings whose remaining mass `1 - p_seen[user]` is not larger
than `sqrt(eps(Float32))` are left as they are.
"""
function read_raw_alpha(
    alpha::String,
    split::String,
    task::String,
    content::String,
    medium::String,
)
    # Pass-through case: nothing to renormalize.
    if split == "training" || !(alpha in implicit_raw_alphas(task, medium))
        return read_raw_alpha_impl(alpha, split, task, content, medium)
    end

    tolerance = sqrt(eps(Float32))
    split_rows = get_raw_split(split, task, content, medium; fields = [:user])
    scores = read_raw_alpha_impl(alpha, split, task, content, medium).rating
    seen_mass = get_seen_probabilities(alpha)
    row_users = split_rows.user

    # Renormalize over the probability mass not already spent on seen items.
    # The guard avoids dividing by a (near-)zero remainder.
    Threads.@threads for row in eachindex(scores)
        remaining = 1 - seen_mass[row_users[row]]
        if remaining > tolerance
            scores[row] /= remaining
        end
    end
    return RatingsDataset(rating = scores, medium = medium)
end;

## Save predictions

In [None]:
"""
    save_linear_model(alphas, task, content, medium, outdir)

Fit a linear combination of `alphas` and write its predictions to `outdir`.

The coefficients come from `regress` on the "test" split; they are logged and stored
with `write_params`. A prediction closure is then handed to `write_alpha`. `content`
must be "explicit" or "implicit"; any other value fails an assertion. For "implicit"
the closure appends a constant column equal to `1 / num_items(medium)` before
multiplying by the coefficients.
"""
function save_linear_model(
    alphas::Vector{String},
    task::String,
    content::String,
    medium::String,
    outdir::String,
)
    # Translate the content kind into the flag expected by `regress`.
    implicit = if content == "implicit"
        true
    elseif content == "explicit"
        false
    else
        @assert false
    end

    set_logging_outdir(outdir)
    (_, β) = regress(alphas, "test", task, content, medium, implicit)
    @info "alphas: $alphas"
    @info "coefficients: $β"
    write_params(Dict("β" => β, "alphas" => alphas), outdir)

    # Prediction callback for `write_alpha`: one column per alpha, times β.
    function model(
        split::String,
        task::String,
        content::String,
        medium::String;
        raw_splits::Bool,
    )
        GC.gc()
        loader = raw_splits ? read_raw_alpha : read_alpha
        columns = map(a -> loader(a, split, task, content, medium).rating, alphas)
        if implicit
            # Extra constant feature: a uniform distribution over items.
            uniform = fill(1.0f0 / num_items(medium), length(columns[1]))
            push!(columns, uniform)
        end
        design = hcat(columns...)
        return vec(design * β)
    end

    return write_alpha(
        model,
        medium,
        outdir;
        task = task,
        by_split = true,
        log = true,
        log_alphas = String[],
        log_task = task,
        log_content = content,
        log_splits = ["test"],
    )
end;

In [None]:
# Fit and save the linear model over the explicit raw alphas.
let alphas = explicit_raw_alphas(task, medium), outdir = "$medium/$task/LinearExplicit"
    save_linear_model(alphas, task, "explicit", medium, outdir)
end;

In [None]:
# Fit and save the linear model over the implicit raw alphas.
let alphas = implicit_raw_alphas(task, medium), outdir = "$medium/$task/LinearImplicit"
    save_linear_model(alphas, task, "implicit", medium, outdir)
end;