# Generalized Neural Network
* A denoising autoencoder 

In [1]:
# Identifier for this model; it feeds the RNG seed and the model checkpoint paths below
const name = "GNN.Rating.Test"

"GNN.Rating.Test"

In [2]:
using Random
using StatsBase
import BSON

In [3]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [4]:
# Function that batches and the model are piped through before use
# (`gpu` is presumably provided by Flux via Alpha.ipynb — confirm)
const device = gpu;

In [5]:
# Seed the global RNG with a value derived from the model name
Random.seed!(20220410 * hash(name));

## Parameters

In [6]:
# Residual sources passed to `get_residuals` when building the network inputs
const training_residuals = ["UserItemBiases"]
# Residual sources passed to `get_residuals` for the targets and the residualized loss
const validation_residuals = ["UserItemBiases"]
# Append per-user summary features (plus their squares and square roots) to the inputs
const derived_features = true
# true: targets are set to 1 and the cross-entropy loss is used; false: rating loss
const train_implicit_model = false
# Must stay true for now: `get_epoch` asserts it
const autoencode = true
# Number of users per batch during training and in `evaluate`
const batch_size = 128
# Value handed to the `Dropout` layer in front of the encoder
const dropout_perc = 0.5
# Third argument of `ADAMW` (presumably the weight-decay strength — confirm)
const l2penalty = 1e-5
# First argument of `ADAMW`
const learning_rate = 0.001
# Not read by any active code in this notebook view
const num_seeds = 1;
# Only "ADAM" is handled by `train_model`
const optimizer = "ADAM";
# Number of consecutive non-improving validation checks tolerated before training stops
const patience = 100;
# Selects `get_residualized_loss` instead of `get_loss` for the training-time checks
const use_residualized_validation_loss = false;
# Per-user loss weighting for the "training" split (see `weighting_scheme`)
const training_weight_scheme = "linear"
# Per-user loss weighting for every other split
const validation_weight_scheme = "constant"
# Per-user weighting used when sampling the order of training users
const sampling_weight_scheme = "linear"
# TODO dropout anti-scaling
# TODO make layer sizes / number of layers configurable
# TODO train models with different parameter settings

## Data Preprocessing

In [7]:
# Dimensions of the (item, user) matrices built below; each gets one extra slot
const n_items = num_items() + 1 # leave room to map unseen items
const n_users = maximum(get_split("training").user) + 1; # leave room to map unseen users

In [8]:
"""
    to_sparse_mat(split)

Return an `n_items × n_users` sparse matrix holding `split.rating`, with
`split.item` as row indices and `split.user` as column indices.
"""
# Users are the columns because column accesses are faster than row accesses.
to_sparse_mat(split) =
    sparse(split.item, split.user, split.rating, n_items, n_users)

to_sparse_mat (generic function with 1 method)

In [9]:
"""
    get_derived_feature(split, agg)

Compute one scalar feature per user from the observations in `split`.

For every user the ratings are summed and counted, then `agg(sum, count)` is
applied to each user's pair. The result is returned as a sparse `1 × n_users`
matrix.

# Arguments
- `split`: object exposing parallel `user` and `rating` collections.
- `agg`: two-argument function mapping `(sum, count)` to the feature value.
"""
function get_derived_feature(split, agg)
    n_obs = length(split.rating)
    n_chunks = Threads.nthreads()
    # One accumulator column per chunk of observations. Indexing by chunk rather
    # than by `Threads.threadid()` keeps each column owned by exactly one loop
    # iteration, so the result does not depend on how tasks are scheduled.
    sums = zeros(Float32, n_users, n_chunks)
    counts = zeros(Float32, n_users, n_chunks)
    @tprogress Threads.@threads for chunk = 1:n_chunks
        # contiguous, non-overlapping slices that together cover 1:n_obs
        lo = (chunk - 1) * n_obs ÷ n_chunks + 1
        hi = chunk * n_obs ÷ n_chunks
        for i = lo:hi
            u = split.user[i]
            sums[u, chunk] += split.rating[i]
            counts[u, chunk] += 1
        end
    end
    total_sums = sum(sums, dims = 2)
    total_counts = sum(counts, dims = 2)
    return sparse(agg.(total_sums, total_counts)')
end;

In [36]:
"""
    weighting_scheme(x, scheme)

Turn a non-negative count `x` into a weight.

# Arguments
- `x`: the count to convert.
- `scheme`: one of
  - `"linear"`: the weight is `x` itself;
  - `"constant"`: one when `x > 0`, zero otherwise;
  - `"inverse"`: `1 / x` when `x > 0`, zero otherwise.

# Returns
A weight whose type depends only on the type of `x` (for example a `Float32`
count always yields a `Float32` weight), so broadcasting this function produces
a concretely typed array.

# Throws
- `ArgumentError` if `scheme` is not one of the names above.
"""
function weighting_scheme(x, scheme)
    if scheme == "linear"
        return x
    elseif scheme == "constant"
        return x > 0 ? one(x) : zero(x)
    elseif scheme == "inverse"
        # zero(float(x)) has the same type as 1 / x, keeping both branches aligned
        return x > 0 ? 1 / x : zero(float(x))
    else
        throw(ArgumentError("Unknown weighting scheme $scheme"))
    end
end;

In [10]:
"""
    get_epoch(split)

Assemble the `(X, Y, W)` triple used to train or score the model on `split`.

- `X`: network inputs, always built from the training data: explicit residuals
  stacked on top of the implicit matrix, optionally followed by the derived
  per-user features together with their squares and square roots.
- `Y`: targets taken from the residuals of `split`; every stored value is set
  to 1 when `train_implicit_model` is on.
- `W`: per-user loss weights derived from each user's number of rows in `split`.
"""
function get_epoch(split)
    # todo support autoencode = false
    @assert autoencode

    # inputs
    explicit_inputs = to_sparse_mat(get_residuals("training", training_residuals))
    implicit_inputs = to_sparse_mat(get_split("implicit_training"))
    X = vcat(explicit_inputs, implicit_inputs)
    if derived_features
        item_fraction = (_, count) -> count / n_items
        scaled_mean = (total, count) -> total / max(1, count) / 10
        # fraction of implicit items
        implicit_fraction =
            get_derived_feature(get_split("implicit_training"), item_fraction)
        # fraction of seen items
        seen_fraction = get_derived_feature(get_split("training"), item_fraction)
        # average item rating
        mean_rating = get_derived_feature(get_split("training"), scaled_mean)
        extras = vcat(implicit_fraction, seen_fraction, mean_rating)
        X = vcat(X, extras, extras .^ 2, sqrt.(extras))
    end

    # outputs
    Y = to_sparse_mat(get_residuals(split, validation_residuals))
    if train_implicit_model
        Y.nzval .= 1
    end

    # how much each user counts in the loss function
    scheme = split == "training" ? training_weight_scheme : validation_weight_scheme
    W = get_derived_feature(
        get_split(split),
        (_, count) -> weighting_scheme(count, scheme),
    )

    return X, Y, W
end;

In [11]:
"""
    get_sampling_order(split)

Draw `n_users` user indices from `1:n_users`, where each user's sampling weight
is its row count in `split` passed through `sampling_weight_scheme`.
"""
function get_sampling_order(split)
    per_user = get_derived_feature(
        get_split(split),
        (_, count) -> weighting_scheme(count, sampling_weight_scheme),
    )
    user_weights = Weights(vec(collect(per_user)))
    return sample(1:n_users, user_weights, n_users)
end;

In [12]:
"""
    get_batch(epoch, iter, batch_size, sampling_order)

Extract the `iter`-th batch of columns from every array in `epoch`.

The column indices are the `iter`-th window of `batch_size` entries of
`sampling_order`, clipped to the number of columns of `epoch[1]`. Each slice is
densified with `collect` and passed through `device`. The slices are returned
wrapped in a one-element vector.
"""
function get_batch(epoch, iter, batch_size, sampling_order)
    first_pos = (iter - 1) * batch_size + 1
    last_pos = min(iter * batch_size, size(epoch[1], 2))
    columns = sampling_order[first_pos:last_pos]
    slices = map(part -> device(collect(part[:, columns])), epoch)
    return [slices]
end;

"""
    get_batch(epoch, iter, batch_size)

Same as the four-argument method, visiting the columns of `epoch[1]` in their
natural order.
"""
get_batch(epoch, iter, batch_size) =
    get_batch(epoch, iter, batch_size, 1:size(epoch[1], 2));

## Model

In [13]:
"""
    generate_model()

Build the autoencoder and move it to `device`.

The network is input dropout followed by a three-layer ReLU encoder
(512, 256, 128 units) and a mirrored decoder whose last layer is linear with
`n_items` outputs.
"""
function generate_model()
    # Inputs: the user's ratings for all shows (unseen shows map to zero), the
    # implicit ratings, and the heterogeneous derived features.
    # Outputs: one value per show, read as a rating or as an implicit logit.
    # Ratings are trained with MSE on observed shows, implicit ratings with
    # cross-entropy.
    #
    # The 9 extra inputs are the three derived features built in `get_epoch`,
    # each supplied raw, squared and square-rooted.
    n_inputs = n_items + n_items + (derived_features ? 9 : 0)
    encoder =
        Chain(Dense(n_inputs, 512, relu), Dense(512, 256, relu), Dense(256, 128, relu))
    decoder = Chain(Dense(128, 256, relu), Dense(256, 512, relu), Dense(512, n_items))
    # a single transfer is enough; the model was previously piped to `device` twice
    return Chain(Dropout(dropout_perc), encoder, decoder) |> device
end;

## Loss Functions

In [16]:
# Run model `m` over all users and read off one prediction per row of `split`.
#
# Returns a Float32 vector with one entry per row of `get_split(split)`. Entries
# start at zero and are overwritten with the model output for that row's item
# and user.
function evaluate(m, split)
    testmode!(m)
    BLAS.set_num_threads(1)

    df = get_split(split)
    users = df.user
    items = df.item

    # index users
    # Build, per thread, a map from user id to the row positions of that user,
    # then merge the per-thread maps by concatenating the position lists.
    # NOTE(review): the per-thread dictionaries are selected with
    # `Threads.threadid()`; confirm the loop is scheduled so that an iteration
    # cannot change thread between the lookup and the `push!`.
    user_to_output_idxs = [Dict() for t = 1:Threads.nthreads()]
    @tprogress Threads.@threads for j = 1:length(users)
        u = users[j]
        t = Threads.threadid()
        if u ∉ keys(user_to_output_idxs[t])
            user_to_output_idxs[t][u] = []
        end
        push!(user_to_output_idxs[t][u], j)
    end
    user_to_output_idxs = merge(vcat, user_to_output_idxs...)

    # compute predictions
    ratings = zeros(Float32, length(users))
    epoch = get_epoch(split)
    # Swap the weights for a row of user indices so each batch carries the ids
    # of the users in its columns.
    epoch = (epoch[1], epoch[2], collect(1:n_users)')
    # NOTE(review): `m` is called from several threads at once — confirm that is
    # safe for the model and device in use.
    @tprogress Threads.@threads for iter = 1:Int(ceil(n_users / batch_size))
        # `get_batch` wraps the slices in a one-element vector
        batch = get_batch(epoch, iter, batch_size)[1]
        alpha = m(batch[1]) |> cpu
        if train_implicit_model
            # normalise each column so it sums to one (softmax over items)
            alpha .= exp.(alpha)
            alpha .= alpha ./ sum(alpha, dims = 1)
        end

        for j = 1:size(alpha)[2]
            # NOTE(review): `batch[3]` has been passed through `device`, so this
            # is element-wise indexing of a device array — verify on GPU.
            u = batch[3][1, j]
            if u ∉ keys(user_to_output_idxs)
                continue
            end
            # copy the prediction for every (user, item) row belonging to `u`
            for output_idx in user_to_output_idxs[u]
                ratings[output_idx] = alpha[items[output_idx], j]
            end
        end
    end

    ratings
end;

In [14]:
"""
    rating_loss(ŷ, y, weights)

Weighted average of per-user mean squared errors.

Only entries where `y` is non-zero contribute to a user's error. A user (column)
with no such entries has an error of zero, because the divisor is floored at one.
The per-user errors are then averaged with `weights`.
"""
function rating_loss(ŷ, y, weights)
    # entries equal to zero are treated as unseen items
    seen = y .!= 0
    squared_error = ((ŷ .- y) .* seen) .^ 2
    n_seen = max.(one(eltype(weights)), sum(seen, dims = 1))
    user_mse = sum(squared_error, dims = 1) ./ n_seen
    return dot(user_mse, weights) / sum(weights)
end

"""
    implicit_loss(ŷ, y, weights)

Logit cross-entropy between `ŷ` and `y`, aggregated across users with `weights`
and divided by the total weight.
"""
function implicit_loss(ŷ, y, weights)
    # TODO handle empty users (proper weighting!)
    weighted_total = Flux.logitcrossentropy(ŷ, y; agg = x -> dot(x, weights))
    return weighted_total / sum(weights)
end

"""
    loss(m, x, y, weights)

Apply model `m` to `x` and score the result against `y` with the loss selected
by `train_implicit_model`.
"""
function loss(m, x, y, weights)
    ŷ = m(x)
    if train_implicit_model
        return implicit_loss(ŷ, y, weights)
    else
        return rating_loss(ŷ, y, weights)
    end
end;

In [15]:
"""
    get_loss(m, split)

Weighted loss of model `m` over all users of `split`, evaluated in test mode.

Users are processed in fixed batches of 16. Each batch loss is multiplied by
the batch's total weight, and the sum is divided by the total weight of the
epoch, so the result equals the weighted loss over the whole split.
"""
function get_loss(m, split)
    testmode!(m)
    BLAS.set_num_threads(1)

    epoch = get_epoch(split)
    batch_size = 16 # evaluation batch size, independent of the training one
    n_batches = Int(ceil(n_users / batch_size))
    # One slot per batch: every iteration writes only its own entry, so the
    # accumulation does not rely on `Threads.threadid()` staying fixed.
    losses = zeros(n_batches)
    @tprogress Threads.@threads for iter = 1:n_batches
        batch = get_batch(epoch, iter, batch_size)
        batch_weight = sum(batch[1][3])
        # A batch with zero total weight has an undefined (0 / 0) loss and must
        # contribute nothing instead of turning the whole sum into NaN.
        if batch_weight > 0
            losses[iter] = loss(m, batch[1]...) * batch_weight
        end
    end
    sum(losses) / sum(epoch[3])
end;

In [46]:
"""
    get_residualized_loss(m, split)

Weighted squared error left after regressing the residual ratings of `split`
on the predictions of `m` with a single coefficient and no intercept.

Only valid for the rating model; asserts that `train_implicit_model` is off.
The fitted coefficient is logged.
"""
function get_residualized_loss(m, split)
    @assert !train_implicit_model

    predicted = evaluate(m, split)
    df = get_residuals(split, validation_residuals)
    # Dividing a user's weight by their rating count spreads it evenly over
    # that user's individual ratings.
    user_weights = get_derived_feature(
        df,
        (_, count) ->
            weighting_scheme(count, validation_weight_scheme) *
            weighting_scheme(count, "inverse"),
    )
    weights = zeros(eltype(predicted), length(df.user))
    Threads.@threads for i in eachindex(weights)
        weights[i] = user_weights[df.user[i]]
    end

    # weighted least squares: scale both sides by the square root of the weights
    scale = sqrt.(weights)
    target = df.rating .* scale
    regressor = predicted .* scale
    β = regressor \ target
    @info "beta: $β"
    return sum((target .- regressor .* β) .^ 2) / sum(weights)
end;

In [28]:
# Load the model stored in this notebook's `model.1.bson` file into `m`
BSON.@load "../../data/alphas/$name/model.1.bson" m

In [29]:
# Weighted loss of the loaded model on the validation split
get_loss(m, "validation")

[32mProgress: 100%|███████████████████████████| Time: 0:00:11 ( 0.88 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:08 ( 0.87 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:08 ( 0.85 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.78 μs/it)[39m
[38;5;6m[1m┌ [22m[39m[38;5;6m[1mInfo: [22m[39m20220430 13:11:44 The GPU function is being called but the GPU is not accessible. 
[38;5;6m[1m└ [22m[39mDefaulting back to the CPU. (No action is required if you want to run on the CPU).
[38;5;6m[1m┌ [22m[39m[38;5;6m[1mInfo: [22m[39m20220430 13:11:44 The GPU function is being called but the GPU is not accessible. 
[38;5;6m[1m└ [22m[39mDefaulting back to the CPU. (No action is required if you want to run on the CPU).
[38;5;6m[1m┌ [22m[39m[38;5;6m[1mInfo: [22m[39m20220430 13:11:44 The GPU function is being called but the GPU is not accessible. 
[38;5;6m[1m└ [22m[39mDefaulting back to t

1.5256684128737303

In [47]:
# Same split, scored after rescaling the predictions by a fitted coefficient
get_residualized_loss(m, "validation")

[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.58 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:13 ( 0.98 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:10 ( 1.07 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:10 ( 1.07 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.37 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:19:45 ( 1.84  s/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 ( 0.64 μs/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220430 20:35:20 beta: 1.094241


1.5234805f0

## Training

In [21]:
"""
    continue_training(m, model_name)

Score `m` on the training and validation splits, checkpoint it when the
validation loss improves, and report whether training should go on.

Reads and updates the globals `best_loss` and `checks_without_improvement`.
On improvement the model is saved under this notebook's data directory as
`model.<model_name>.bson`.

# Returns
`true` while fewer than `patience` consecutive checks have failed to improve.
"""
function continue_training(m, model_name)
    # fixed: the fallback previously named the non-existent `get_los`
    loss_fn = use_residualized_validation_loss ? get_residualized_loss : get_loss
    training_loss = loss_fn(m, "training")
    validation_loss = loss_fn(m, "validation")
    @info "training loss $training_loss, validation loss $validation_loss, best_loss $best_loss"
    if validation_loss < best_loss
        global best_loss = validation_loss
        global checks_without_improvement = 0
        BSON.@save "../../data/alphas/$name/model.$(model_name).bson" m
    else
        global checks_without_improvement += 1
    end
    checks_without_improvement < patience
end;

In [22]:
"""
    train_epoch!(m, opt, model_name)

Train `m` for one pass over a freshly sampled order of the training users.

Roughly ten times per epoch the model is scored and possibly checkpointed via
`continue_training`. The result of those checks is not used to stop the epoch
early. Train mode and the BLAS thread count are restored after each check
because the loss functions change both.
"""
function train_epoch!(m, opt, model_name)
    trainmode!(m)
    BLAS.set_num_threads(Threads.nthreads())
    ps = Flux.params(m)
    train_loss(x, y, w) = loss(m, x, y, w)
    epoch = get_epoch("training")
    sampling_order = get_sampling_order("training")

    nbatches = Int(ceil(size(epoch[1], 2) / batch_size))
    # `nbatches ÷ 10` is zero when there are fewer than ten batches, and taking
    # a remainder by zero throws; check after every batch in that case.
    check_interval = max(1, nbatches ÷ 10)
    @showprogress for iter = 1:nbatches
        batch = get_batch(epoch, iter, batch_size, sampling_order)
        Flux.train!(train_loss, ps, batch, opt)

        if iter % check_interval == 0
            continue_training(m, model_name)
            trainmode!(m)
            BLAS.set_num_threads(Threads.nthreads())
        end
    end
end;

In [23]:
"""
    train_model(model_name, seed)

Create a fresh model seeded with `seed` and train it until `continue_training`
reports that patience has run out.

Resets the globals `best_loss` and `checks_without_improvement` before training.

# Returns
A `Dict` with the keys `"model"` (the trained model object), `"name"` and
`"seed"`.

# Throws
- `ArgumentError` if `optimizer` names an unsupported optimizer.
"""
function train_model(model_name, seed)
    # create model
    Random.seed!(seed)
    m = generate_model()

    # setup optimizer
    if optimizer == "ADAM"
        opt = ADAMW(learning_rate, (0.9, 0.999), l2penalty)
    else
        # previously `opt` stayed undefined and failed later with UndefVarError
        throw(ArgumentError("Unknown optimizer $optimizer"))
    end
    global best_loss = Inf
    global checks_without_improvement = 0

    # Train model
    while continue_training(m, model_name)
        train_epoch!(m, opt, model_name)
    end

    return Dict("model" => m, "name" => model_name, "seed" => seed)
end;

In [24]:
# train_model("1", 20220422)

## Write predictions

In [25]:
"""
    make_prediction(sparse_preds, users, items)

Look up `sparse_preds[users[k], items[k]]` for every position `k` and return
the values as a `Float64` vector of the same length as `users`.
"""
function make_prediction(sparse_preds, users, items)
    n_preds = length(users)
    out = zeros(n_preds)
    @tprogress Threads.@threads for k = 1:n_preds
        out[k] = sparse_preds[users[k], items[k]]
    end
    return out
end;

In [26]:
# Score a model on the training, validation and test splits and write its
# parameters and predictions to the output directory named by `params["name"]`.
#
# `params` is indexed with the keys "model" and "name".
#
# NOTE(review): this function looks out of sync with the rest of the notebook;
# confirm the intended data flow before relying on it:
#   * `train_model` stores the model object itself under "model", whereas
#     `BSON.@load` below is given `params["model"]` as the thing to load from;
#   * `evaluate` returns a plain vector of ratings, yet the combined result is
#     accessed as `df.user` / `df.item` / `df.rating`;
#   * `reduce(cat, ...)` calls `cat` without a `dims` keyword — verify it runs.
function save_model(params)
    BSON.@load params["model"] m
    training = evaluate(m, "training")
    validation = evaluate(m, "validation")
    test = evaluate(m, "test")
    df = reduce(cat, [training, validation, test])
    # (user, item) -> prediction lookup table used by `make_prediction`
    sparse_preds = sparse(df.user, df.item, df.rating)

    write_params(params, outdir = params["name"])
    write_predictions(
        (users, items) -> make_prediction(sparse_preds, users, items),
        residual_alphas = validation_residuals,
        outdir = params["name"],
        implicit = train_implicit_model,
    )
end;

In [27]:
# function fit(num_seeds, start = 1)
#     seeds = hash.(rand(Int, num_seeds))
#     for i = start:length(seeds)
#         save_model(train_model(i, seeds[i]))
#     end
# end