# Generalized Neural Network
* A denoising autoencoder 

In [1]:
# Hyperparameters and switches for this notebook.

# Model name; hashed into the notebook-level RNG seed and passed to `train_model`.
const name = "GNN.Rating.Test"
# Residual sources passed to `get_residuals` when building the model inputs.
const training_residuals = ["UserItemBiases"]
# Residual sources passed to `get_residuals` when building the targets, and to
# `write_predictions` as `residual_alphas`.
const validation_residuals = ["UserItemBiases"]
# When true, `get_epoch` appends per-user derived features to the inputs.
const derived_features = true
# When true, targets are set to 1 and the loss switches to logit cross-entropy.
const train_implicit_model = false
# `get_epoch` asserts that this is true; `false` is not supported yet.
const autoencode = true
# Number of matrix columns (users) per training batch.
const batch_size = 128
# Probability given to the `Dropout` layer placed in front of the encoder.
const dropout_perc = 0.5
# Third argument to `ADAMW` (presumably its weight decay — confirm against Flux docs).
const l2penalty = 1e-5
# First argument to `ADAMW`.
const learning_rate = 0.001
# NOTE(review): not read by any active code in this notebook.
const num_seeds = 1;
# Optimizer selector checked in `train_model`; only "ADAM" is handled there.
const optimizer = "ADAM";
# TODO dropout anti-scaling
# TODO train models with different parameter settings

In [2]:
using Random
import BSON

In [3]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [4]:
# Function used to move models and batches to the compute device. `gpu` is not defined
# in this notebook (presumably Flux's `gpu`, brought in via Alpha.ipynb — confirm).
const device = gpu;

In [5]:
# Seed the global RNG from a fixed constant combined with the hash of the model name.
# NOTE(review): `hash(name)` may differ between Julia versions, so the seed is only
# reproducible for a fixed Julia version — confirm if cross-version runs matter.
Random.seed!(20220410 * hash(name));

## Data preparation

In [6]:
# Dimensions of the (item, user) sparse matrices built by `to_sparse_mat`.
# `num_items` and `get_split` are not defined in this notebook.
const n_items = num_items() + 1 # leave room to map unseen items
const n_users = maximum(get_split("training").user) + 1; # leave room to map unseen users

In [7]:
"""
    to_sparse_mat(split)

Build an `n_items`-by-`n_users` sparse matrix from the `item`, `user` and `rating`
fields of `split`.

Items index the rows and users index the columns: column accesses are faster than row
accesses, and batches are later taken as ranges of user columns.
"""
function to_sparse_mat(split)
    rows, cols, vals = split.item, split.user, split.rating
    return sparse(rows, cols, vals, n_items, n_users)
end

to_sparse_mat (generic function with 1 method)

In [8]:
"""
    get_derived_feature(split, agg)

Compute one derived feature per user and return it as a sparse `1 x n_users` matrix.

For every user, the ratings in `split` are summed and counted (both as `Float32`), and
`agg(sum, count)` is applied elementwise to the per-user totals. Users that do not
appear in `split` get `agg(0, 0)`.

# Arguments
- `split`: object with parallel `user` and `rating` collections.
- `agg`: two-argument function `(sum, count) -> value`.
"""
function get_derived_feature(split, agg)
    n = length(split.rating)
    # Split the interactions into one contiguous chunk per thread. Every chunk owns its
    # own accumulator column, so no two loop iterations ever write to the same memory,
    # no matter how `@threads` schedules them. (Indexing the accumulators by
    # `Threads.threadid()` is only safe under static scheduling.)
    chunk_len = max(1, cld(n, Threads.nthreads()))
    chunks = collect(Iterators.partition(1:n, chunk_len))
    sums = zeros(Float32, n_users, length(chunks))
    counts = zeros(Float32, n_users, length(chunks))
    @tprogress Threads.@threads for c = 1:length(chunks)
        for i in chunks[c]
            u = split.user[i]
            sums[u, c] += split.rating[i]
            counts[u, c] += 1
        end
    end
    # Collapse the per-chunk partial results into a single column per statistic.
    total_sums = sum(sums, dims = 2)
    total_counts = sum(counts, dims = 2)
    # The adjoint turns the `n_users x 1` column into a `1 x n_users` row.
    return sparse(agg.(total_sums, total_counts)')
end;

In [9]:
"""
    get_epoch(split)

Build the input matrix `X` and target matrix `Y` for one pass over the users and return
them as `(X, Y)`, with the user columns of both shuffled by the same random permutation.

`X` always comes from the training data: training residuals stacked on top of the
implicit-training matrix, plus (when `derived_features` is set) three per-user derived
features together with their squares and square roots. `Y` holds the residuals of
`split`; when `train_implicit_model` is set, its stored values are all overwritten with 1.
"""
function get_epoch(split)
    # todo support autoencode = false
    @assert autoencode

    # inputs: explicit residuals stacked on top of the implicit interactions
    explicit_inputs = to_sparse_mat(get_residuals("training", training_residuals))
    implicit_inputs = to_sparse_mat(get_split("implicit_training"))
    X = vcat(explicit_inputs, implicit_inputs)
    if derived_features
        item_fraction = (total, n) -> n / n_items
        scaled_mean = (total, n) -> total / max(1, n) / 10

        # fraction of seen items
        seen_fraction = get_derived_feature(get_split("training"), item_fraction)
        # fraction of implicit items
        implicit_fraction =
            get_derived_feature(get_split("implicit_training"), item_fraction)
        # average item rating
        mean_rating = get_derived_feature(get_split("training"), scaled_mean)

        Xd = vcat(seen_fraction, implicit_fraction, mean_rating)
        X = vcat(X, Xd, Xd .^ 2, sqrt.(Xd))
    end

    # targets: residuals of the requested split
    Y = to_sparse_mat(get_residuals(split, validation_residuals))
    if train_implicit_model
        Y.nzval .= 1
    end

    # shuffle the users, keeping X and Y column-aligned
    order = randperm(size(X, 2))
    return X[:, order], Y[:, order]
end;

In [10]:
"""
    get_batch(X, Y, iter, batch_size)

Return the `iter`-th batch of user columns as a one-element vector holding the tuple
`(x, y)`, where both slices have been converted with `collect` and passed through
`device`.

The last batch is truncated at the final column of `X`.
"""
function get_batch(X, Y, iter, batch_size)
    first_col = (iter - 1) * batch_size + 1
    last_col = min(iter * batch_size, size(X, 2))
    cols = first_col:last_col
    x = collect(X[:, cols]) |> device
    y = collect(Y[:, cols]) |> device
    return [(x, y)]
end;

In [11]:
"""
    generate_model()

Construct the autoencoder and move it to `device`.

The input has `2 * n_items` rows (explicit residuals stacked on the implicit
interactions; unseen items are zero) plus, when `derived_features` is set, 9 rows of
derived features. The decoder emits `n_items` values per user, which `loss` scores
either as ratings (squared error on observed items) or as implicit logits (logit
cross-entropy), depending on `train_implicit_model`.
"""
function generate_model()
    # `get_epoch` appends 3 derived features plus their squares and square roots
    n_derived = derived_features ? 9 : 0
    n_inputs = n_items + n_items + n_derived
    encoder =
        Chain(Dense(n_inputs, 512, relu), Dense(512, 256, relu), Dense(256, 128, relu))
    decoder = Chain(Dense(128, 256, relu), Dense(256, 512, relu), Dense(512, n_items))
    # the model is moved to the device exactly once
    return Chain(Dropout(dropout_perc), encoder, decoder) |> device
end;

## Training

In [12]:
"""
    rating_loss(ŷ, y)

Mean squared error between `ŷ` and `y`, restricted to the entries where `y` is nonzero
(the items the user has seen). If `y` has no nonzero entry the mean is taken over an
empty selection.
"""
function rating_loss(ŷ, y)
    # TODO try mult on gpu
    observed = y .!= 0
    residuals = ŷ[observed] .- y[observed]
    return mean(residuals .^ 2)
end

# Loss used for the implicit model: logit cross-entropy between outputs and targets.
function implicit_loss(ŷ, y)
    return Flux.logitcrossentropy(ŷ, y)
end

# Run `m` on `x` and score the output against `y` with the loss selected by
# `train_implicit_model`.
function loss(m, x, y)
    ŷ = m(x)
    if train_implicit_model
        return implicit_loss(ŷ, y)
    end
    return rating_loss(ŷ, y)
end

loss (generic function with 1 method)

In [13]:
# Reset the global training state: best validation loss back to `Inf`, iteration and
# epoch counters back to zero.
function reset_training()
    global best_loss, iteration, epoch
    best_loss = Inf
    iteration = 0
    epoch = 0
end;

In [14]:
"""
    get_validation_loss(m)

Evaluate model `m` on the "validation" epoch and return the average loss per user
column: every batch loss is weighted by the number of columns in that batch, and the
weighted sum is divided by the total number of columns.

BLAS is limited to one thread while the batches are evaluated in parallel and is set
back to `Threads.nthreads()` afterwards, even if evaluation throws.
"""
function get_validation_loss(m)
    BLAS.set_num_threads(1)
    try
        X, Y = get_epoch("validation")
        # local batch size for evaluation; independent of the training `batch_size`
        batch_size = 16
        n_cols = size(X, 2)
        n_batches = cld(n_cols, batch_size)
        # One slot per batch: every iteration writes only its own element, so the
        # result does not depend on which thread runs which iteration.
        batch_losses = zeros(Float64, n_batches)
        @tprogress Threads.@threads for iter = 1:n_batches
            batch = get_batch(X, Y, iter, batch_size)
            batch_losses[iter] = loss(m, batch[1]...) * size(batch[1][1], 2)
        end
        return sum(batch_losses) / n_cols
    finally
        BLAS.set_num_threads(Threads.nthreads())
    end
end

get_validation_loss (generic function with 1 method)

In [15]:
"""
    continue_training(m, name)

Measure the validation loss of `m`, log it, and report whether training should go on.

Returns `true` when the validation loss is strictly below the global `best_loss`; in
that case `best_loss` is updated and `m` is saved with BSON. Returns `false` otherwise.
"""
function continue_training(m, name)
    validation_loss = get_validation_loss(m)
    @info "Epoch $epoch, loss $validation_loss, best_loss $best_loss"
    improved = validation_loss < best_loss
    improved || return false
    # new best: remember it and checkpoint the model
    global best_loss = validation_loss
    BSON.@save "../../data/alphas/$name/model.$(name).bson" m
    return true
end;

In [16]:
# function split_batch(batch, chunks)
#     X, Y = batch[1]
#     chunks = Iterators.partition(1:size(X)[2], Int(ceil(size(X)[2] / chunks)))
#     data = []
#     for chunk in chunks
#         push!(data, (X[:, chunk], Y[:, chunk]))
#     end
#     data
# end

# function cpu_train(loss, ps, data, opt)
#     grads = []
#     for _ = 1:Threads.nthreads()
#         push!(grads, nothing)
#     end
#     @time data = split_batch(data, Threads.nthreads())
#     @time Threads.@threads for d in data
#         gs = gradient(ps) do
#             loss(d...)
#         end
#         grads[Threads.threadid()] = gs
#     end
#     @time gs = reduce(.+, grads[grads.!=nothing])
#     @time Flux.update!(opt, ps, gs)
# end

In [17]:
"""
    train_model(model_name, seed)

Train a freshly generated model until the validation loss stops improving.

The RNG is seeded with `seed`, the global training state is reset, and epochs are run
for as long as `continue_training` returns `true` (it also saves the best model so far
under `model_name`). Returns `nothing`.

# Throws
- `ArgumentError`: if the configured `optimizer` is not supported.
"""
function train_model(model_name, seed)
    # create model
    Random.seed!(seed)
    m = generate_model()
    ps = Flux.params(m)
    BLAS.set_num_threads(Threads.nthreads())

    # setup optimizer
    reset_training()
    function evalcb()
        @info "Epoch $epoch, Iteration $iteration"
    end
    # log progress at most once every 60 seconds
    throttled_cb = Flux.throttle(evalcb, 60)
    if optimizer == "ADAM"
        opt = ADAMW(learning_rate, (0.9, 0.999), l2penalty)
    else
        # fail here rather than with an UndefVarError on `opt` inside the training loop
        throw(ArgumentError("unsupported optimizer \"$optimizer\"; expected \"ADAM\""))
    end
    training_loss(x, y) = loss(m, x, y)

    # Train model: the validation check runs before every epoch, including the first
    while continue_training(m, model_name)
        X, Y = get_epoch("training")
        @showprogress for iter = 1:cld(size(X, 2), batch_size)
            batch = get_batch(X, Y, iter, batch_size)
            Flux.train!(training_loss, ps, batch, opt)
            global iteration += 1
            throttled_cb()
        end
        global epoch += 1
    end
end;

In [None]:
# Train a single model under the configured name, using seed 1.
train_model(name, 1)

[38;5;6m[1m┌ [22m[39m[38;5;6m[1mInfo: [22m[39m20220416 19:31:42 The GPU function is being called but the GPU is not accessible. 
[38;5;6m[1m└ [22m[39mDefaulting back to the CPU. (No action is required if you want to run on the CPU).
[32mProgress: 100%|███████████████████████████| Time: 0:00:08 ( 0.93 μs/it)[39m
[32mProgress: 100%|███████████████████████████|  ETA: 0:00:00 (41.52 ms/it)[39m[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220416 19:36:34 Epoch 0, loss 1.7453222129310897, best_loss Inf
[32mProgress:  87%|███████████████████████▌   |  ETA: 0:00:01 ( 0.94 μs/it)[39m[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220416 19:38:51 Epoch 0, Iteration 1
[32mProgress:   1%|▎                                        |  ETA: 4:23:03[39mm29[39mm[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220416 19:39:54 Epoch 0, Iteration 71
[32mProgress:   1%|▌                                        |  ETA: 3:27:45[39m[38;5;6m[1m[ [22m[39m[38;5;6

## Write predictions

In [None]:
# Evaluate model `m` for every (users[j], items[j]) pair and return the results as a
# Float32 vector aligned with `users`.
#
# NOTE(review): `get_batch(b)` below is called with a single argument, but the only
# `get_batch` method visible in this notebook takes `(X, Y, iter, batch_size)`. Unless a
# one-argument method is defined elsewhere, this call raises a MethodError.
# NOTE(review): the per-thread dictionaries are selected with `Threads.threadid()`,
# which is only safe if a loop iteration never migrates between threads — confirm
# against the Julia version / `@threads` schedule in use.
function gmodel(m, users, items)
    # index users: map each user to the positions in `users` where it occurs,
    # first into one dictionary per thread
    user_to_output_idxs = [Dict() for t = 1:Threads.nthreads()]
    @tprogress Threads.@threads for j = 1:length(users)
        u = users[j]
        t = Threads.threadid()
        if u ∉ keys(user_to_output_idxs[t])
            user_to_output_idxs[t][u] = []
        end
        push!(user_to_output_idxs[t][u], j)
    end
    # combine the per-thread dictionaries, concatenating position lists of shared users
    user_to_output_idxs = merge(vcat, user_to_output_idxs...)

    # allocate outputs
    ratings = zeros(Float32, length(users))

    # split users into mini-batches of at most 128 distinct users
    deduped_users = collect(Set(users))
    batch(arr, n) = [arr[i:min(i + n - 1, end)] for i = 1:n:length(arr)]
    batches = batch(deduped_users, 128)

    # compute predictions
    @tprogress Threads.@threads for i = 1:length(batches)
        b = batches[i]
        # column of each user within this batch's model output
        user_to_input_idx = Dict(zip(b, 1:length(b)))
        alpha = m(get_batch(b)) |> cpu
        if train_implicit_model
            # exponentiate and normalise each column so that it sums to 1
            alpha .= exp.(alpha)
            alpha .= alpha ./ sum(alpha, dims = 1)
        end

        # copy the requested (item, user) outputs to every position they were asked for
        for u in b
            input_idx = user_to_input_idx[u]
            for output_idx in user_to_output_idxs[u]
                ratings[output_idx] = alpha[items[output_idx], input_idx]
            end
        end
    end
    ratings
end;

In [None]:
"""
    make_prediction(sparse_preds, users, items)

Look up `sparse_preds[users[j], items[j]]` for every position `j` and return the values
as a `Float64` vector of the same length as `users`.
"""
function make_prediction(sparse_preds, users, items)
    n = length(users)
    out = zeros(Float64, n)
    @tprogress Threads.@threads for k = 1:n
        out[k] = sparse_preds[users[k], items[k]]
    end
    return out
end;

In [None]:
# Load the trained model stored at `params["model"]`, evaluate it on every
# (user, item) pair of the combined splits, and write the parameters and predictions
# to the output directory `params["name"]`.
#
# NOTE(review): `training` and `validation` are not defined anywhere in this notebook;
# presumably they are globals provided by Alpha.ipynb — confirm.
# NOTE(review): `reduce(cat, ...)` calls `cat` without a `dims` keyword; this relies on
# a `cat` method for the split type being defined elsewhere — confirm.
function save_model(params)
    # the checkpoint was written by `continue_training` under the variable name `m`
    BSON.@load params["model"] m
    # put the model in test mode for inference
    testmode!(m)
    BLAS.set_num_threads(1) # gmodel already multithreads

    full_df = reduce(cat, [training, validation, get_split("test")])
    ratings = gmodel(m, full_df.user, full_df.item)
    # (user, item) lookup table, read back by `make_prediction`
    sparse_preds = sparse(full_df.user, full_df.item, ratings)

    write_params(params, outdir = params["name"])
    write_predictions(
        (users, items) -> make_prediction(sparse_preds, users, items),
        residual_alphas = validation_residuals,
        outdir = params["name"],
        implicit = train_implicit_model,
    )
end;

In [None]:
# function fit(num_seeds, start = 1)
#     seeds = hash.(rand(Int, num_seeds))
#     for i = start:length(seeds)
#         save_model(train_model(i, seeds[i]))
#     end
# end