# Generalized Neural Network
* A denoising autoencoder 

In [1]:
# Model identifier; feeds the RNG seed and the checkpoint directory used during training.
const name = "GNN.Rating.Test"
# Residual sources passed to get_residuals when building the network inputs.
const training_residuals = ["UserItemBiases"]
# Residual sources passed to get_residuals when building the targets and to
# write_predictions when saving the model.
const validation_residuals = ["UserItemBiases"]
# Append per-user summary features (plus their squares and square roots) to the inputs.
const derived_features = true
# When true, every stored target entry is set to 1 and the loss switches to logit
# cross-entropy.
const train_implicit_model = false
# get_epoch asserts this flag; the non-autoencoding path is not implemented.
const autoencode = true
# Number of users (matrix columns) per mini-batch.
const batch_size = 128
# Probability handed to the Dropout layer that sits in front of the encoder.
const dropout_perc = 0.5
# Passed as the third argument of ADAMW (its weight-decay term).
const l2penalty = 1e-5
# Passed as the first argument of ADAMW.
const learning_rate = 0.001
# Only referenced by the commented-out `fit` helper at the end of the notebook.
const num_seeds = 1;
# Selects the optimizer in train_model; the value "ADAM" is mapped to ADAMW there.
const optimizer = "ADAM";
# TODO train models with different parameter settings

In [2]:
using Random
import BSON

In [3]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [4]:
# Device mover applied to the model and to every mini-batch (`x |> device`).
# NOTE(review): `gpu` is presumably Flux's, brought into scope by Alpha.ipynb — confirm.
# The run logged further down reports that no GPU was accessible and fell back to the CPU.
const device = gpu;

In [5]:
# Seed the global RNG from the model name. hash(name) is unsigned, so the product wraps
# on overflow instead of throwing.
# NOTE(review): string hashes are not guaranteed to be stable across Julia versions —
# confirm whether cross-version reproducibility matters here.
Random.seed!(20220410 * hash(name));

## Data preparation

In [6]:
# Dimensions shared by every sparse matrix built in this notebook (rows = items,
# columns = users); each dimension is padded with one extra slot.
const n_items = num_items() + 1 # leave room to map unseen items
const n_users = maximum(get_split("training").user) + 1; # leave room to map unseen users

In [7]:
# Lay a split out as an (item, user) sparse matrix of ratings. Users are placed on the
# column axis because column accesses are faster than row accesses.
to_sparse_mat(split) = sparse(split.item, split.user, split.rating, n_items, n_users)

to_sparse_mat (generic function with 1 method)

In [8]:
# Per-user aggregate of a split, returned as a sparse 1 × n_users row.
# The rating total and the entry count are accumulated for every user and then combined
# elementwise with `agg(total, count)`. Inside the threaded loop each thread id owns one
# column of the scratch matrices; the columns are summed once the loop has finished.
function get_derived_feature(split, agg)
    n_slots = Threads.nthreads()
    rating_totals = zeros(Float32, n_users, n_slots)
    entry_counts = zeros(Float32, n_users, n_slots)
    @tprogress Threads.@threads for i = 1:length(split.rating)
        user = split.user[i]
        slot = Threads.threadid()
        rating_totals[user, slot] += split.rating[i]
        entry_counts[user, slot] += 1
    end
    per_user_total = sum(rating_totals, dims = 2)
    per_user_count = sum(entry_counts, dims = 2)
    return sparse(agg.(per_user_total, per_user_count)')
end;

In [9]:
# Build one epoch of data as a pair (X, Y) of sparse matrices with one column per user.
# X is always assembled from the training data (explicit residuals, implicit feedback and,
# optionally, derived per-user features); Y holds the residuals of the requested `split`.
# Both matrices are returned with their user columns shuffled by the same permutation.
function get_epoch(split)
    # todo support autoencode = false
    @assert autoencode

    # inputs: explicit training residuals stacked on top of the implicit feedback
    explicit_block = to_sparse_mat(get_residuals("training", training_residuals))
    implicit_block = to_sparse_mat(get_split("implicit_training"))
    X = vcat(explicit_block, implicit_block)

    if derived_features
        fraction_of_items = (total, n) -> n / n_items
        # NOTE(review): the division by 10 presumably rescales by the maximum rating — confirm
        scaled_mean_rating = (total, n) -> total / max(1, n) / 10

        # fraction of seen items
        seen = get_derived_feature(get_split("training"), fraction_of_items)
        # fraction of implicit items
        implicit = get_derived_feature(get_split("implicit_training"), fraction_of_items)
        # average item rating
        average = get_derived_feature(get_split("training"), scaled_mean_rating)

        Xd = vcat(seen, implicit, average)
        # each derived feature is supplied raw, squared and square-rooted
        X = vcat(X, Xd, Xd .^ 2, sqrt.(Xd))
    end

    # outputs: residuals of the requested split; implicit mode only keeps "was observed"
    Y = to_sparse_mat(get_residuals(split, validation_residuals))
    if train_implicit_model
        Y.nzval .= 1
    end

    # apply one random permutation of the users to both matrices
    perm = randperm(size(X, 2))
    return X[:, perm], Y[:, perm]
end;

In [10]:
# Sanity check: build the validation epoch twice. Every call draws a fresh permutation of
# the user columns, so the two results should differ in column order only.
X, Y = get_epoch("validation")
X2, Y2 = get_epoch("validation")

[32mProgress: 100%|███████████████████████████| Time: 0:00:06 ( 2.07 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:07 ( 1.78 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:05 ( 1.86 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:05 ( 1.81 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:07 ( 1.78 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:05 ( 1.84 μs/it)[39m


(sparse([6191, 6700, 6976, 7160, 7162, 7169, 7969, 8487, 8563, 8595  …  36936, 37907, 37908, 37909, 37910, 37911, 37912, 37913, 37914, 37915], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  1320150, 1320150, 1320150, 1320150, 1320150, 1320150, 1320150, 1320150, 1320150, 1320150], Float32[-0.86373305, 0.41021174, 0.13650393, -2.22978, -0.3714294, -0.5365475, -0.063417554, 0.44583693, 1.9406518, -0.9813562  …  1.0, 0.0067007863, 0.008389173, 0.79133856, 4.490054f-5, 7.037823f-5, 0.6262167, 0.08185833, 0.09159243, 0.88957214], 37915, 1320150), sparse(Int32[4265, 7422, 9367, 12640, 735, 1855, 3272, 7169, 8529, 8996  …  7256, 11088, 11411, 13248, 14253, 14325, 14568, 15162, 16351, 16829], [1, 1, 1, 1, 2, 2, 2, 2, 2, 2  …  1320150, 1320150, 1320150, 1320150, 1320150, 1320150, 1320150, 1320150, 1320150, 1320150], Float32[-0.3389134, 1.1170983, -1.0159798, -0.27521604, -1.1530313, -1.0277189, 1.0618806, 0.88071936, -0.302486, -3.8224938  …  -0.5345252, 0.19452278, 0.75691026, 0.62451655, 0.45544934, -1.818

In [11]:
# Totals of the two shuffled epochs; they should agree up to Float32 summation order.
sum(X), sum(Y), sum(X2), sum(Y2)

(2.1703885f8, 11978.103f0, 2.1703885f8, 11978.102f0)

In [12]:
# Slice mini-batch number `iter` (1-based) out of X and Y.
# The batch covers up to `batch_size` consecutive user columns (the last batch may be
# shorter). Both slices are densified and moved to `device`, and the result is a
# one-element vector holding the (inputs, targets) tuple.
function get_batch(X, Y, iter, batch_size)
    first_col = (iter - 1) * batch_size + 1
    last_col = min(iter * batch_size, size(X, 2))
    cols = first_col:last_col
    inputs = device(collect(X[:, cols]))
    targets = device(collect(Y[:, cols]))
    return [(inputs, targets)]
end;

In [13]:
# Build the denoising autoencoder and move it to `device`.
# Returns Chain(Dropout, encoder, decoder): the Dropout layer corrupts the inputs, the
# encoder compresses them to 128 units and the decoder emits one value per item.
function generate_model()
    # inputs are the user's ratings for all shows (unseen shows get mapped to zero)
    # + implicit ratings + heterogenous derived features
    # outputs are one value per show; train_implicit_model decides whether they are
    # trained as ratings (mse on observed shows) or as implicit logits (crossentropy)

    # get_epoch appends 3 derived features, each as raw value, square and square root
    n_derived = derived_features ? 3 * 3 : 0
    n_inputs = n_items + n_items + n_derived
    encoder =
        Chain(Dense(n_inputs, 512, relu), Dense(512, 256, relu), Dense(256, 128, relu))
    decoder = Chain(Dense(128, 256, relu), Dense(256, 512, relu), Dense(512, n_items))
    # a single device transfer is enough; piping the result through `device` a second
    # time changed nothing
    return Chain(Dropout(dropout_perc), encoder, decoder) |> device
end;

## Training

In [14]:
# Mean squared error restricted to the entries where the target `y` is non-zero, i.e.
# only items the user has seen contribute to the loss.
function rating_loss(ŷ, y)
    # TODO try mult on gpu
    observed = y .!= 0
    residuals = ŷ[observed] .- y[observed]
    return mean(residuals .^ 2)
end

# Logit cross-entropy between the raw model outputs and the targets.
implicit_loss(ŷ, y) = Flux.logitcrossentropy(ŷ, y)

# Run the model on `x` and score it with the loss selected by train_implicit_model.
function loss(m, x, y)
    ŷ = m(x)
    return train_implicit_model ? implicit_loss(ŷ, y) : rating_loss(ŷ, y)
end

loss (generic function with 1 method)

In [15]:
# Reset the global training state: the best validation loss seen so far and the
# iteration and epoch counters.
function reset_training()
    global best_loss, iteration, epoch
    best_loss = Inf
    iteration = 0
    epoch = 0
end;

In [16]:
# Average validation loss of model `m`.
# A fresh (shuffled) validation epoch is built, the loss of every mini-batch is weighted
# by the number of users in that batch, and the weighted sum is divided by the total
# number of users.
function get_validation_loss(m)
    X, Y = get_epoch("validation")
    n_cols = size(X, 2)
    n_batches = cld(n_cols, batch_size)
    # One slot per batch rather than one accumulator per thread id: every iteration
    # writes only its own slot, so the result does not depend on which thread runs a
    # task or on a task yielding in the middle of the forward pass, and the summation
    # order is the same on every run.
    weighted_losses = zeros(Float64, n_batches)
    @tprogress Threads.@threads for iter = 1:n_batches
        x, y = first(get_batch(X, Y, iter, batch_size))
        weighted_losses[iter] = loss(m, x, y) * size(x, 2)
    end
    sum(weighted_losses) / n_cols
end

get_validation_loss (generic function with 1 method)

In [17]:
# Untrained model instance, used by the following cells to exercise get_validation_loss.
m = generate_model()

[38;5;6m[1m┌ [22m[39m[38;5;6m[1mInfo: [22m[39m20220416 05:26:37 The GPU function is being called but the GPU is not accessible. 
[38;5;6m[1m└ [22m[39mDefaulting back to the CPU. (No action is required if you want to run on the CPU).


Chain(
  Dropout(0.5),
  Chain(
    Dense(37915 => 512, relu),          [90m# 19_412_992 parameters[39m
    Dense(512 => 256, relu),            [90m# 131_328 parameters[39m
    Dense(256 => 128, relu),            [90m# 32_896 parameters[39m
  ),
  Chain(
    Dense(128 => 256, relu),            [90m# 33_024 parameters[39m
    Dense(256 => 512, relu),            [90m# 131_584 parameters[39m
    Dense(512 => 18953),                [90m# 9_722_889 parameters[39m
  ),
) [90m                  # Total: 12 arrays, [39m29_464_713 parameters, 112.400 MiB.

In [18]:
# Baseline validation loss of the untrained model (first of three repeated runs).
get_validation_loss(m)

[32mProgress: 100%|███████████████████████████| Time: 0:00:05 ( 1.88 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:07 ( 1.77 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:05 ( 1.86 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:01:24 ( 0.40  s/it)[39m


1.7429431451821251

In [19]:
# Second run on the same model; the logged value differs from the first run only in the
# fourth decimal place.
get_validation_loss(m)

[32mProgress: 100%|███████████████████████████| Time: 0:00:06 ( 1.94 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:07 ( 1.78 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:05 ( 1.87 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:01:19 ( 0.37  s/it)[39m


1.743045823037595

In [20]:
# Third run on the same model, again to gauge run-to-run variation of the loss.
get_validation_loss(m)

[32mProgress: 100%|███████████████████████████| Time: 0:00:06 ( 1.99 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:07 ( 1.78 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:06 ( 1.93 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:01:25 ( 0.40  s/it)[39m


1.7431525926388056

In [21]:
# Decide whether training should go on, checkpointing the model on improvement.
# Computes the validation loss of `m`; when it beats the global `best_loss`, the global
# is updated, `m` is saved with BSON and `true` is returned. Otherwise returns `false`.
# Besides `best_loss`, this reads the globals `epoch`, `name` and `model_name`.
function continue_training(m)
    validation_loss = get_validation_loss(m)
    @info "Epoch $epoch, loss $validation_loss, best_loss $best_loss"
    if validation_loss < best_loss
        global best_loss = validation_loss
        path = "../../data/alphas/$name/model.$(model_name).bson"
        # the output directory may not exist yet on a fresh checkout; saving would fail
        mkpath(dirname(path))
        BSON.@save path m
        return true
    end
    return false
end;

In [22]:
# Train a freshly initialised model until the validation loss stops improving.
# `seed` seeds the global RNG before the model is created. Every epoch runs through all
# mini-batches of a newly shuffled training epoch; continue_training evaluates and
# checkpoints the model before each epoch and ends the loop at the first epoch that
# brings no improvement. Throws an ArgumentError for an unsupported `optimizer` setting.
# NOTE(review): the `model_name` argument is not used in this body; continue_training
# reads the global of the same name when it builds the checkpoint path — confirm intent.
function train_model(model_name, seed)
    # create model
    Random.seed!(seed)
    m = generate_model()
    ps = Flux.params(m)
    # BLAS is pinned to one thread; BLAS.set_num_threads(Threads.nthreads()) was the
    # alternative tried here
    BLAS.set_num_threads(1)

    # setup optimizer
    reset_training()
    function evalcb()
        @info "Epoch $epoch, Iteration $iteration"
    end
    # report progress at most once every 60 seconds
    throttled_cb = Flux.throttle(evalcb, 60)
    opt = if optimizer == "ADAM"
        ADAMW(learning_rate, (0.9, 0.999), l2penalty)
    else
        # fail here instead of with an UndefVarError on `opt` inside the training loop
        throw(ArgumentError("unsupported optimizer: $optimizer"))
    end
    training_loss(x, y) = loss(m, x, y)

    # Train model
    while continue_training(m)
        X, Y = get_epoch("training")
        for iter = 1:Int(ceil(size(X)[2] / batch_size))
            batch = get_batch(X, Y, iter, batch_size)
            Flux.train!(training_loss, ps, batch, opt)
            global iteration += 1
            throttled_cb()
        end
        global epoch += 1
    end
end;

In [23]:
# Globals for the training run below. continue_training reads `model_name` directly when
# it builds the checkpoint path, so it must be set before training starts.
model_name = name
seed = 1

1

In [24]:
# 45 when single-threaded
# 55 when multi-threaded (32 OpenBLAS threads)

In [None]:
# Start training; the captured output below was taken while the run was still in progress.
train_model(model_name, seed)

[32mProgress: 100%|███████████████████████████| Time: 0:00:06 ( 1.92 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:07 ( 1.78 μs/it)[39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:05 ( 1.86 μs/it)[39m
[32mProgress:  51%|█████████████▉             |  ETA: 0:00:39 ( 0.37  s/it)[39m

## Write predictions

In [None]:
# Predict a score for every (users[j], items[j]) pair with model `m`.
# Returns a Float32 vector aligned with `users` / `items`. The distinct users are pushed
# through the model in mini-batches of 128 and each requested item score is read from the
# model output. In implicit mode the outputs are turned into per-user softmax
# probabilities first.
# NOTE(review): `get_batch(b)` is called with a single argument, but the only get_batch
# visible in this notebook takes (X, Y, iter, batch_size). Unless a matching method is
# defined elsewhere this raises a MethodError — confirm before running.
function gmodel(m, users, items)
    # index users: user => positions j in `users` that refer to that user
    per_thread_idxs = [Dict{eltype(users),Vector{Int}}() for t = 1:Threads.nthreads()]
    @tprogress Threads.@threads for j = 1:length(users)
        t = Threads.threadid()
        push!(get!(() -> Int[], per_thread_idxs[t], users[j]), j)
    end
    # a separate name keeps the type of each variable fixed inside the threaded closures
    user_to_output_idxs = merge(vcat, per_thread_idxs...)

    # allocate outputs
    ratings = zeros(Float32, length(users))

    # split users into mini-batches; `unique` keeps first-appearance order, so the batch
    # composition is the same on every run
    deduped_users = unique(users)
    batch(arr, n) = [arr[i:min(i + n - 1, end)] for i = 1:n:length(arr)]
    batches = batch(deduped_users, 128)

    # compute predictions; every output position belongs to exactly one user and hence
    # to exactly one batch, so the writes into `ratings` never overlap
    @tprogress Threads.@threads for i = 1:length(batches)
        b = batches[i]
        alpha = m(get_batch(b)) |> cpu
        if train_implicit_model
            # softmax over the items of each user, shifted by the column maximum so that
            # exp cannot overflow for large logits
            alpha .= exp.(alpha .- maximum(alpha, dims = 1))
            alpha ./= sum(alpha, dims = 1)
        end

        for (input_idx, u) in enumerate(b)
            for output_idx in user_to_output_idxs[u]
                ratings[output_idx] = alpha[items[output_idx], input_idx]
            end
        end
    end
    ratings
end;

In [None]:
# Look up the prediction stored in `sparse_preds` for every (users[j], items[j]) pair.
# Returns a Float64 vector with one entry per requested pair.
function make_prediction(sparse_preds, users, items)
    n_pairs = length(users)
    preds = zeros(Float64, n_pairs)
    @tprogress Threads.@threads for j = 1:n_pairs
        user, item = users[j], items[j]
        preds[j] = sparse_preds[user, item]
    end
    return preds
end;

In [None]:
# Load a trained model and write its parameters and predictions.
# `params["model"]` is the path of the BSON checkpoint and `params["name"]` the output
# directory. Predictions are computed once for every (user, item) pair of the training,
# validation and test splits and served from a sparse lookup table.
# NOTE(review): the commented-out `fit` helper passes the result of train_model here, but
# train_model does not return a params dictionary — confirm the intended call.
function save_model(params)
    BSON.@load params["model"] m
    testmode!(m)
    BLAS.set_num_threads(1) # gmodel already multithreads

    # `training` / `validation` were not defined anywhere in this notebook; load the
    # splits the same way the rest of the notebook does
    splits = [get_split("training"), get_split("validation"), get_split("test")]
    full_df = reduce(cat, splits)
    ratings = gmodel(m, full_df.user, full_df.item)
    sparse_preds = sparse(full_df.user, full_df.item, ratings)

    write_params(params, outdir = params["name"])
    write_predictions(
        (users, items) -> make_prediction(sparse_preds, users, items),
        residual_alphas = validation_residuals,
        outdir = params["name"],
        implicit = train_implicit_model,
    )
end;

In [None]:
# function fit(num_seeds, start = 1)
#     seeds = hash.(rand(Int, num_seeds))
#     for i = start:length(seeds)
#         save_model(train_model(i, seeds[i]))
#     end
# end