# Generalized Neural Network
* A denoising autoencoder that learns the user's ratings and implicit ratings

In [1]:
# Identifier of this alpha. It is interpolated into the model checkpoint path and into
# the "name" entry of the params dict built by train_model.
name = "GNN.Resid";

In [2]:
using Flux
using Random
import BSON

In [3]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [4]:
# Transfer function applied (via `|> device`) to the model and to every batch.
device = gpu;

In [5]:
# Seed the default random number generator with a fixed value.
# (train_model re-seeds with its own `seed` argument before building a model.)
Random.seed!(20220130);

## Data preparation

In [6]:
# Names of the alphas passed to get_residuals / write_predictions; built as one flat
# vector of strings by concatenating the per-family name lists.
const residual_alphas = vcat(
    ["UserItemBiases"],
    ["GNN"],
    ["ItemCF.$K" for K in (2^4, 2^6, 2^8, 2^10, 2^12)],
    ["MatrixFactorization.$K" for K in (10, 20, 40)], # TODO make 8,16,32
    ["ItemCF.Resid.$alpha.1.$K" for alpha in ("ItemCF", "GNN", "MF") for K in (2^8,)],
)

# Splits used for fitting and early stopping.
# NOTE(review): presumably `rating` holds what is left after the alphas above — confirm
# against get_residuals in Alpha.ipynb.
const training = get_residuals("training", residual_alphas)
const validation = get_residuals("validation", residual_alphas)
const implicit = get_split("implicit")

# One extra slot on each axis so that ids missing from training can be clamped onto it.
const n_items = num_items() + 1
const n_users = maximum(training.user) + 1;

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:02:48[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:04[39m


In [7]:
# column accesses are faster than row accesses, so we make this an (item, user) matrix 
# R  : ratings from the training split, one column per user
# Ri : ratings from the implicit split, same (n_items, n_users) layout
const R = sparse(training.item, training.user, training.rating, n_items, n_users)
const Ri = sparse(implicit.item, implicit.user, implicit.rating, n_items, n_users);

In [8]:
# number of items each user has seen
# Tallied serially into a single column instead of into per-thread columns indexed by
# Threads.threadid(): that pattern is not safe once tasks can migrate between threads,
# and a plain pass over the ids is cheap. The result keeps the (n_users, 1) Float32
# shape that `sum(..., dims = 2)` used to produce, so `counts[u]` works as before.
counts = let tally = zeros(Float32, n_users, 1)
    for u in implicit.user
        tally[u, 1] += 1
    end
    tally
end;

[32mProgress: 100%|███████████████████████████| Time: 0:00:06 ( 0.42 μs/it)[39m


In [9]:
"""
    get_data(split, j, train)

Build one `(X, Y1, Y2)` sample for row `j` of `split` (an object exposing `user`, `item`
and `rating` vectors).

* `X` is the user's rating column, implicit-rating column and three count features,
  concatenated; the entry for the row's own item is zeroed in both columns.
* With `train == true` the targets are the user's ratings / implicit ratings at every
  position where the (held-out-adjusted) implicit column is non-zero.
* With `train == false` the targets are zero everywhere except at the row's item, where
  `Y1` is the row's rating and `Y2` is 1.
"""
function get_data(split, j, train)
    # ids outside the training range are clamped onto the spare last slot
    user = min(split.user[j], n_users)
    item = min(split.item[j], n_items)

    # ratings, with the current item hidden from the input
    ratings_in = collect(R[:, user])
    ratings_in[item] = 0

    # implicit ratings, with the current item hidden from the input
    implicit_in = collect(Ri[:, user])
    implicit_in[item] = 0

    # heterogeneous features: fraction of the catalogue seen, plus two transforms of it
    # NOTE(review): the `- 1` presumably accounts for the hidden item — confirm this is
    # also intended when `split` is not the training split.
    frac_seen = convert(Float32, max(counts[user] - 1, 0) / n_items)
    extras = [frac_seen, sqrt(frac_seen), frac_seen^2]

    X = vcat(ratings_in, implicit_in, extras)

    # outputs start out all-zero and are filled according to the mode
    Y1 = zeros(eltype(ratings_in), length(ratings_in))
    Y2 = zeros(eltype(implicit_in), length(implicit_in))
    if train
        seen = findall(!iszero, implicit_in)
        Y1[seen] = ratings_in[seen]
        Y2[seen] = implicit_in[seen]
    else
        Y1[item] = split.rating[j]
        Y2[item] = 1
    end

    return (X, Y1, Y2)
end

"""
    get_batch(split, block_size, train)

Draw `block_size` rows of `split` uniformly at random (with replacement), build one
sample per row with `get_data(split, j, train)`, and return them stacked as a
one-element vector `[(X, (Y1, Y2))]` moved to `device`.
"""
function get_batch(split, block_size, train)
    idxs = rand(1:length(split.rating), block_size)

    # One slot per sample, written by index. This replaces per-thread buffers keyed by
    # Threads.threadid() and fixes the loop bound, which referred to an undefined
    # `items` instead of `idxs`.
    samples = Vector{Any}(undef, length(idxs))
    Threads.@threads for k = 1:length(idxs)
        samples[k] = get_data(split, idxs[k], train)
    end

    # comprehensions narrow the element type again before stacking
    X = Flux.batch([s[1] for s in samples])
    Y1 = Flux.batch([s[2] for s in samples])
    Y2 = Flux.batch([s[3] for s in samples])
    [(X, (Y1, Y2))] |> device
end;

In [10]:
# custom split layer
"""
    Split(paths...)

Layer holding several sub-models. Calling it on an array applies every path to that
same input and returns the results in the same container shape as `paths`.
"""
struct Split{T}
    paths::T
end
Split(paths...) = Split(paths)
Flux.@functor Split
function (m::Split)(x::AbstractArray)
    return map(path -> path(x), m.paths)
end

In [11]:
"""
    generate_model()

Build the autoencoder and move it to `device`.

Input width is `2 * n_items + 3` (ratings, implicit ratings and three heterogeneous
features). The input passes through `Dropout(0.5)` and a shared encoder, then a `Split`
into two decoders that each emit `n_items` values: one for ratings (trained with mse on
observed shows) and one for implicit ratings (trained with a crossentropy loss).
"""
function generate_model()
    input_width = n_items + n_items + 3

    # both heads use the same decoder shape but get their own parameters
    make_decoder() =
        Chain(Dense(128, 256, relu), Dense(256, 512, relu), Dense(512, n_items))

    encoder = Chain(
        Dense(input_width, 512, relu),
        Dense(512, 256, relu),
        Dense(256, 128, relu),
    )
    rating_decoder = make_decoder()
    implicit_decoder = make_decoder()

    heads = Split(rating_decoder, implicit_decoder)
    return Chain(Dropout(0.5), encoder, heads) |> device
end

generate_model (generic function with 1 method)

## Training

In [12]:
"""
    rating_loss(ŷ, y)

Mean squared error between `ŷ` and `y`, restricted to the positions where `y` is
non-zero (i.e. items the user has seen).
"""
function rating_loss(ŷ, y)
    observed = y .!= 0
    return Flux.mse(ŷ[observed], y[observed])
end

"""
    implicit_loss(ŷ, y)
    implicit_loss(ŷ, y, mask)

Logit cross-entropy between the implicit-rating logits `ŷ` and the targets `y`.

The three-argument form is for predicting a held-out series: it first overwrites the
logits selected by `mask` with `-1e3` (mutating `ŷ` in place) so that items already in
the user's training set are effectively excluded.
"""
function implicit_loss(ŷ, y)
    return Flux.logitcrossentropy(ŷ, y)
end

function implicit_loss(ŷ, y, mask)
    ŷ[mask] .= -1e3
    return implicit_loss(ŷ, y)
end

"""
    loss_components(m, x, y, train)

Run `m` on `x` and return the tuple `(rating loss, implicit loss)` against the targets
`y = (Y1, Y2)`. When `train` is false the implicit loss is computed with the items that
are non-zero in the first `n_items` rows of `x` masked out.
"""
function loss_components(m, x, y, train)
    ŷ = m(x)
    if train
        return (rating_loss(ŷ[1], y[1]), implicit_loss(ŷ[2], y[2]))
    end

    # NOTE(review): rows 1:n_items of x are the rating inputs; the implicit inputs sit
    # in the next n_items rows. Confirm the rating rows are the intended "seen" mask.
    seen = (x.!=0)[1:n_items, :]
    return (rating_loss(ŷ[1], y[1]), implicit_loss(ŷ[2], y[2], seen))
end;

In [13]:
"""
    reset_training()

Reset the global early-stopping state read and updated by `train_model`:
`best_loss`, `patience`, `iters_without_improvement`, `continue_training`, `iters`.
"""
function reset_training()
    global best_loss, patience, iters_without_improvement, continue_training, iters
    best_loss = Inf
    patience = 10
    iters_without_improvement = 0
    continue_training = true
    iters = 0
end;

In [14]:
"""
    train_model(model_name, seed)

Train one model with early stopping and return a `Dict` describing the run.

# Arguments
- `model_name`: suffix used in the checkpoint file name and in the returned `"name"`.
- `seed`: value passed to `Random.seed!` before the model is generated.

# Returns
A `Dict` with the keys `"name"`, `"loss"` (best normalised validation loss),
`"patience"`, `"iters"`, `"model"` (checkpoint path), `"residual_alphas"` and `"seed"`.

The early-stopping state lives in the globals set up by `reset_training()`. The
checkpoint is (re)written every time the normalised validation loss improves.
"""
function train_model(model_name, seed)
    Random.seed!(seed)
    m = generate_model()
    ps = Flux.params(m)
    reset_training()
    BLAS.set_num_threads(Threads.nthreads())

    # Single source of truth for the checkpoint location; make sure its directory
    # exists so the first BSON.@save cannot fail on a fresh checkout.
    model_path = "../../data/alphas/$name/model.$(model_name).bson"
    mkpath(dirname(model_path))

    # Setup early stopping callbacks
    # Average the (rating, implicit) loss pair over 100 random batches of 128 samples.
    function evalcb(split, train)
        losses = []
        @showprogress for epoch = 1:100
            push!(losses, loss_components(m, get_batch(split, 128, train)[1]..., train))
        end
        reduce(.+, losses) ./ length(losses)
    end

    function evalcb()
        # print losses and perform early stopping
        testmode!(m)
        @debug "iteration: $iters"
        training_losses = evalcb(training, true)
        training_loss = sum(training_losses ./ training_baseline_loss)
        @debug "training losses: $(training_losses) -> $(training_loss)"
        inference_losses = evalcb(validation, false)
        inference_loss = sum(inference_losses ./ inference_baseline_loss)
        @debug "validation losses: $(inference_losses) -> $(inference_loss)"
        if inference_loss < best_loss
            global best_loss = inference_loss
            global iters_without_improvement = 0
            BSON.@save model_path m
        else
            global iters_without_improvement += 1
            if iters_without_improvement >= patience
                global continue_training = false
            end
        end
        trainmode!(m)
    end

    # Setup loss
    # Baselines are the losses of the freshly initialised model; each loss component is
    # divided by its baseline so the two components are on a comparable scale.
    training_baseline_loss = evalcb(training, true)
    # NOTE(review): the inference baseline is measured on `training` while the
    # validation losses it normalises are measured on `validation` — confirm intended.
    inference_baseline_loss = evalcb(training, false)
    # run the zero-argument evaluation callback at most once every 600 seconds
    throttled_cb = Flux.throttle(evalcb, 600)
    opt = ADAMW(0.001, (0.9, 0.999), 1e-3)

    function loss(x, y)
        sum(loss_components(m, x, y, true) ./ training_baseline_loss)
    end

    # Train model
    # one freshly sampled batch per step, until the callback clears continue_training
    while continue_training
        batch = get_batch(training, 128, true)
        Flux.train!(loss, ps, batch, opt, cb = throttled_cb)
        global iters += 1
    end

    Dict(
        "name" => "$name.$model_name",
        "loss" => best_loss,
        "patience" => patience,
        "iters" => iters,
        "model" => model_path,
        "residual_alphas" => residual_alphas,
        "seed" => seed,
    )
end;

## Write predictions

In [15]:
"""
    get_data(u)

Build the model input column for user `u` at prediction time: the user's rating column,
implicit-rating column and three count features, concatenated.

User ids larger than `n_users` are clamped onto the spare last column, matching how the
training-time `get_data(split, j, train)` treats users missing from the training set
(previously such ids raised a `BoundsError`).
"""
function get_data(u)
    # handle users that aren't in the training set
    u = min(u, n_users)

    # ratings
    X1 = collect(R[:, u])
    # implicit ratings
    X2 = collect(Ri[:, u])
    # heterogeneous features
    count = convert(Float32, max(counts[u] - 1, 0) / n_items)
    X3 = [count, sqrt(count), count^2]
    vcat(X1, X2, X3)
end

"""
    get_batch(users)

Stack the input columns of `users` into one matrix on `device`. Column `k` of the result
always belongs to `users[k]`.
"""
function get_batch(users)
    # Written by index so the column order is exactly the order of `users`. Collecting
    # into per-thread buffers keyed by Threads.threadid() made the order depend on how
    # iterations were scheduled, while callers look columns up by position.
    columns = Vector{Any}(undef, length(users))
    Threads.@threads for k = 1:length(users)
        columns[k] = get_data(users[k])
    end

    # the comprehension narrows the element type again before stacking
    X = Flux.batch([c for c in columns])
    X |> device
end;

In [23]:
"""
    gmodel(m, users, items)

Evaluate model `m` for every `(users[j], items[j])` pair and return the tuple
`(ratings, implicit)`: two `Float64` vectors, one entry per pair, taken from the rating
head and the implicit head of `m` respectively.

Each distinct user is run through the model once, in batches of 128. Item ids beyond the
height of the model output are clamped onto its last row (the slot reserved for unseen
items).
"""
function gmodel(m, users, items)
    ratings = zeros(length(users))
    implicit_scores = zeros(length(users))

    # Group the request rows by user once, so each batch only touches its own rows
    # instead of rescanning the whole request.
    rows_by_user = Dict{eltype(users),Vector{Int}}()
    for j = 1:length(users)
        push!(get!(() -> Int[], rows_by_user, users[j]), j)
    end

    deduped_users = collect(keys(rows_by_user))
    chunk(arr, n) = [arr[i:min(i + n - 1, end)] for i = 1:n:length(arr)]
    batches = chunk(deduped_users, 128)

    # Every user sits in exactly one batch, so threads write disjoint output slots;
    # rows_by_user is only read inside the loop.
    @tprogress Threads.@threads for i = 1:length(batches)
        b = batches[i]
        alpha = m(get_batch(b)) |> cpu
        rating_out, implicit_out = alpha[1], alpha[2]
        n_rows = size(rating_out, 1)
        for (idx, user) in enumerate(b)
            for j in rows_by_user[user]
                item = min(items[j], n_rows)
                ratings[j] = rating_out[item, idx]
                implicit_scores[j] = implicit_out[item, idx]
            end
        end
    end
    ratings, implicit_scores
end;

In [24]:
"""
    make_prediction(sparse_preds, users, items)

Look up `sparse_preds[users[k], items[k]]` for every `k` and return the values as a
`Float64` vector of the same length as `users`.
"""
function make_prediction(sparse_preds, users, items)
    n = length(users)
    out = zeros(n)
    @tprogress Threads.@threads for k = 1:n
        out[k] = sparse_preds[users[k], items[k]]
    end
    out
end;

In [25]:
"""
    save_model(params)

Load the checkpoint at `params["model"]`, compute rating predictions for every row of
the training, validation and test splits, and write the params and predictions under
`params["name"]`.
"""
function save_model(params)
    BSON.@load params["model"] m
    testmode!(m)
    BLAS.set_num_threads(1) # gmodel already multithreads

    splits = [training, validation, get_residuals("test", residual_alphas)]
    full_df = reduce(cat, splits)

    # only the rating head is written out; the implicit head is discarded here
    ratings = first(gmodel(m, full_df.user, full_df.item))
    sparse_preds = sparse(full_df.user, full_df.item, ratings)
    predict(users, items) = make_prediction(sparse_preds, users, items)

    outdir = params["name"]
    write_params(params, outdir = outdir)
    write_predictions(predict, residual_alphas = residual_alphas, outdir = outdir)
end

save_model (generic function with 1 method)

In [19]:
# Train and export one model per seed; the position in `seeds` doubles as the model name.
seeds = hash.(rand(Int, 1))
for (i, seed) in enumerate(seeds)
    params = train_model(i, seed)
    save_model(params)
end;

[38;5;6m[1m┌ [22m[39m[38;5;6m[1mInfo: [22m[39m20220226 01:55:26 The GPU function is being called but the GPU is not accessible. 
[38;5;6m[1m└ [22m[39mDefaulting back to the CPU. (No action is required if you want to run on the CPU).
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:16[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:22[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220226 01:57:01 iteration: 0
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:09[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220226 01:57:12 training losses: (0.8532783f0, 5068.1943f0) -> 1.9878938
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:11[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220226 01:57:23 validation losses: (1.2928891f0, 9.8290415f0) -> 2.339808
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220226 02:07:42 iterati

LoadError: TaskFailedException

[91m    nested task error: [39mUndefVarError: m not defined
    Stacktrace:
     [1] [0m[1mmacro expansion[22m
    [90m   @ [39m[90m./[39m[90;4mIn[16]:9[0m[90m [inlined][39m
     [2] [0m[1m(::var"#989#threadsfor_fun#56"{Vector{Int32}, Vector{Int32}, Vector{Vector{Int32}}, Vector{Float64}, Vector{Float64}, UnitRange{Int64}})[22m[0m[1m([22m[90monethread[39m::[0mBool[0m[1m)[22m
    [90m   @ [39m[35mMain[39m [90m./[39m[90;4mthreadingconstructs.jl:81[0m
     [3] [0m[1m(::var"#989#threadsfor_fun#56"{Vector{Int32}, Vector{Int32}, Vector{Vector{Int32}}, Vector{Float64}, Vector{Float64}, UnitRange{Int64}})[22m[0m[1m([22m[0m[1m)[22m
    [90m   @ [39m[35mMain[39m [90m./[39m[90;4mthreadingconstructs.jl:48[0m

In [20]:
# Progress: 100%|███████████████████████████| Time: 0:00:14 ( 1.53 μs/it)
# [ Info: 20220130 13:41:55 training set: RMSE 1.0846514400339415 MAE 0.8076981570774939 R2 0.28584964713185324
# Progress: 100%|███████████████████████████| Time: 0:00:00 ( 1.33 μs/it)
# [ Info: 20220130 13:41:58 validation set: RMSE 1.2024856553053038 MAE 0.892453189382654 R2 0.17030444448299753
# Progress: 100%|███████████████████████████| Time: 0:00:00 ( 1.33 μs/it)