# Autoencoder Neural Network

In [1]:
# Identifier of this model; it is interpolated into the path the trained model is saved to.
name = "ENN";
# Names handed to `get_residuals` when the training and validation splits are loaded.
residual_alphas = ["UserItemBiases"];

In [2]:
using Flux # TODO add to readme
import BSON

│ For performance reasons, it is recommended to upgrade to a driver that supports CUDA 11.2 or higher.
└ @ CUDA C:\Users\kunda\.julia\packages\CUDA\nYggH\src\initialization.jl:70


In [3]:
using NBInclude
@nbinclude("Alpha.ipynb");

In [4]:
# Let BLAS use as many threads as Julia was started with, then display that thread count.
BLAS.set_num_threads(Threads.nthreads())
Threads.nthreads()

4

In [5]:
# Function that the model and every batch are piped through before use (here `gpu`).
device = gpu;

## train on data

In [6]:
# Load the training and validation splits. The code below reads their `user`, `item` and
# `rating` fields.
# NOTE(review): presumably the ratings are residuals left after the `residual_alphas`
# predictions are removed -- confirm against `get_residuals`.
training = get_residuals("training", residual_alphas);
const validation = get_residuals("validation", residual_alphas)
# column accesses are faster than row accesses, so we make this an (item, user) matrix instead of a (user, item) matrix
# NOTE(review): the matrix dimensions come from the training split only; this assumes the
# validation split never references a larger user or item index -- TODO confirm.
R = sparse(
    training.item,
    training.user,
    convert.(Float32, training.rating),
    maximum(training.item),
    maximum(training.user),
);

In [7]:
"""
    get_data(R, split, j)

Build one `(X, Y)` training example from row `j` of `split`.

The column of `R` belonging to `split.user[j]` is copied and the entry at `split.item[j]`
is zeroed out. `Y` is that vector. `X` is that vector, followed by a same-length indicator
vector (1 wherever the rating is non-zero), followed by three scalar features derived from
the number of non-zero entries.
"""
function get_data(R, split, j)
    held_out = split.item[j]
    ratings = collect(R[:, split.user[j]])
    ratings[held_out] = 0
    Y = copy(ratings)

    # implicit feedback: 1 where a rating is present, unchanged (zero) elsewhere
    seen = map(r -> r != 0 ? one(r) : r, ratings)
    X = vcat(ratings, seen)

    # heterogeneous features; the count runs over the concatenated vector, so every
    # present rating contributes twice (once as a rating, once as an indicator)
    nitems_feature = count(x -> x != 0, X) / size(R, 1)
    append!(X, (nitems_feature, sqrt(nitems_feature), nitems_feature^2))
    return (X, Y)
end

"""
    get_batch(R, split, block_size)

Sample `block_size` rows of `split` uniformly at random (with replacement) and assemble
them into a single batch.

Every sampled row `j` is converted into an `(input, target)` pair by
`get_data(R, split, j)`. Inputs and targets are each stacked with `Flux.batch`, and the
result is returned as the one-element vector `[(X, Y)]` after being piped through `device`.
"""
function get_batch(R, split, block_size)
    items = rand(1:length(split.rating), block_size)
    # One output slot per sample: iteration `i` writes only `samples[i]`, so concurrent
    # iterations never touch the same element. Buffers indexed by `Threads.threadid()`
    # are not safe here because a task is not guaranteed to stay on one thread, and they
    # also make the order of the samples depend on scheduling.
    samples = Vector{Any}(undef, block_size)
    Threads.@threads for i in eachindex(items)
        samples[i] = get_data(R, split, items[i])
    end
    X = Flux.batch([sample[1] for sample in samples])
    Y = Flux.batch([sample[2] for sample in samples])
    return [(X, Y)] |> device
end;

"""
    evalcb(R, split)

Estimate the current loss on `split`: draw 100 random batches of 128 samples with
`get_batch`, evaluate `loss` on each, and return the mean of those values. A progress bar
is shown while the batches are processed.
"""
function evalcb(R, split)
    batch_losses = []
    @showprogress for trial = 1:100
        x, y = get_batch(R, split, 128)[1]
        push!(batch_losses, loss(x, y))
    end
    return mean(batch_losses)
end;

evalcb (generic function with 1 method)

In [8]:
"""
    get_val_data(R, split, j)

Build one `(X, Y)` validation example from row `j` of `split`.

`X` is built from the column of `R` belonging to `split.user[j]`, with the entry at
`split.item[j]` zeroed out: the ratings, followed by a same-length indicator vector
(1 wherever the rating is non-zero), followed by three scalar features derived from the
number of non-zero entries. `Y` is all zeros except at `split.item[j]`, where it holds
`split.rating[j]`.
"""
function get_val_data(R, split, j)
    held_out = split.item[j]
    ratings = collect(R[:, split.user[j]])
    ratings[held_out] = 0

    # the target contains only the held-out rating
    Y = fill!(similar(ratings), 0)
    Y[held_out] = split.rating[j]

    # implicit feedback: 1 where a rating is present, unchanged (zero) elsewhere
    seen = [r != 0 ? one(r) : r for r in ratings]
    X = vcat(ratings, seen)

    # heterogeneous features; the count runs over the concatenated vector, so every
    # present rating contributes twice (once as a rating, once as an indicator)
    nitems_feature = count(x -> x != 0, X) / size(R, 1)
    for feature in (nitems_feature, sqrt(nitems_feature), nitems_feature^2)
        push!(X, feature)
    end
    return (X, Y)
end

"""
    get_val_batch(R, split, block_size)

Sample `block_size` rows of `split` uniformly at random (with replacement) and assemble
them into a single validation batch.

Every sampled row `j` is converted into an `(input, target)` pair by
`get_val_data(R, split, j)`. Inputs and targets are each stacked with `Flux.batch`, and
the result is returned as the one-element vector `[(X, Y)]` after being piped through
`device`.
"""
function get_val_batch(R, split, block_size)
    items = rand(1:length(split.rating), block_size)
    # One output slot per sample: iteration `i` writes only `samples[i]`, so concurrent
    # iterations never touch the same element. Buffers indexed by `Threads.threadid()`
    # are not safe here because a task is not guaranteed to stay on one thread, and they
    # also make the order of the samples depend on scheduling.
    samples = Vector{Any}(undef, block_size)
    Threads.@threads for i in eachindex(items)
        samples[i] = get_val_data(R, split, items[i])
    end
    X = Flux.batch([sample[1] for sample in samples])
    Y = Flux.batch([sample[2] for sample in samples])
    return [(X, Y)] |> device
end;

"""
    val_evalcb(R, split)

Estimate the current held-out loss on `split`: draw 100 random batches of 128 samples with
`get_val_batch`, evaluate `loss` on each, and return the mean of those values. A progress
bar is shown while the batches are processed.
"""
function val_evalcb(R, split)
    batch_losses = []
    @showprogress for trial = 1:100
        x, y = get_val_batch(R, split, 128)[1]
        push!(batch_losses, loss(x, y))
    end
    return mean(batch_losses)
end;

val_evalcb (generic function with 1 method)

In [22]:
# Network input, per user: one rating per item (zero when unseen), one 0/1 indicator per
# item, and 3 scalar features -- hence 2 * n_items + 3 input units.
# Network output: one predicted rating per item.
n_items = size(R, 1)
m = device(
    Chain(
        Dropout(0.5),
        Dense(2 * n_items + 3, 512, relu),
        Dense(512, 256, relu),
        Dense(256, 128, relu),
        Dense(128, 256, relu),
        Dense(256, 512, relu),
        Dense(512, n_items),
    ),
)
ps = Flux.params(m);

In [23]:
"""
    loss(x, y)

Mean squared error between the model output `m(x)` and the targets `y`, computed only
over the positions where `y` is non-zero.
"""
function loss(x, y)
    observed = y .!= 0
    predictions = m(x)
    return Flux.mse(predictions[observed], y[observed])
end
opt = ADAM();

In [33]:
# Early-stopping state; `evalcb()` reads and updates these through `global`.
# lowest validation loss seen so far
best_loss = Inf
# number of consecutive non-improving evaluations after which training is stopped
patience = 1000000
# consecutive evaluations since the validation loss last improved
iters_without_improvement = 0
# the training loop keeps running while this is true
continue_training = true
# number of training iterations performed, incremented by the training loop
iters = 0

"""
    evalcb()

Evaluation callback: log the current losses, checkpoint the model, and drive early stopping.

The model is switched to test mode for the duration of the evaluation and is switched back
to train mode afterwards, even when the evaluation throws or is interrupted. If the
validation loss improved on `best_loss`, the model is saved to
`../../data/alphas/\$name/model.bson` and the no-improvement counter is reset. Otherwise the
counter is incremented and, once it reaches `patience`, `continue_training` is set to
`false`. Returns `nothing`.
"""
function evalcb()
    # print losses and perform early stopping
    testmode!(m)
    try
        @debug "iteration: $iters"
        # The values are means of `loss`, which is a mean squared error (not a root).
        # The training loss is only computed when debug logging is enabled, because
        # log message expressions are evaluated lazily.
        @debug "training mse: $(evalcb(R, training))"
        val_loss = val_evalcb(R, validation)
        @debug "validation mse: $(val_loss)"
        if val_loss < best_loss
            global best_loss = val_loss
            global iters_without_improvement = 0
            model_path = "../../data/alphas/$name/model.bson"
            # make sure the target directory exists so the checkpoint cannot be lost
            mkpath(dirname(model_path))
            # NOTE(review): `m` is saved as-is, i.e. after being piped through `device`;
            # consider saving `cpu(m)` if the checkpoint has to load without a GPU --
            # confirm against the code that loads it.
            BSON.@save model_path m
        else
            global iters_without_improvement += 1
            if iters_without_improvement >= patience
                global continue_training = false
            end
        end
    finally
        # never leave the model in test mode, otherwise training would silently continue
        # with dropout disabled
        trainmode!(m)
    end
    return nothing
end

# run the evaluation callback at most once every 600 seconds
throttled_cb = Flux.throttle(evalcb, 600);

In [None]:
# Train on one freshly sampled batch per iteration until `evalcb()` sets
# `continue_training` to false.
while continue_training
    batch = get_batch(R, training, 128)
    Flux.train!(loss, ps, batch, opt, cb = throttled_cb)
    # explicit `global` so the counter update also works outside of soft-scope contexts
    # (e.g. when this code is run as a plain script instead of a notebook cell)
    global iters += 1
end

[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220127 14:01:58 iteration: 24591
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:37[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220127 14:02:36 training rmse: 0.96169114
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:39[39m
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220127 14:03:15 validation rmse: 1.3738708
[38;5;4m[1m[ [22m[39m[38;5;4m[1mDebug: [22m[39m20220127 14:13:16 iteration: 27026
[32mProgress:  25%|███████████                              |  ETA: 0:00:15[39m