# Common utilities for all alphas

In [None]:
using CSV
using DataFrames
using FileIO
using JLD2
using JupyterFormatter
using Optim
using ProgressMeter
using Statistics

In [None]:
# Switch on JupyterFormatter's automatic formatting of notebook cells
# (the trailing semicolon suppresses the cell's output).
enable_autoformat();

In [None]:
"""
    get_split(split)

Load one dataset split from `../../data/splits/splits.jld2`.

`split` must be `"training"`, `"validation"` or `"test"` (otherwise the assertion
fails); it is passed to `load` as the name of the entry to read from that file.
The path is relative, so it is resolved against the current working directory.
"""
function get_split(split)
    @assert split in ["training", "validation", "test"]
    return load("../../data/splits/splits.jld2", split)
end;

In [None]:
"""
    get_alpha(alpha, split)

Load the stored predictions of the alpha named `alpha` for one dataset split.

The predictions are read from `../../data/alphas/<alpha>/predictions.jld2`, with `split`
passed to `load` as the name of the entry to read. `split` must be `"training"`,
`"validation"` or `"test"` (otherwise the assertion fails).
"""
function get_alpha(alpha, split)
    @assert split in ["training", "validation", "test"]
    return load("../../data/alphas/$(alpha)/predictions.jld2", split)
end;

In [None]:
"""
    get_residuals(split, alphas)

Return the `split` dataset with the predictions of every alpha in `alphas` subtracted
from its `my_score` column.

The split is loaded with `get_split`, and each alpha's predictions for the same split
are loaded with `get_alpha`. With an empty `alphas` the split is returned unchanged.
"""
function get_residuals(split, alphas)
    residuals = get_split(split)
    for alpha in alphas
        predicted = get_alpha(alpha, split).my_score
        residuals.my_score = residuals.my_score - predicted
    end
    return residuals
end

In [None]:
"""
    mse(truth, pred)

Return the mean squared error between `truth` and `pred`.

The difference is taken elementwise with broadcasting, so `pred` may also be a scalar.
"""
mse(truth, pred) = mean(@. (truth - pred)^2)

"""
    rmse(truth, pred)

Return the root mean squared error between `truth` and `pred`, i.e. the square root
of `mse(truth, pred)`.
"""
rmse(truth, pred) = sqrt(mse(truth, pred))

"""
    mae(truth, pred)

Return the mean absolute error between `truth` and `pred`.

The difference is taken elementwise with broadcasting, so `pred` may also be a scalar.
"""
mae(truth, pred) = mean(@. abs(truth - pred))

"""
    r2(truth, pred)

Return the coefficient of determination of `pred` with respect to `truth`.

It is computed as one minus the ratio of `mse(truth, pred)` to the MSE of the constant
predictor `mean(truth)`.
"""
function r2(truth, pred)
    residual_mse = mse(truth, pred)
    baseline_mse = mse(truth, mean(truth))
    return 1 - residual_mse / baseline_mse
end

"""
    evaluate(truth, pred)

Return a one-line report string containing the RMSE, MAE and R2 of `pred` against
`truth`, in that order.
"""
function evaluate(truth, pred)
    root_mean_squared = rmse(truth, pred)
    mean_absolute = mae(truth, pred)
    r_squared = r2(truth, pred)
    return "RMSE $(root_mean_squared) MAE $(mean_absolute) R2 $(r_squared)"
end;

In [None]:
"""
    write_predictions(model; save_training=false)

Run `model` on every split, print its metrics, and save its predictions.

For each of the training, validation and test splits, the residuals with respect to the
global `residual_alphas` are loaded and `model(df.username, df.anime_id)` is called to
produce predicted scores. Metrics are printed for the training and validation splits
only. The predictions replace the `my_score` column and are written to
`../../data/alphas/<name>/predictions.jld2`, where `name` is a global.

NOTE(review): `residual_alphas` and `name` are not defined in this file; presumably the
including notebook sets them before calling this function.

# Keywords
- `save_training`: also save the training-set predictions (off by default).
"""
function write_predictions(model; save_training=false)
    # the training set is huge, so it is only persisted on request
    splits_to_save =
        save_training ? ["validation", "test", "training"] : ["validation", "test"]
    # the test set is never scored here, to avoid peeking at it
    splits_to_evaluate = ["training", "validation"]

    predictions = Dict()
    for split in ["training", "validation", "test"]
        df = get_residuals(split, residual_alphas)
        pred_score = model(df.username, df.anime_id)
        if split in splits_to_evaluate
            println("$(split) set: " * evaluate(df.my_score, pred_score))
        end
        split in splits_to_save || continue
        # reuse the split's table, swapping the target column for the predictions
        df.my_score = pred_score
        predictions[split] = df
    end

    outdir = "../../data/alphas/$name"
    isdir(outdir) || mkpath(outdir)
    return save("$outdir/predictions.jld2", predictions)
end;

In [None]:
"""
    write_params(params)

Save `params` to `../../data/alphas/<name>/params.jld2`, creating the directory first
if it does not exist.

NOTE(review): `name` is a global that is not defined in this file; presumably the
including notebook sets it before calling this function.
"""
function write_params(params)
    outdir = "../../data/alphas/$name"
    isdir(outdir) || mkpath(outdir)
    return save("$outdir/params.jld2", params)
end;

In [1]:
"""
    @tprogress for x in r ... end
    @tprogress Threads.@threads for x in r ... end

Run a `for` loop, optionally wrapped in `Threads.@threads`, with a progress meter.

Before the loop runs, a `Progress(length(r); showspeed=true)` is created and stored in a
freshly named global. A `next!` call on that meter is appended to the end of the loop
body, so the meter advances once per completed iteration.

Notes:
- The iterated expression `r` appears both in the `length` call and in the loop itself,
  so it is evaluated twice.
- The appended `next!` call lives inside the escaped loop, so `next!` is looked up in
  the caller's scope.
- Assumes a single-variable loop header of the form `for x in r` — TODO confirm no
  caller uses multi-variable headers.
"""
macro tprogress(expr)
    # let the @progress macro work with Threads.@threads
    loop = expr
    # unwrap `Threads.@threads for ...` so that `loop` is the `for` expression itself
    if loop.head == :macrocall && loop.args[1] == :(Threads.var"@threads")
        loop = loop.args[end]
    end
    
    # unique name for the global variable that will hold the progress meter
    p=gensym()    
    # first argument of the `for` expression is its header; the header's last argument
    # is the collection being iterated over
    r = loop.args[1].args[end]
    ex = quote
        n = length($(esc(r)))
        global $p = Progress(n; showspeed=true)
        $(esc(expr))
    end
    # append the tick to the loop body; `loop` is (part of) the same object as `expr`,
    # so the `esc(expr)` already spliced into `ex` sees this mutation
    push!(loop.args[end].args, :(next!($p)))
    return ex    
end;

In [None]:
# Training split with the predictions of every alpha in the global `residual_alphas`
# subtracted from `my_score` (`residual_alphas` is not defined in this file).
training = get_residuals("training", residual_alphas);

In [None]:
# Validation split with the predictions of every alpha in the global `residual_alphas`
# subtracted from `my_score` (`residual_alphas` is not defined in this file).
validation = get_residuals("validation", residual_alphas);