# Global Average
* Outputs the mean score across all (user, item) pairs
  * Prediction for user $i$ and item $j$ is $\tilde r_{ij} = a$
  * Loss function is $L = \sum_{(i, j) \in \Omega}(r_{ij} - \tilde r_{ij})^2$
  * $\Omega$ is the set of observed pairs $(i, j)$
  * $r_{ij}$ is the rating for user $i$ and item $j$
* This is a useful baseline to measure improvements from

In [1]:
using CSV
using DataFrames
using FileIO
using JLD2
using JupyterFormatter
using Statistics
import Metrics

In [2]:
# Turn on automatic formatting of notebook cells (function presumably exported by
# JupyterFormatter, which is loaded above).
enable_autoformat();

In [3]:
"""
    get_split(split)

Load one data split from `../../data/splits/<split>.csv` into a `DataFrame`.

`split` must be `"training"` or `"validation"`; any other value fails the assertion.

The `username` and `anime_id` columns are shifted up by one so they can be used as
1-based Julia indices, and `my_score` is converted with `float`.
"""
function get_split(split)
    @assert split in ["training", "validation"]
    path = "../../data/splits/$(split).csv"
    frame = DataFrame(CSV.File(path))
    # Julia is 1-indexed, so every id stored in the file is incremented by one.
    frame.username .= frame.username .+ 1
    frame.anime_id .= frame.anime_id .+ 1
    frame.my_score = float(frame.my_score)
    return frame
end;

In [4]:
"""
    write_prediction(df, split)

Write the predictions in `df` to `../../data/alphas/<name>/<split>.csv`, where `<name>`
is the value of the global `name`.

`split` must be `"validation"`; any other value fails the assertion. The output
directory is created when it does not exist yet. `df` itself is not modified: a copy
is made, its `username` and `anime_id` columns are shifted down by one, and the copy
is what gets written.

Returns the result of `CSV.write`.
"""
function write_prediction(df, split)
    @assert split in ["validation"]
    outdir = "../../data/alphas/$name"
    isdir(outdir) || mkpath(outdir)
    # Work on a copy so the caller's frame keeps its incremented ids.
    out = copy(df)
    out.username .= out.username .- 1
    out.anime_id .= out.anime_id .- 1
    return CSV.write("$(outdir)/$(split).csv", out)
end;

In [5]:
"""
    write_model(params)

Save `params` to `../../data/alphas/<name>/model.jld2`, where `<name>` is the value of
the global `name`. The directory is created when it does not exist yet.

`params` is handed to `save` unchanged; the result of `save` is returned.
"""
function write_model(params)
    outdir = "../../data/alphas/$name"
    isdir(outdir) || mkpath(outdir)
    return save("$(outdir)/model.jld2", params)
end;

In [6]:
"""
    evaluate(truth, pred)

Print three error metrics for the predictions `pred` against the observed values
`truth`, all on one line and without a trailing newline:

- `RMSE`: square root of `Metrics.mse(pred, truth)`
- `MAE`: `Metrics.mae(pred, truth)`
- `R2`: `Metrics.r2_score(pred, truth)`

Returns `nothing`.
"""
function evaluate(truth, pred)
    rmse = sqrt(Metrics.mse(pred, truth))
    print("RMSE ", rmse)
    mae = Metrics.mae(pred, truth)
    print(" MAE ", mae)
    r2 = Metrics.r2_score(pred, truth)
    print(" R2 ", r2)
    return nothing
end;

In [7]:
# Identifier of this model; write_prediction and write_model interpolate it into their
# output directory, "../../data/alphas/<name>".
name = "GlobalAverage";

In [8]:
# Training split: used to fit μ and to report the in-sample metrics.
training = get_split("training");

In [9]:
# Validation split: used to report held-out metrics and for the predictions written to disk.
validation = get_split("validation");

## Training

In [10]:
# The model's single parameter: the mean of all scores in the training split.
μ = mean(training.my_score);

In [11]:
"""
    make_prediction(users, items, μ)

Return a vector with `length(users)` entries, each equal to `μ`.

The element type of the result is `typeof(μ)`. `items` is not used.
"""
function make_prediction(users, items, μ)
    n = length(users)
    preds = Vector{typeof(μ)}(undef, n)
    return fill!(preds, μ)
end;

"""
    model(users, items)

Forward `users` and `items` to `make_prediction`, supplying the global `μ` as the
third argument, and return its result.
"""
function model(users, items)
    return make_prediction(users, items, μ)
end;

## Inference

In [12]:
# In-sample check: predict a score for every training row and print the metrics.
training_pred_score = model(training.username, training.anime_id);
evaluate(training.my_score, training_pred_score);

RMSE 1.7805202512279754 MAE 1.3906332220653705 R2 0.0

In [13]:
# Held-out check: predict a score for every validation row and print the metrics.
val_pred_score = model(validation.username, validation.anime_id);
evaluate(validation.my_score, val_pred_score);

RMSE 1.7806075310542464 MAE 1.3908660830061252 R2 -2.1652728277032907e-7

In [14]:
# Write predictions to disk: the same rows as `validation`, with `my_score` replaced by
# the predicted score. write_prediction shifts the ids back down by one before writing.
val_pred = copy(validation);
val_pred.my_score = val_pred_score;
write_prediction(val_pred, "validation");

In [15]:
# Write model to disk: the fitted mean under "μ" and the prediction function under "model".
# NOTE(review): "model" is a function that reads the global μ, not plain data. Confirm
# that it can be loaded back from the JLD2 file in a fresh session; if not, consumers
# should rebuild the predictor from the stored "μ" instead.
write_model(Dict("μ" => μ, "model" => model));