# Write the splits in a Julia-optimized format

In [None]:
using CSV
using DataFrames
using JLD2
using JupyterFormatter
using ProgressMeter

In [None]:
# Turn on automatic formatting of notebook cells (presumably provided by the
# JupyterFormatter package loaded above — confirm).
enable_autoformat();

In [None]:
# Column-oriented container for one data split: each field is a vector holding one
# CSV column. `get_dataset` fills every field; `get_negative_dataset` fills only
# `user` and `item` and leaves the remaining fields empty.
struct RatingsDataset
    # user ids, shifted by +1 from the CSV values by the loaders (julia is 1 indexed)
    user::Vector{Int32}
    # item ids, shifted by +1 from the CSV values by the loaders (julia is 1 indexed)
    item::Vector{Int32}
    # taken from the `score` CSV column by `get_dataset`
    rating::Vector{Float32}
    # taken from the `timestamp` CSV column by `get_dataset`
    timestamp::Vector{Float32}
    # taken from the `status` CSV column by `get_dataset`
    status::Vector{Int32}
    # taken from the `completion` CSV column by `get_dataset`
    completion::Vector{Float32}
    # taken from the `rewatch` CSV column by `get_dataset`
    rewatch::Vector{Int32}
    # taken from the `source` CSV column by `get_dataset`
    source::Vector{Int32}
end;

In [None]:
"""
    get_dataset(file)

Read the CSV at `file` and pack its columns into a `RatingsDataset`.

The `username` and `animeid` columns are shifted by one before being stored as the
`user` and `item` fields; `score` becomes `rating`, and the `timestamp`, `status`,
`completion`, `rewatch` and `source` columns are passed through under the same names.
"""
function get_dataset(file)
    table = CSV.File(file) |> DataFrame

    # julia is 1 indexed, so shift both id columns up by one
    users = table.username .+ 1
    items = table.animeid .+ 1

    return RatingsDataset(
        users,
        items,
        table.score,
        table.timestamp,
        table.status,
        table.completion,
        table.rewatch,
        table.source,
    )
end;

In [None]:
"""
    get_negative_dataset(file)

Read the CSV at `file` and return a `RatingsDataset` in which only `user` and `item`
are populated (from the `user` and `item` columns, each shifted by one). All other
fields are empty vectors.
"""
function get_negative_dataset(file)
    df = DataFrame(CSV.File(file))
    # Typed empty literals match the struct's field types directly, instead of
    # building `Vector{Any}` values that the constructor then has to convert.
    return RatingsDataset(
        df.user .+ 1, # julia is 1 indexed
        df.item .+ 1, # julia is 1 indexed
        Float32[], # rating
        Float32[], # timestamp
        Int32[], # status
        Float32[], # completion
        Int32[], # rewatch
        Int32[], # source
    )
end;

## Save splits

In [None]:
# Convert every (content, split) CSV into a JLD2 file stored next to it. Each file
# holds the loaded `RatingsDataset` under the name `dataset`. `@time` reports how
# long the load and the save take for each file.
for content in ["explicit", "implicit", "ptw"], split in ["training", "validation", "test"]
    inpath = "../../data/splits/$(content)_$(split).csv"
    outpath = "../../data/splits/$(content)_$(split).jld2"
    @time dataset = get_dataset(inpath)
    @time jldsave(outpath; dataset = dataset)
end;

In [None]:
# Convert each negative-sample CSV into a JLD2 file stored next to it, with the
# loaded `RatingsDataset` saved under the name `dataset`. `@time` reports how long
# the load and the save take for each file.
for split in ["training", "validation", "test"]
    inpath = "../../data/splits/negative_$(split).csv"
    outpath = "../../data/splits/negative_$(split).jld2"
    @time dataset = get_negative_dataset(inpath)
    @time jldsave(outpath; dataset = dataset)
end;