# ItemMetadata

In [1]:
using CSV
using DataFrames
using Dates

import DataStructures: DefaultDict
import NBInclude: @nbinclude
@nbinclude("../Alpha.ipynb");

In [2]:
# Load the processed anime metadata table; the CSV is parsed with a single task.
anime = DataFrame(CSV.File(get_data_path("processed_data/anime.csv"); ntasks = 1))
# Load the anime_to_uid table and attach the metadata columns to it. The inner join
# keeps only the rows whose "anime_id" occurs in both tables.
anime_to_uid = innerjoin(
    DataFrame(CSV.File(get_data_path("processed_data/anime_to_uid.csv"))),
    anime;
    on = "anime_id",
);

In [3]:
"""
    date_transform(x)

Map a possibly-missing date to an integer feature value.

Return `0` when `x` is `missing`; otherwise return `Dates.value(x)`.
"""
function date_transform(x)
    return ismissing(x) ? 0 : Dates.value(x)
end;

In [4]:
"""
    capitalize(s::String)

Convert an underscore-separated name to CamelCase.

The string is split on `"_"`, the first character of every piece is uppercased and
the pieces are concatenated without a separator, e.g. `"start_date"` becomes
`"StartDate"`.
"""
function capitalize(s::String)
    pieces = split(s, "_")
    return join(uppercasefirst(piece) for piece in pieces)
end;

In [5]:
"""
    write_feature(feature; categorical = false, transform = identity)

Export one column of the global `anime_to_uid` table as an item-level alpha.

A lookup table from `anime_to_uid.uid` to the (transformed) column value is built,
saved with `write_params`, and wrapped in a model that is handed to `write_alpha`.
Both helpers are defined outside this cell (presumably in `Alpha.ipynb`).

# Arguments
- `feature`: name of the column of `anime_to_uid` to export; it also determines the
  output directory name, `"Item" * capitalize(feature)`.

# Keywords
- `categorical`: must be `false`; categorical features are not implemented.
- `transform`: function applied elementwise to the column before it is stored.

# Returns
Whatever `write_alpha` returns.
"""
function write_feature(feature; categorical = false, transform = identity)
    @assert !categorical "categorical features are not supported"
    outdir = "Item" * capitalize(feature)
    u = Dict(Pair.(anime_to_uid.uid, transform.(anime_to_uid[:, feature])))
    write_params(Dict("u" => u), outdir)

    # Look up the feature value of every requested item; items without an entry in `u`
    # get 0. `users` is unused because the feature depends on the item only.
    function make_prediction(users, items, u)
        default = zero(Float32)
        r = Vector{Float32}(undef, length(items))
        Threads.@threads for i in eachindex(r)
            # `u` is keyed by item uid, so it has to be indexed with the item. A
            # read-only `get` is used instead of a `DefaultDict` lookup: the latter
            # inserts missing keys, which would mutate the dict from several threads.
            r[i] = get(u, items[i], default)
        end
        return r
    end
    model(users, items) = make_prediction(users, items, u)
    return write_alpha(model, [], true, outdir; log_splits = false)
end;

In [6]:
# Show the column names of the joined table; write_feature looks columns up by name.
names(anime_to_uid)

22-element Vector{String}:
 "anime_id"
 "uid"
 "title"
 "main_picture"
 "alternative_titles"
 "start_date"
 "end_date"
 "synopsis"
 "num_list_users"
 "num_scoring_users"
 "nsfw"
 "medium"
 "status"
 "num_episodes"
 "start_season"
 "source"
 "average_episode_duration"
 "studios"
 "related_anime"
 "recommendations"
 "genres"
 "tags"

In [7]:
# Export the "average_episode_duration" column as-is (default identity transform).
write_feature("average_episode_duration")

In [8]:
# Export "start_date", converted by date_transform to an integer (0 when missing).
write_feature("start_date"; transform = date_transform)

In [9]:
# Export "end_date", converted by date_transform to an integer (0 when missing).
write_feature("end_date"; transform = date_transform)