In [None]:
import numpy as np
import pandas as pd
from spotlight.evaluation import mrr_score
from spotlight.evaluation import rmse_score
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel
from spotlight.interactions import Interactions

In [None]:
# Read the `children-train` parquet split from the interim data directory.
train_path = "../data/interim/20210602_initial_data_children_biography_train_dev_test/children-train.parquet"
train_df = pd.read_parquet(train_path)

In [None]:
# Cap the training frame at a fixed number of rows (presumably to keep this
# experiment quick to run).
# Positional slicing (`iloc`) is deliberate: `.loc[:100000]` slices by index
# *label*, which includes the end label (one extra row) and only works as a
# row cap when the index loaded from parquet is a sorted 0..n range.
N_TRAIN_ROWS = 100000
train_df = train_df.iloc[:N_TRAIN_ROWS]

In [None]:
# Give every distinct raw User_id a dense 0-based integer index, numbered in
# the order the ids first appear in the training frame.
unique_user_ids = train_df["User_id"].unique()
user_id_long_to_id = dict(zip(unique_user_ids, range(len(unique_user_ids))))

In [None]:
# Translate each raw user id into its dense index via the mapping built from
# this same column, producing an int32 array with one entry per training row.
users = np.fromiter(
    (user_id_long_to_id[raw_id] for raw_id in train_df["User_id"]),
    dtype="int32",
    count=len(train_df),
)
# Ratings and book ids are taken straight from the frame and cast to the
# dtypes used when building the Interactions object.
ratings = train_df["Rating"].values.astype("float32")
books = train_df["Book_id"].values.astype("int32")

In [None]:
# Bundle the parallel user / book / rating arrays into a Spotlight dataset.
train = Interactions(user_ids=users, item_ids=books, ratings=ratings)

In [None]:
# Sanity check: display the size of the training Interactions object.
len(train)

In [None]:
# Train an implicit-feedback factorization model with the BPR loss for a
# single epoch.
# A seeded RandomState is passed so repeated runs are comparable.
# NOTE(review): this may not make training fully deterministic if parameter
# initialisation draws from torch's global RNG — confirm, and seed torch too
# if exact reproducibility is required.
SEED = 42
model = ImplicitFactorizationModel(
    n_iter=1,
    loss='bpr',
    random_state=np.random.RandomState(SEED),
)
model.fit(train)

In [None]:
# Read the `children-dev` parquet split from the interim data directory.
dev_path = "../data/interim/20210602_initial_data_children_biography_train_dev_test/children-dev.parquet"
dev_df = pd.read_parquet(dev_path)

In [None]:
# Build a per-row mask selecting dev rows whose user AND book both occur in
# the training data; the user-id mapping has no entry for any other user.
# Vectorized `isin` replaces the row-by-row `iterrows` loop, which is slow on
# large frames.
book_ids = set(books)  # kept defined for any later cell that relies on it
known_user = dev_df["User_id"].isin(list(user_id_long_to_id))
# Cast to integer first, mirroring the `int(...)` conversion applied per row
# before the membership check.
known_book = dev_df["Book_id"].astype("int64").isin(books)
# Plain list of bools, one per dev row, in row order.
keep_indices = (known_user & known_book).tolist()

In [None]:
# Apply the boolean row mask, keeping all columns.
# Note: this rebinds `dev_df`, so the cell cannot be re-run without reloading
# the dev frame first (the mask length would no longer match).
dev_df = dev_df.loc[keep_indices]

In [None]:
# Sanity check: number of dev rows remaining after filtering.
len(dev_df)

In [None]:
# Encode the filtered dev rows the same way as the training rows: dense user
# indices from the training mapping, raw book ids, float32 ratings.
dev_users = np.asarray([user_id_long_to_id[_id] for _id in dev_df["User_id"]], dtype="int32")
dev_books = dev_df["Book_id"].values.astype("int32")
dev_ratings = dev_df["Rating"].values.astype("float32")
# Pass train's dimensions explicitly. When they are omitted, Interactions
# infers num_users / num_items from the ids present in dev, which can be
# smaller than train's and leaves the two datasets with mismatched shapes.
dev = Interactions(
    dev_users,
    dev_books,
    ratings=dev_ratings,
    num_users=train.num_users,
    num_items=train.num_items,
)

In [None]:
# Compute RMSE of the model's predictions against the dev ratings and display it.
# NOTE(review): `model` is an ImplicitFactorizationModel trained with the BPR
# ranking loss, so its scores are presumably not on the rating scale — confirm
# that RMSE against ratings is meaningful here (ExplicitFactorizationModel is
# imported in this notebook but not used).
rmse = rmse_score(model, dev)
rmse

In [None]:
# mrr_score returns one mean-reciprocal-rank value per evaluated user. Keep the
# full array in `mrr`, but display its mean as the headline metric instead of
# dumping the raw per-user array.
# NOTE(review): consider passing `train=train` so items already seen in
# training are excluded from the ranking — confirm the intended protocol.
mrr = mrr_score(model, dev)
mrr.mean()