In [1]:
import numpy as np
import pandas as pd
from surprise import Reader
from surprise.dataset import DatasetAutoFolds

from recommender.testing.custom_metric_utils import calculate_metric_scores
from recommender.testing.custom_metric_utils import create_recommendations_profiles_embeddings
from recommender.tools.surprise_utils import SurpriseTests

In [2]:
# Directory prefix (relative path) for the ratings, game-feature and
# user-profile files loaded in this notebook.
DATA_ROOT = '../../data'

## Data loading

In [3]:
# Explicit ratings (train / test): keep only these three columns, in this order.
columns = ['bgg_user_name', 'bgg_id', 'bgg_user_rating']
train_df = pd.read_csv(f'{DATA_ROOT}/ratings_train_explicit.csv.gz')[columns]
test_df = pd.read_csv(f'{DATA_ROOT}/ratings_test_explicit.csv.gz')[columns]

# Implicit training ratings are loaded with all of their columns.
train_df_implicit = pd.read_csv(f'{DATA_ROOT}/ratings_train_implicit.csv.gz')

# Feature names, flattened into a 1-D array.
features_names = pd.read_csv(f'{DATA_ROOT}/game_features_names.csv.gz').values.flatten()

# Game metadata (JSON-lines file), restricted to the attributes listed here.
game_columns = [
    'bgg_id', 'mechanic', 'category', 'complexity',
    'max_players_best', 'min_players_best', 'max_players_rec', 'min_players_rec',
]
games_df = pd.read_json(f'{DATA_ROOT}/bgg_GameItem.jl', lines=True)[game_columns]

# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
test_users_profiles = pd.read_pickle(f'{DATA_ROOT}/test_users_profiles.pkl')
model = pd.read_pickle('../notebooks_data/SVD_model.pkl')

## Data processing

In [4]:
# Split the flat feature-name array by position: entries 0-19, then entries 20-39.
# Assumes the names file lists 20 mechanic names followed by 20 category names -- TODO confirm.
mechanics_names, categories_names = features_names[:20], features_names[20:40]

In [5]:
# Declare the rating scale (1 to 10) used when parsing the ratings frame.
reader = Reader(rating_scale=(1, 10))

# Wrap the explicit training ratings in a Surprise dataset, then build a
# single trainset from all of them.
train_dataset = DatasetAutoFolds.load_from_df(train_df, reader)
trainset = train_dataset.build_full_trainset()

In [6]:
# Keep only the profiles whose index label appears as a user name in the
# explicit training ratings.
known_users = train_df['bgg_user_name'].unique()
has_train_ratings = test_users_profiles.index.isin(known_users)
test_users_profiles = test_users_profiles[has_train_ratings]

In [7]:
# Multiply the first 40 profile columns by 3.
# NOTE(review): presumably these are the 20 mechanic + 20 category columns and
# 3 is their weight relative to the remaining columns -- confirm.
#
# The assignment goes through `.iloc` on an explicit copy instead of mutating
# `test_users_profiles.values` in place: `.values` is only sometimes a view of
# the frame's data (never for mixed dtypes, and it is read-only under pandas
# Copy-on-Write), so writing through it can be silently lost or raise.
#
# This cell is not idempotent: re-running it scales the columns again, so
# reload `test_users_profiles` before executing it a second time.
test_users_profiles = test_users_profiles.copy()
test_users_profiles.iloc[:, :40] = test_users_profiles.iloc[:, :40] * 3

## Evaluation

In [8]:
# Sorted user identifiers taken from the profile index.
# `np.sort` returns a sorted *copy*. Sorting `index.values` in place would
# reorder the index labels of `test_users_profiles` without moving its rows
# (attaching profiles to the wrong users), and fails outright when pandas
# hands out a read-only array.
users_ids = np.sort(test_users_profiles.index.values)

In [9]:
# Number of recommendations requested per user.
num_recs = 5
test_class = SurpriseTests(model=model, test_df=test_df, trainset=trainset)

# Translate the raw user identifiers into the trainset's inner user ids.
inner_user_ids = np.array([trainset.to_inner_uid(user_id) for user_id in users_ids])
top_n = test_class.get_top_n(num_recs, inner_user_ids)

# NOTE(review): the meaning of the trailing positional 10 is defined by the
# helper's signature, which is not visible here -- confirm.
recommendation_profiles = create_recommendations_profiles_embeddings(
    top_n, num_recs, games_df, categories_names, mechanics_names, 10)

# Score the recommendation profiles against the users' profiles, with the
# profile rows selected in the same order as `users_ids`.
metric = calculate_metric_scores(recommendation_profiles, test_users_profiles.loc[users_ids])

  0%|          | 0/75107 [00:00<?, ?it/s]

In [10]:
# Display the scores returned by calculate_metric_scores in the previous cell.
metric

array([0.57307393, 1.02336196, 0.35789683, 0.3888144 , 2.34314712])