In [1]:
import pickle
import random

import numpy as np
import pandas as pd
import scipy.sparse
from tqdm.auto import tqdm

from recommender.testing.custom_metric_utils import calculate_metric_scores
from recommender.testing.custom_metric_utils import create_recommendations_profiles_embeddings, create_users_profiles_embeddings

from recommender.tools.lightfm_utils import LightFMTests

## Data loading

In [2]:
# Root folder (relative to this notebook) that holds every input file read below.
DATA_ROOT = '../../data'

# Rating tables: train split, test split and the complete set.
train_df = pd.read_csv(f'{DATA_ROOT}/ratings_train_implicit.csv.gz')
test_df = pd.read_csv(f'{DATA_ROOT}/ratings_test_implicit.csv.gz')
full_df = pd.read_csv(f'{DATA_ROOT}/ratings_all_implicit.csv.gz')

# Feature names, flattened from the CSV table into a 1-D array.
features_names = pd.read_csv(f'{DATA_ROOT}/game_features_names.csv.gz').values.flatten()

# Only these columns of the raw game dump (JSON lines) are kept.
game_feature_columns = [
    'bgg_id',
    'mechanic',
    'category',
    'complexity',
    'max_players_best',
    'min_players_best',
    'max_players_rec',
    'min_players_rec',
]
games_features_df = pd.read_json(f'{DATA_ROOT}/bgg_GameItem.jl', lines=True)[game_feature_columns]

In [3]:
# Pre-computed profile objects stored as pickles.
# NOTE(review): read_pickle unpickles arbitrary objects — only load files from a trusted source.
games_profiles = pd.read_pickle(f'{DATA_ROOT}/games_profiles.pkl')
users_profiles = pd.read_pickle(f'{DATA_ROOT}/users_profiles.pkl')
test_users_profiles = pd.read_pickle(f'{DATA_ROOT}/test_users_profiles.pkl')

In [4]:
# features_names is split positionally: the first block is used as mechanic
# names and the block right after it as category names.
# NOTE(review): assumes the file lists 20 mechanics followed by 20 categories — confirm.
N_MECHANICS = 20
N_CATEGORIES = 20

mechanics_names = features_names[:N_MECHANICS]
categories_names = features_names[N_MECHANICS:N_MECHANICS + N_CATEGORIES]

## LightFM model

In [5]:
# Sparse interaction matrices for the two splits.
train_interactions = scipy.sparse.load_npz(f'{DATA_ROOT}/train_interactions.npz')
test_interactions = scipy.sparse.load_npz(f'{DATA_ROOT}/test_interactions.npz')

# Pickled dataset object and item feature data that accompany the matrices.
# NOTE(review): read_pickle unpickles arbitrary objects — only load files from a trusted source.
dataset = pd.read_pickle(f'{DATA_ROOT}/dataset.pkl')
item_features = pd.read_pickle(f'{DATA_ROOT}/item_features.pkl')

In [6]:
# Previously trained model, restored from its pickle.
# NOTE(review): read_pickle unpickles arbitrary objects — only load model files from a trusted source.
model = pd.read_pickle('../notebooks_data/warp_model.pkl')

# Wrap the model together with the data it was built on; this object is what
# later cells query for top-n recommendations.
test_class = LightFMTests(
    model=model,
    train_df=train_df,
    dataset=dataset,
    train_interactions=train_interactions,
    test_interactions=test_interactions,
    item_features=item_features,
)

In [7]:
# Number of rows per user in each split.
train_counts = train_df['bgg_user_name'].value_counts().rename('train')
test_counts = test_df['bgg_user_name'].value_counts().rename('test')

# Align both counts on the user name; users missing from either split end up
# with a NaN and are dropped.
df = pd.concat((train_counts, test_counts), axis=1).dropna()

# Evaluation candidates: at least 10 train rows and at least 5 test rows.
users = df.query('train >= 10 and test >= 5').index.values

In [8]:
# How many users passed the train/test count thresholds.
users.size

64832

In [9]:
# Compare three sets of games against each sampled user's profile:
#   1. the user's own test games,
#   2. a random draw of rows from train_df (baseline),
#   3. the top-n recommendations of the LightFM model.
# The last entry of every metric vector is averaged over users and printed.

DEBUG = False      # set True to print the last metric entry per user and variant
EVAL_SEED = 42     # seeds both the user draw and the random-games baseline
N_EVAL_USERS = 100 # number of users drawn from `users`
N_RECS = 5         # size of the random baseline and of the recommendation list

# A dedicated RandomState makes this cell reproducible on Restart & Run All
# without touching NumPy's global random state.
eval_rng = np.random.RandomState(EVAL_SEED)
random_users = eval_rng.choice(users, size=N_EVAL_USERS, replace=False)

test_metric_scores = []
random_metric_scores = []
recommended_metric_scores = []

random_users_games = test_df.loc[test_df['bgg_user_name'].isin(random_users)]
test_games_grouped = random_users_games.groupby('bgg_user_name')["bgg_id"]

# Trailing arguments shared by every profile-embedding call.
# NOTE(review): the final 10 is passed through unchanged; its meaning is defined by the helper.
profile_args = (games_features_df, categories_names, mechanics_names, 10)

# Loop-invariant: external user name -> internal LightFM id.
user_id_map = dataset.mapping()[0]

# Not needed by this cell itself (no progress_apply here); kept in case other cells rely on it.
tqdm.pandas()

for user, user_test_game_ids in tqdm(test_games_grouped):
    user_profile = users_profiles.loc[user]

    # 1. The user's held-out test games.
    num_recs = len(user_test_game_ids)
    test_games = pd.DataFrame({"bgg_user_name": user, "bgg_id": user_test_game_ids})
    test_games_profiles = create_recommendations_profiles_embeddings(test_games, num_recs, *profile_args)
    metric = calculate_metric_scores(test_games_profiles, user_profile)
    test_metric_scores.append(metric)
    if DEBUG:
        print("Test_games: ", metric[-1])

    # 2. Random baseline: rows sampled from train_df with the seeded generator
    #    (the generator advances, so every user gets a different draw).
    random_games = train_df.sample(N_RECS, random_state=eval_rng)
    random_games_profiles = create_recommendations_profiles_embeddings(random_games, N_RECS, *profile_args)
    metric = calculate_metric_scores(random_games_profiles, user_profile)
    random_metric_scores.append(metric)
    if DEBUG:
        print("Random_games: ", metric[-1])

    # 3. Model recommendations for this user.
    recommended_games = test_class.get_top_n(
        users_inner_id_subset=[user_id_map[user]],
        n=N_RECS,
    )
    recommended_games_profiles = create_recommendations_profiles_embeddings(recommended_games, N_RECS, *profile_args)
    metric = calculate_metric_scores(recommended_games_profiles, user_profile)
    recommended_metric_scores.append(metric)
    if DEBUG:
        print("Recommended games: ", metric[-1])

# Mean over users of each metric component; only the last component is reported.
print("Test games: ", np.array(test_metric_scores).mean(axis=0)[-1])
print("Random games: ", np.array(random_metric_scores).mean(axis=0)[-1])
print("Recommended games: ", np.array(recommended_metric_scores).mean(axis=0)[-1])

  0%|          | 0/5000 [00:00<?, ?it/s]

Test games:  1.4851917212562356
Random games:  0.3052717159612862
Recommended games:  1.6059528218575938


# Evaluation on hand-crafted users with strongly defined tastes

In [32]:
# Hand-written users. "train" lists the game ids that define the user's taste,
# "test" lists further game ids of the same kind (the titles after the "+").
USERS_TO_ADD = [
    # Family: Catan, Monopoly, UNO, Ticket to Ride, Scrabble + Cluedo, Carcassonne, Splendor
    {
        "bgg_user_name": "USER_1",
        "train": [13, 1406, 2223, 9209, 320],
        "test": [130592, 822, 148228],
    },
    # Complex games: Game of Thrones, War of the Ring, Twilight Imperium (3rd edition),
    # Blood Rage, Scythe + Star Wars: Rebellion, Root, Battlestar Galactica
    {
        "bgg_user_name": "USER_2",
        "train": [103343, 2228, 12493, 170216, 169786],
        "test": [187645, 237182, 37111],
    },
    # Worker placement: Viticulture, Agricola, Feast for Odin, West Kingdom 1, 3, + 2, Stone Age, Caylus
    {
        "bgg_user_name": "USER_3",
        "train": [183394, 31260, 177736, 236457, 296151],
        "test": [266810, 34635, 18602],
    },
    # Ameritrash: Runewars, Arkham Horror, Starcraft + Last Night on Earth, Talisman, Divine Right
    {
        "bgg_user_name": "USER_4",
        "train": [59294, 15987, 22827],
        "test": [29368, 27627, 23],
    },
    # Eurogames: Catan, Puerto Rico, Carcassonne + Ra, El Grande, Five Tribes
    {
        "bgg_user_name": "USER_5",
        "train": [13, 3076, 822],
        "test": [12, 93, 157354],
    },
]

In [35]:
# For every hand-written user: build a profile from the "train" games, then
# score (a) the user's "test" games and (b) an equally sized random draw of
# train_df rows against that profile, printing both metric vectors.

# Seeded generator so the random baseline is reproducible across runs; it
# advances between users, so each user gets a different draw.
baseline_rng = np.random.RandomState(42)

for user in USERS_TO_ADD:
    # One-row frame whose "train"/"test" cells hold lists; explode turns each
    # list into one row per game id.
    user_games = pd.DataFrame.from_records([user])
    user_train_games = user_games[['bgg_user_name', 'train']].explode('train').rename(columns={"train": "bgg_id"})
    user_test_games = user_games[['bgg_user_name', 'test']].explode('test').rename(columns={"test": "bgg_id"})
    n_recs = len(user_test_games)

    user_profile = create_users_profiles_embeddings(user_train_games, games_features_df, categories_names, mechanics_names, 10)

    similar_games_profiles = create_recommendations_profiles_embeddings(user_test_games, n_recs, games_features_df, categories_names, mechanics_names, 10)
    metric = calculate_metric_scores(similar_games_profiles, user_profile)
    print("Similar games: ", metric)

    # Baseline of the same size as the test list.
    random_games = train_df.sample(n_recs, random_state=baseline_rng)
    random_games_profiles = create_recommendations_profiles_embeddings(random_games, n_recs, games_features_df, categories_names, mechanics_names, 10)
    metric = calculate_metric_scores(random_games_profiles, user_profile)
    print("Random games: ", metric)
    print("--------")

Similar games:  [0.25       0.14583333 0.38095238 0.93945532 1.71624104]
Random games:  [0.05555556 0.04166667 0.17460317 0.14965318 0.42147858]
--------
Similar games:  [0.43589744 0.4        0.2        0.88510251 1.92099994]
Random games:  [0.1025641  0.09259259 0.06666667 0.64208555 0.90390891]
--------
Similar games:  [0.5        0.06060606 0.33333333 0.71553873 1.60947812]
Random games:  [0.         0.08080808 0.22222222 0.22123458 0.52426489]
--------
Similar games:  [0.48148148 0.35555556 0.66666667 0.19043728 1.69414098]
Random games:  [0.03703704 0.02222222 0.         0.         0.05925926]
--------
Similar games:  [0.         0.11111111 0.33333333 0.80098784 1.24543228]
Random games:  [0.         0.01851852 0.25925926 0.35811306 0.63589084]
--------


In [41]:
# Inspect the feature row of game 157354, the last id in USER_5's
# test list [12, 93, 157354].
games_features_df.loc[games_features_df['bgg_id'] == 157354]

Unnamed: 0,bgg_id,mechanic,category,complexity,max_players_best,min_players_best,max_players_rec,min_players_rec
61558,157354,"[Auction/Bidding:2012, Constrained Bidding:292...","[Animals:1089, Arabian:1052, Fantasy:1010, Myt...",2.8542,2.0,2.0,4.0,2.0
