In [1]:
import sys
import os
sys.path.append(os.path.join(os.path.abspath(''), '../'))

from evaluation.evaluation_harness import TrainEvaluator
from pprint import pprint

from dataset.data_loader import DataLoader, playtime_forever_users_games_edge_scoring_function, LinearNormalizer, GaussianNormalizer, PercentileNormalizer, never_remove_edge, remove_zero_playtime_edge
from models.random_model import RandomModel
from models.common_neighbors_model import CommonNeighbors
from models.popularity_model import GamePopularityModel
from models.ncf_model import NCFModel

In [2]:
# Dataset setup: user-game edges are scored with
# playtime_forever_users_games_edge_scoring_function, 'playtime_forever' is
# requested as the edge embedding, and a single GaussianNormalizer(1.0, 1.0)
# is supplied as the score normalizer.
data_loader = DataLoader(
    cache_local_dataset=True,
    users_games_edge_scoring_function=playtime_forever_users_games_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    score_normalizers=[GaussianNormalizer(1.0, 1.0)],
)

# Random edge split, 80% train / 20% test, with a fixed seed.
data_loader.load_random_edge_train_test_split(
    train_percentage=0.8,
    test_percentage=0.2,
    seed=0,
)

# One evaluator instance, bound to the loader above; the model cells call
# evaluator.reset(...) on it before computing metrics.
evaluator = TrainEvaluator(
    data_loader,
    top_N_games_to_eval=500,
    num_users_to_eval=10000,
    seed=0,
    debug=True,
)

In [4]:
def random_model_initializer():
    """Zero-argument factory returning a new RandomModel; passed to evaluator.reset."""
    return RandomModel()


# Re-target the evaluator at the random baseline.
evaluator.reset(
    random_model_initializer,
    network_save_file='evaluation_test_random_model',
)

# Recall / positional-error metrics and figures.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)

# ROC curves and their AUC values.
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()

# Hit-percentage figure followed by its scalar, for each cut-off in turn.
for top_n in (10, 50):
    evaluator.plot_top_N_hit_percentage_percentiles(top_n)
    evaluator.compute_top_N_hit_percentage(top_n)

evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)

# Save the collected metrics, then echo the model name and the metrics dict.
evaluator.save_metrics('test_random_model', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_random_model
Doen loading model. evaluation_test_random_model
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [03:02<00:00, 54.90it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
random
{'auc_roc': 0.43892371584260326,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 20.88192135711662,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.0003529251613792315,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.5003802848169288,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [3]:
def popularity_model_initializer():
    """Zero-argument factory returning a new GamePopularityModel; passed to evaluator.reset."""
    return GamePopularityModel()


# Re-target the evaluator at the popularity baseline.
evaluator.reset(
    popularity_model_initializer,
    network_save_file='evaluation_test_popularity_model',
)

# Recall / positional-error metrics and figures.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)

# ROC curves and their AUC values.
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()

# Hit-percentage figure followed by its scalar, for each cut-off in turn.
for top_n in (10, 50):
    evaluator.plot_top_N_hit_percentage_percentiles(top_n)
    evaluator.compute_top_N_hit_percentage(top_n)

evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)

# Save the collected metrics, then echo the model name and the metrics dict.
evaluator.save_metrics('test_popularity_model', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_popularity_model
Doen loading model. evaluation_test_popularity_model
Getting predictions.


User Predictions:   0%|          | 0/10000 [00:00<?, ?it/s]

User Predictions: 100%|██████████| 10000/10000 [01:24<00:00, 117.69it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
game_popularity
{'auc_roc': 0.5515575916825022,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 18.282347335138635,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.03173301379601262,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.6601328958203395,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [5]:
def common_neighbors_initializer():
    """Zero-argument factory returning a new CommonNeighbors model; passed to evaluator.reset."""
    return CommonNeighbors()


# Re-target the evaluator at the common-neighbors model.
evaluator.reset(
    common_neighbors_initializer,
    network_save_file='evaluation_test_common_neighbors',
)

# Recall / positional-error metrics and figures.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)

# ROC curves and their AUC values.
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()

# Hit-percentage figure followed by its scalar, for each cut-off in turn.
for top_n in (10, 50):
    evaluator.plot_top_N_hit_percentage_percentiles(top_n)
    evaluator.compute_top_N_hit_percentage(top_n)

evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)

# Save the collected metrics, then echo the model name and the metrics dict.
evaluator.save_metrics('test_common_neighbors', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_common_neighbors
Doen loading model. evaluation_test_common_neighbors
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [11:49<00:00, 14.09it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
common_neighbors
{'auc_roc': 0.8613416577261556,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 17.23173562413738,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.029543197199454814,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.6511261844784793,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [4]:
def cf_initializer():
    """Zero-argument factory returning a new NCFModel with model_type='cf'; passed to evaluator.reset."""
    return NCFModel(
        num_epochs=40,
        embedding_size=100,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='cf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
    )


# Re-target the evaluator at the 'cf' model.
evaluator.reset(
    cf_initializer,
    network_save_file='evaluation_test_cf_low_weight_decay',
)

# Recall / positional-error metrics and figures.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)

# ROC curves and their AUC values.
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()

# Hit-percentage figure followed by its scalar, for each cut-off in turn.
for top_n in (10, 50):
    evaluator.plot_top_N_hit_percentage_percentiles(top_n)
    evaluator.compute_top_N_hit_percentage(top_n)

evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)

# Save the collected metrics, then echo the model name and the metrics dict.
evaluator.save_metrics('test_cf_low_weight_decay', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Known Game Embeddings:  []
Known User Embeddings:  []
Total Learnable Parameters: 11340200


Training: 100%|██████████| 40/40 [04:18<00:00,  6.46s/it]


Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [14:57<00:00, 11.14it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_cf
{'auc_roc': 0.7340210416150023,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 14.421820158164929,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.06012,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.010103743191485428,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.02493795012409975,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.020955351605893513,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.6114655030547682,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [5]:
def gcf_initializer():
    """Zero-argument factory returning a new NCFModel with model_type='gcf'; passed to evaluator.reset."""
    return NCFModel(
        num_epochs=40,
        embedding_size=100,
        batch_percent=0.1,
        learning_rate=1e-3,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='gcf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
    )


# Re-target the evaluator at the 'gcf' model.
evaluator.reset(
    gcf_initializer,
    network_save_file='evaluation_test_gcf_low_weight_decay',
)

# Recall / positional-error metrics and figures.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)

# ROC curves and their AUC values.
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()

# Hit-percentage figure followed by its scalar, for each cut-off in turn.
for top_n in (10, 50):
    evaluator.plot_top_N_hit_percentage_percentiles(top_n)
    evaluator.compute_top_N_hit_percentage(top_n)

evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)

# Save the collected metrics, then echo the model name and the metrics dict.
evaluator.save_metrics('test_gcf_low_weight_decay', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Known Game Embeddings:  []
Known User Embeddings:  []
Total Learnable Parameters: 11340301


Training: 100%|██████████| 40/40 [18:15<00:00, 27.38s/it]


Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [14:31<00:00, 11.47it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_gcf
{'auc_roc': 0.7016251268778072,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 18.614020300150695,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.018589814101858983,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.003124227976209483,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.011064,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.0092970571083329,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.5680052218503666,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [6]:
def mlp_initializer():
    """Zero-argument factory returning a new NCFModel with model_type='mlp'; passed to evaluator.reset."""
    return NCFModel(
        num_epochs=40,
        embedding_size=100,
        batch_percent=0.1,
        learning_rate=1e-3,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='mlp',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
    )


# Re-target the evaluator at the 'mlp' model.
evaluator.reset(
    mlp_initializer,
    network_save_file='evaluation_test_mlp_low_weight_decay',
)

# Recall / positional-error metrics and figures.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)

# ROC curves and their AUC values.
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()

# Hit-percentage figure followed by its scalar, for each cut-off in turn.
for top_n in (10, 50):
    evaluator.plot_top_N_hit_percentage_percentiles(top_n)
    evaluator.compute_top_N_hit_percentage(top_n)

evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)

# Save the collected metrics, then echo the model name and the metrics dict.
evaluator.save_metrics('test_mlp_low_weight_decay', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Known Game Embeddings:  []
Known User Embeddings:  []
Total Learnable Parameters: 11399081


Training: 100%|██████████| 40/40 [54:05<00:00, 81.14s/it] 


Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [12:46<00:00, 13.05it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_mlp
{'auc_roc': 0.6683662616649821,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 16.025783472765504,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.03668,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.006164426152090578,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.011698,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.009829806042414882,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.6229606684785438,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [3]:
def ncf_initializer():
    """Zero-argument factory returning a new NCFModel with model_type='ncf'; passed to evaluator.reset."""
    return NCFModel(
        num_epochs=40,
        embedding_size=100,
        batch_percent=0.1,
        learning_rate=1e-3,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='ncf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
    )


# Re-target the evaluator at the 'ncf' model.
evaluator.reset(
    ncf_initializer,
    network_save_file='evaluation_test_ncf_low_weight_decay',
)

# Recall / positional-error metrics and figures.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)

# ROC curves and their AUC values.
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()

# Hit-percentage figure followed by its scalar, for each cut-off in turn.
for top_n in (10, 50):
    evaluator.plot_top_N_hit_percentage_percentiles(top_n)
    evaluator.compute_top_N_hit_percentage(top_n)

evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)

# Save the collected metrics, then echo the model name and the metrics dict.
evaluator.save_metrics('test_ncf_low_weight_decay', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Known Game Embeddings:  []
Known User Embeddings:  []
Total Learnable Parameters: 22739381


Training: 100%|██████████| 40/40 [1:01:29<00:00, 92.25s/it] 


Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [17:51<00:00,  9.33it/s] 


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_ncf
{'auc_roc': 0.692817107699023,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 15.973808514978835,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.04527,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.007608058121732291,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.014183971632056735,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.011918786878578619,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.6395964476603843,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}

