In [1]:
import sys
import os
sys.path.append(os.path.join(os.path.abspath(''), '../'))

from evaluation.evaluation_harness import TrainEvaluator, include_learnable_users, include_all_users
from pprint import pprint

from dataset.data_loader import DataLoader, playtime_forever_users_games_edge_scoring_function, LinearNormalizer, GaussianNormalizer, PercentileNormalizer, never_remove_edge, remove_zero_playtime_edge
from models.random_model import RandomModel
from models.common_neighbors_model import CommonNeighbors
from models.popularity_model import GamePopularityModel
from models.ncf_model import NCFModel
from models.graph_sage_model import GraphSAGE

In [None]:
def get_evaluator_metrics(evaluator):
    """Run the notebook's standard plot/metric suite on ``evaluator``.

    Each entry of the table below is invoked on ``evaluator`` strictly in the
    listed order, with the listed positional arguments. Return values of the
    individual calls are discarded and this function itself returns ``None``;
    the notebook reads results back from ``evaluator.metrics`` afterwards
    (presumably populated by these calls -- confirm in evaluation_harness).

    Args:
        evaluator: object exposing the ``plot_*`` / ``compute_*`` methods
            named in the table (a ``TrainEvaluator`` in this notebook).
    """
    # (method name, positional args). The order is deliberately left exactly
    # as it was originally written, since later steps may rely on state set
    # by earlier ones.
    ordered_steps = (
        ('plot_top_N_recall_percentiles', (10,)),
        ('plot_log_user_percentile_positional_error', ()),
        ('compute_mean_positional_error', ()),
        ('compute_top_N_recall', (10,)),
        ('plot_roc_curve', ()),
        ('compute_auc_roc', ()),
        ('plot_user_rank_roc_curve', ()),
        ('compute_user_rank_auc_roc', ()),
        ('plot_top_N_hit_percentage_percentiles', (10,)),
        ('compute_top_N_hit_percentage', (10,)),
        ('plot_top_N_hit_percentage_percentiles', (50,)),
        ('compute_top_N_hit_percentage', (50,)),
        ('compute_top_N_recall', (50,)),
        ('plot_top_N_recall_percentiles', (50,)),
    )
    for method_name, positional_args in ordered_steps:
        getattr(evaluator, method_name)(*positional_args)

In [2]:
# Dataset: user-game edges are scored with the playtime_forever scoring
# function and carry 'playtime_forever' as their edge embedding.
# NOTE(review): the meaning of GaussianNormalizer's two 1.0 arguments is
# defined in dataset.data_loader -- confirm before changing them.
data_loader = DataLoader(
    cache_local_dataset=True,
    users_games_edge_scoring_function=playtime_forever_users_games_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    score_normalizers=[GaussianNormalizer(1.0, 1.0)],
)

# Random edge split, 80% train / 20% test, with a fixed seed; the loader
# parameters are then saved under a fixed name (overwriting any previous save).
data_loader.load_random_edge_train_test_split(
    train_percentage=0.8,
    test_percentage=0.2,
    seed=0,
)
data_loader.save_data_loader_parameters(
    'test_evaluation_data_loader',
    overwrite=True,
)

# One evaluator is shared by every model cell below; each cell calls
# evaluator.reset(...) with its own model initializer.
evaluator = TrainEvaluator(
    data_loader,
    top_N_games_to_eval=500,
    num_users_to_eval=10000,
    user_eval_include_function=include_learnable_users,
    seed=0,
    debug=True,
)

In [3]:
def random_model_initializer():
    """Zero-argument factory passed to ``evaluator.reset``; returns a new RandomModel."""
    return RandomModel()


# Switch the shared evaluator to the random baseline and run the metric suite.
evaluator.reset(
    random_model_initializer,
    network_save_file='evaluation_test_random_model',
)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(
    'evaluation_test_random_model_learnable_users',
    overwrite=True,
)

# Report: model name, the collected metrics, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_random_model
Doen loading model. evaluation_test_random_model
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [01:09<00:00, 143.64it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
random
{'auc_roc': 0.43810692331704054,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 89.70599649385875,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.01022539857064321,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.00164374489338948,
 'top_10_recall': 0.00037955465587044535,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': 1.3764041825249321e-06,
 'top_50_hit_percentage': 0.009587685541506323,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.0008307902033865941,
 'top_50_recall': 0.001779417526446389,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_var

In [4]:
def popularity_model_initializer():
    """Zero-argument factory passed to ``evaluator.reset``; returns a new GamePopularityModel."""
    return GamePopularityModel()


# Switch the shared evaluator to the popularity baseline and run the metric suite.
evaluator.reset(
    popularity_model_initializer,
    network_save_file='evaluation_test_popularity_model',
)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(
    'evaluation_test_popularity_model_learnable_users',
    overwrite=True,
)

# Report: model name, the collected metrics, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_popularity_model
Doen loading model. evaluation_test_popularity_model
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [00:44<00:00, 222.83it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
game_popularity
{'auc_roc': 0.5292112223438967,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 77.60484166685433,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.36140736668499174,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.043834623751325956,
 'top_10_recall': 0.01341501240694789,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': 0.0003345151887539184,
 'top_50_hit_percentage': 0.2273556899395272,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.01889079641810728,
 'top_50_recall': 0.042195866527360584,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_va

In [5]:
def common_neighbors_initializer():
    """Zero-argument factory passed to ``evaluator.reset``; returns a new CommonNeighbors."""
    return CommonNeighbors()


# Switch the shared evaluator to the common-neighbors model and run the metric suite.
evaluator.reset(
    common_neighbors_initializer,
    network_save_file='evaluation_test_common_neighbors',
)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(
    'evaluation_test_common_neighbors_learnable_users',
    overwrite=True,
)

# Report: model name, the collected metrics, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_common_neighbors
Doen loading model. evaluation_test_common_neighbors
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [02:59<00:00, 55.64it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
common_neighbors
{'auc_roc': 0.7480515654732455,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 72.84980013278626,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.3377680043980209,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.03986768821697541,
 'top_10_recall': 0.012537547342301163,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': 0.00029634629979166964,
 'top_50_hit_percentage': 0.2412314458493678,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.022383336573957517,
 'top_50_recall': 0.04477112446127726,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_v

In [6]:
save_file_name = 'evaluation_test_cf_low_weight_decay_increased_lr_best_model_bugfix'


def cf_initializer(save_file_name=save_file_name):
    """Build the ``model_type='cf'`` NCFModel evaluated in this cell.

    ``save_file_name`` is captured as a default argument, i.e. evaluated once
    when this function is defined. The notebook-level ``save_file_name`` is
    rebound by later cells, so reading the global at call time (as a plain
    closure would) could construct this model with another cell's save name
    if the initializer is ever invoked again later.

    Args:
        save_file_name: name forwarded to ``NCFModel(save_file_name=...)``;
            defaults to this cell's value. Callers normally pass nothing.

    Returns:
        A newly constructed ``NCFModel``.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=50,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='cf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Point the shared evaluator at this model, run the metric suite, and save the
# metrics under '<save_file_name>_learnable_users'.
evaluator.reset(cf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(f'{save_file_name}_learnable_users', overwrite=True)

# Report: model name, the collected metrics, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_cf_low_weight_decay_increased_lr_best_model_bugfix
Doen loading model. evaluation_test_cf_low_weight_decay_increased_lr_best_model_bugfix
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [06:27<00:00, 25.83it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_cf
{'auc_roc': 0.6238204020226044,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 57.508324852765995,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.09406267179769104,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.010113737344227484,
 'top_10_recall': 0.003491494710722215,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': 4.8222673505925964e-05,
 'top_50_hit_percentage': 0.04825728422210006,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.00242266302597285,
 'top_50_recall': 0.008956265508684863,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes

In [7]:
save_file_name = 'evaluation_test_gcf_low_weight_decay_increased_lr_best_model_bugfix'


def gcf_initializer(save_file_name=save_file_name):
    """Build the ``model_type='gcf'`` NCFModel evaluated in this cell.

    ``save_file_name`` is captured as a default argument, i.e. evaluated once
    when this function is defined. The notebook-level ``save_file_name`` is
    rebound by later cells, so reading the global at call time (as a plain
    closure would) could construct this model with another cell's save name
    if the initializer is ever invoked again later.

    Args:
        save_file_name: name forwarded to ``NCFModel(save_file_name=...)``;
            defaults to this cell's value. Callers normally pass nothing.

    Returns:
        A newly constructed ``NCFModel``.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=50,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='gcf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Point the shared evaluator at this model, run the metric suite, and save the
# metrics under '<save_file_name>_learnable_users'.
evaluator.reset(gcf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(f'{save_file_name}_learnable_users', overwrite=True)

# Report: model name, the collected metrics, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_gcf_low_weight_decay_increased_lr_best_model_bugfix
Doen loading model. evaluation_test_gcf_low_weight_decay_increased_lr_best_model_bugfix
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [06:20<00:00, 26.31it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_gcf
{'auc_roc': 0.7133159810114924,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 60.33040723358727,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.1394172622319956,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.01800196948593472,
 'top_10_recall': 0.005175003264986287,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': 6.38233178002083e-05,
 'top_50_hit_percentage': 0.08387025838372732,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.007838213383862515,
 'top_50_recall': 0.01556582212354708,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 

In [8]:
save_file_name = 'evaluation_test_mlp_low_weight_decay_increased_lr_best_model_bugfix'


def mlp_initializer(save_file_name=save_file_name):
    """Build the ``model_type='mlp'`` NCFModel evaluated in this cell.

    ``save_file_name`` is captured as a default argument, i.e. evaluated once
    when this function is defined. The notebook-level ``save_file_name`` is
    rebound by later cells, so reading the global at call time (as a plain
    closure would) could construct this model with another cell's save name
    if the initializer is ever invoked again later.

    Args:
        save_file_name: name forwarded to ``NCFModel(save_file_name=...)``;
            defaults to this cell's value. Callers normally pass nothing.

    Returns:
        A newly constructed ``NCFModel``.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=50,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='mlp',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Point the shared evaluator at this model, run the metric suite, and save the
# metrics under '<save_file_name>_learnable_users'.
evaluator.reset(mlp_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(f'{save_file_name}_learnable_users', overwrite=True)

# Report: model name, the collected metrics, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_mlp_low_weight_decay_increased_lr_best_model_bugfix
Doen loading model. evaluation_test_mlp_low_weight_decay_increased_lr_best_model_bugfix
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [06:34<00:00, 25.32it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_mlp
{'auc_roc': 0.5957500798915274,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 55.424050392386036,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.06476085761407367,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.006295657250570246,
 'top_10_recall': 0.002403846153846154,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': 2.7174206776484547e-05,
 'top_50_hit_percentage': 0.04690489279824079,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.0019239763749211846,
 'top_50_recall': 0.008705269687867312,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 A

In [9]:
save_file_name = 'evaluation_test_ncf_low_weight_decay_increased_lr_best_model_bugfix'


def ncf_initializer(save_file_name=save_file_name):
    """Build the ``model_type='ncf'`` NCFModel evaluated in this cell.

    ``save_file_name`` is captured as a default argument, i.e. evaluated once
    when this function is defined. The notebook-level ``save_file_name`` is
    rebound by a later cell, so reading the global at call time (as a plain
    closure would) could construct this model with another cell's save name
    if the initializer is ever invoked again later.

    Note that this variant uses ``embedding_size=25``.

    Args:
        save_file_name: name forwarded to ``NCFModel(save_file_name=...)``;
            defaults to this cell's value. Callers normally pass nothing.

    Returns:
        A newly constructed ``NCFModel``.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=25,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='ncf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Point the shared evaluator at this model, run the metric suite, and save the
# metrics under '<save_file_name>_learnable_users'.
evaluator.reset(ncf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(f'{save_file_name}_learnable_users', overwrite=True)

# Report: model name, the collected metrics, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_ncf_low_weight_decay_increased_lr_best_model_bugfix
Doen loading model. evaluation_test_ncf_low_weight_decay_increased_lr_best_model_bugfix
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [06:52<00:00, 24.24it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_ncf
{'auc_roc': 0.5833818248975059,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 54.31967769856091,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.06432105552501374,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.006506370060791373,
 'top_10_recall': 0.002387521222410866,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': 2.8831103648955452e-05,
 'top_50_hit_percentage': 0.04697086311159978,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.002063359385889306,
 'top_50_recall': 0.008717513386443778,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axe

In [None]:
save_file_name = 'evaluation_test_graphsage'


def graphsage_initializer():
    """Zero-argument factory passed to ``evaluator.reset``; returns a new GraphSAGE.

    NOTE(review): no seed is passed here, whereas the NCFModel cells pass
    ``seed=0`` -- confirm whether GraphSAGE accepts or needs one for
    reproducible results.
    """
    return GraphSAGE(
        hidden_channels=[32, 64],
        out_channels=16,
        num_epochs=20,
        learning_rate=0.01,
        regularization=0.05,
    )


evaluator.reset(graphsage_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
# Metrics are saved with the '_learnable_users' suffix, matching every other
# model cell in this notebook (the evaluator is built with
# include_learnable_users), so results for different user subsets cannot
# collide under one bare name.
evaluator.save_metrics(f'{save_file_name}_learnable_users', overwrite=True)

# Report: model name, the collected metrics, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()