In [1]:
import sys
import os
# Append the parent of the current working directory to the import search path.
# Presumably this is what lets the project-local packages imported further down
# in this cell resolve when the notebook runs from a sub-directory.
working_dir = os.path.abspath('')
sys.path.append(os.path.join(working_dir, '../'))

from evaluation.evaluation_harness import TrainEvaluator
from pprint import pprint

from dataset.data_loader import DataLoader, playtime_forever_users_games_edge_scoring_function, LinearNormalizer, GaussianNormalizer, PercentileNormalizer, never_remove_edge, remove_zero_playtime_edge
from models.random_model import RandomModel
from models.common_neighbors_model import CommonNeighbors
from models.popularity_model import GamePopularityModel
from models.ncf_model import NCFModel

In [2]:
# Dataset configuration: user-game edges are scored with the playtime_forever
# scoring function, 'playtime_forever' is the only edge embedding requested, and
# a single GaussianNormalizer(1.0, 1.0) is applied as the score normalizer.
data_loader = DataLoader(
    cache_local_dataset=True,
    users_games_edge_scoring_function=playtime_forever_users_games_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    score_normalizers=[GaussianNormalizer(1.0, 1.0)],
)

# 80% / 20% train/test split with a fixed seed.
data_loader.load_random_edge_train_test_split(
    train_percentage=0.8,
    test_percentage=0.2,
    seed=0,
)

# One evaluator instance is shared by every model cell below; each of those
# cells calls evaluator.reset(...) with its own model initializer.
evaluator = TrainEvaluator(
    data_loader,
    top_N_games_to_eval=500,
    num_users_to_eval=10000,
    seed=0,
    debug=True,
)

In [3]:
def random_model_initializer():
    """Return a newly constructed RandomModel."""
    return RandomModel()


# Hand the evaluator the model factory for this run.
evaluator.reset(
    random_model_initializer,
    network_save_file='evaluation_test_random_model',
)

# Evaluation steps, kept in their original order. The values they produce are
# read back from evaluator.metrics when it is printed at the end of the cell.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_based_roc_curve()
evaluator.compute_user_based_auc_roc()

# Save the metrics under this run's name, replacing any earlier save.
evaluator.save_metrics('test_random_model', overwrite=True)

# Report: model name, the metrics dict, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_random_model
Doen loading model. evaluation_test_random_model
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [02:50<00:00, 58.65it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Constructing missed edge dataframe.
all test edges: (595027, 6)
missed test edges: (585025, 13)
Ranking missed.
Done getting edge results.
random
{'auc_roc': 0.43892371584260326,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 20.96001299483005,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.0003529251613792315,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_based_auc_roc': 0.49990812338859086,
 'user_based_roc_figure': <Figure size 640x480 with 1 Axes>}



In [4]:
def popularity_model_initializer():
    """Return a newly constructed GamePopularityModel."""
    return GamePopularityModel()


# Hand the evaluator the model factory for this run.
evaluator.reset(
    popularity_model_initializer,
    network_save_file='evaluation_test_popularity_model',
)

# Evaluation steps, kept in their original order. The values they produce are
# read back from evaluator.metrics when it is printed at the end of the cell.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_based_roc_curve()
evaluator.compute_user_based_auc_roc()

# Save the metrics under this run's name, replacing any earlier save.
evaluator.save_metrics('test_popularity_model', overwrite=True)

# Report: model name, the metrics dict, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_popularity_model
Doen loading model. evaluation_test_popularity_model
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [01:20<00:00, 123.54it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Constructing missed edge dataframe.
all test edges: (595027, 6)
missed test edges: (427171, 13)
Ranking missed.
Done getting edge results.
game_popularity
{'auc_roc': 0.5516188837728797,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 18.301942615089626,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.03173301379601262,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_based_auc_roc': 0.6596504232912779,
 'user_based_roc_figure': <Figure size 640x480 with 1 Axes>}



In [5]:
def common_neighbors_initializer():
    """Return a newly constructed CommonNeighbors model."""
    return CommonNeighbors()


# Hand the evaluator the model factory for this run.
evaluator.reset(
    common_neighbors_initializer,
    network_save_file='evaluation_test_common_neighbors',
)

# Evaluation steps, kept in their original order. The values they produce are
# read back from evaluator.metrics when it is printed at the end of the cell.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_based_roc_curve()
evaluator.compute_user_based_auc_roc()

# Save the metrics under this run's name, replacing any earlier save.
evaluator.save_metrics('test_common_neighbors', overwrite=True)

# Report: model name, the metrics dict, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_test_common_neighbors
Doen loading model. evaluation_test_common_neighbors
Getting predictions.


User Predictions:  31%|███       | 3098/10000 [03:36<07:58, 14.42it/s]

In [None]:
def cf_initializer():
    """Return a newly constructed NCFModel configured with model_type='cf'."""
    # NOTE(review): learning_rate is 1e-2 here, whereas the gcf/mlp/ncf cells
    # use 1e-3 -- confirm the difference is intentional.
    return NCFModel(
        num_epochs=40,
        embedding_size=100,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-3,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='cf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
    )


# Hand the evaluator the model factory for this run.
evaluator.reset(cf_initializer, network_save_file='evaluation_test_cf')

# Evaluation steps, kept in their original order.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_based_roc_curve()
evaluator.compute_user_based_auc_roc()

# Save the metrics under this run's name, replacing any earlier save.
evaluator.save_metrics('test_cf', overwrite=True)

# Report: model name, the metrics dict, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

In [None]:
def gcf_initializer():
    """Return a newly constructed NCFModel configured with model_type='gcf'."""
    # NOTE(review): weight_decay is not passed here, so whatever default
    # NCFModel defines applies; the cf/mlp/ncf cells pass weight_decay=1e-3.
    # Confirm the omission is intentional.
    return NCFModel(
        num_epochs=40,
        embedding_size=100,
        batch_percent=0.1,
        learning_rate=1e-3,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='gcf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
    )


# Hand the evaluator the model factory for this run.
evaluator.reset(gcf_initializer, network_save_file='evaluation_test_gcf')

# Evaluation steps, kept in their original order.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_based_roc_curve()
evaluator.compute_user_based_auc_roc()

# Save the metrics under this run's name, replacing any earlier save.
evaluator.save_metrics('test_gcf', overwrite=True)

# Report: model name, the metrics dict, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

In [None]:
def mlp_initializer():
    """Return a newly constructed NCFModel configured with model_type='mlp'."""
    return NCFModel(
        num_epochs=40,
        embedding_size=100,
        batch_percent=0.1,
        learning_rate=1e-3,
        weight_decay=1e-3,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='mlp',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
    )


# Hand the evaluator the model factory for this run.
evaluator.reset(mlp_initializer, network_save_file='evaluation_test_mlp')

# Evaluation steps, kept in their original order.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_based_roc_curve()
evaluator.compute_user_based_auc_roc()

# Save the metrics under this run's name, replacing any earlier save.
evaluator.save_metrics('test_mlp', overwrite=True)

# Report: model name, the metrics dict, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

In [None]:
def ncf_initializer():
    """Return a newly constructed NCFModel configured with model_type='ncf'."""
    return NCFModel(
        num_epochs=40,
        embedding_size=100,
        batch_percent=0.1,
        learning_rate=1e-3,
        weight_decay=1e-3,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='ncf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
    )


# Hand the evaluator the model factory for this run.
evaluator.reset(ncf_initializer, network_save_file='evaluation_test_ncf')

# Evaluation steps, kept in their original order.
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_based_roc_curve()
evaluator.compute_user_based_auc_roc()

# Save the metrics under this run's name, replacing any earlier save.
evaluator.save_metrics('test_ncf', overwrite=True)

# Report: model name, the metrics dict, then a blank separator line.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()