In [1]:
import sys
import os
sys.path.append(os.path.join(os.path.abspath(''), '../'))

from evaluation.evaluation_harness import ColdFineTuneEvaluator
from pprint import pprint

from dataset.data_loader import DataLoader, playtime_forever_users_games_edge_scoring_function, LinearNormalizer, GaussianNormalizer, PercentileNormalizer, never_remove_edge, remove_zero_playtime_edge
from models.random_model import RandomModel
from models.common_neighbors_model import CommonNeighbors
from models.popularity_model import GamePopularityModel
from models.ncf_model import NCFModel

In [2]:
data_loader = DataLoader(cache_local_dataset=True, users_games_edge_scoring_function = playtime_forever_users_games_edge_scoring_function, user_game_edge_embeddings = ['playtime_forever'], score_normalizers = [GaussianNormalizer(1.0, 1.0)])
data_loader.load_random_user_train_tune_test_split(train_user_percentage=0.8, test_user_percentage=0.2, fine_tune_edge_percentage=0.8, test_edge_percentage=0.2, seed=0)
data_loader.save_data_loader_parameters('test_evaluation_cold_fine_tune_data_loader', overwrite=True)
evaluator = ColdFineTuneEvaluator(data_loader, top_N_games_to_eval = 500, num_users_to_eval=10000, seed=842, debug=True)

In [3]:
random_model_initializer = lambda: RandomModel()
evaluator.reset(random_model_initializer, network_save_file = 'evaluation_cold_fine_tune_test_random_model')
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()
evaluator.plot_top_N_hit_percentage_percentiles(10)
evaluator.compute_top_N_hit_percentage(10)
evaluator.plot_top_N_hit_percentage_percentiles(50)
evaluator.compute_top_N_hit_percentage(50)
evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)
evaluator.save_metrics('evaluation_cold_fine_tune_test_random_model', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [00:58<00:00, 171.34it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
random
{'auc_roc': 0.43498920861717894,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 21.70571676690818,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.0018136020151133501,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.0002945484000294548,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.0020251889168765745,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.0016445619001644561,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.4960961798658042,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [4]:
popularity_model_initializer = lambda: GamePopularityModel()
evaluator.reset(popularity_model_initializer, network_save_file = 'evaluation_cold_fine_tune_test_popularity_model')
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()
evaluator.plot_top_N_hit_percentage_percentiles(10)
evaluator.compute_top_N_hit_percentage(10)
evaluator.plot_top_N_hit_percentage_percentiles(50)
evaluator.compute_top_N_hit_percentage(50)
evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)
evaluator.save_metrics('evaluation_cold_fine_tune_test_popularity_model', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [00:40<00:00, 249.76it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
game_popularity
{'auc_roc': 0.5434224896217845,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 18.99860283182494,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.19682619647355165,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.031966683303196665,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.09843531177898905,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.07993716300799372,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.6605269265172755,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [5]:
common_neighbors_initializer = lambda: CommonNeighbors()
evaluator.reset(common_neighbors_initializer, network_save_file = 'evaluation_cold_fine_tune_test_common_neighbors')
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()
evaluator.plot_top_N_hit_percentage_percentiles(10)
evaluator.compute_top_N_hit_percentage(10)
evaluator.save_metrics('evaluation_cold_fine_tune_test_common_neighbors', overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [26:21<00:00,  6.32it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
common_neighbors
{'auc_roc': 0.8145339123137406,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 17.302017805420554,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.18556430446194225,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.0289230165028923,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.6528502239679645,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [6]:
save_file_name = 'evaluation_cold_fine_tune_test_cf'
cf_initializer = lambda: NCFModel(num_epochs = 40, embedding_size = 50, batch_percent = 0.1, learning_rate = 1e-2, weight_decay=1e-6, mlp_hidden_layer_sizes = [], seed=0, model_type='cf', fine_tune_num_epochs=1, fine_tune_learning_rate=1e-1, fine_tune_weight_decay=1e-5, save_file_name=save_file_name, nn_save_name='best')
evaluator.reset(cf_initializer, network_save_file = save_file_name)
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()
evaluator.plot_top_N_hit_percentage_percentiles(10)
evaluator.compute_top_N_hit_percentage(10)
evaluator.plot_top_N_hit_percentage_percentiles(50)
evaluator.compute_top_N_hit_percentage(50)
evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)
evaluator.save_metrics(save_file_name, overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Known Game Embeddings:  []
Total Learnable Parameters: 4876950


Training: 100%|██████████| 40/40 [02:37<00:00,  3.94s/it]


Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [09:33<00:00, 17.44it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_cf
{'auc_roc': 0.08667092280307143,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 21.937443140116056,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.002367758186397985,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.00038454930003845494,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.0020654911838790934,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.0016772895001677288,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.5061805975057887,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [7]:
save_file_name = 'evaluation_cold_fine_tune_test_gcf'
gcf_initializer = lambda: NCFModel(num_epochs = 40, embedding_size = 50, batch_percent = 0.1, learning_rate = 1e-2, weight_decay=1e-6, mlp_hidden_layer_sizes = [], seed=0, model_type='gcf', fine_tune_num_epochs=1, fine_tune_learning_rate=1e-1, fine_tune_weight_decay=1e-5, save_file_name=save_file_name, nn_save_name='best')
evaluator.reset(gcf_initializer, network_save_file = save_file_name)
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()
evaluator.plot_top_N_hit_percentage_percentiles(10)
evaluator.compute_top_N_hit_percentage(10)
evaluator.plot_top_N_hit_percentage_percentiles(50)
evaluator.compute_top_N_hit_percentage(50)
evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)
evaluator.save_metrics(save_file_name, overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Known Game Embeddings:  []
Total Learnable Parameters: 4877001


Training: 100%|██████████| 40/40 [06:45<00:00, 10.13s/it]


Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [10:39<00:00, 15.64it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_gcf
{'auc_roc': 0.4175088141567461,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 21.3792070147687,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.0024181360201511333,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.00039273120003927313,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.003748110831234257,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.0030436668003043666,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.4981494178793478,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [8]:
save_file_name = 'evaluation_cold_fine_tune_test_mlp'
mlp_initializer = lambda: NCFModel(num_epochs = 40, embedding_size = 50, batch_percent = 0.1, learning_rate = 1e-2, weight_decay=1e-6, mlp_hidden_layer_sizes = [128, 128, 128], seed=0, model_type='mlp', fine_tune_num_epochs=1, fine_tune_learning_rate=1e-1, fine_tune_weight_decay=1e-5, save_file_name=save_file_name, nn_save_name='best')
evaluator.reset(mlp_initializer, network_save_file = save_file_name)
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()
evaluator.plot_top_N_hit_percentage_percentiles(10)
evaluator.compute_top_N_hit_percentage(10)
evaluator.plot_top_N_hit_percentage_percentiles(50)
evaluator.compute_top_N_hit_percentage(50)
evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)
evaluator.save_metrics(save_file_name, overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Known Game Embeddings:  []
Total Learnable Parameters: 4923031


Training: 100%|██████████| 40/40 [35:58<00:00, 53.95s/it]


Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [11:43<00:00, 14.22it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_mlp
{'auc_roc': 0.47071648153148316,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 21.598368581993203,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.000755629439322956,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.00012272850001227286,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.0017027536246486182,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.001382741100138274,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.49000379345968487,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}



In [9]:
save_file_name = 'evaluation_cold_fine_tune_test_ncf'
ncf_initializer = lambda: NCFModel(num_epochs = 40, embedding_size = 25, batch_percent = 0.1, learning_rate = 1e-2, weight_decay=1e-6, mlp_hidden_layer_sizes = [128, 128, 128], seed=0, model_type='ncf', fine_tune_num_epochs=1, fine_tune_learning_rate=1e-1, fine_tune_weight_decay=1e-5, save_file_name=save_file_name, nn_save_name='best')
evaluator.reset(ncf_initializer, network_save_file = save_file_name)
evaluator.plot_top_N_recall_percentiles(10)
evaluator.plot_log_user_percentile_positional_error()
evaluator.compute_mean_positional_error()
evaluator.compute_top_N_recall(10)
evaluator.plot_roc_curve()
evaluator.compute_auc_roc()
evaluator.plot_user_rank_roc_curve()
evaluator.compute_user_rank_auc_roc()
evaluator.plot_top_N_hit_percentage_percentiles(10)
evaluator.compute_top_N_hit_percentage(10)
evaluator.plot_top_N_hit_percentage_percentiles(50)
evaluator.compute_top_N_hit_percentage(50)
evaluator.compute_top_N_recall(50)
evaluator.plot_top_N_recall_percentiles(50)
evaluator.save_metrics(save_file_name, overwrite=True)
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Preparing model.
Known Game Embeddings:  []
Total Learnable Parameters: 4916656


Training: 100%|██████████| 40/40 [34:32<00:00, 51.81s/it]


Done preparing model.
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [13:31<00:00, 12.33it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_ncf
{'auc_roc': 0.48260195630632874,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 21.178240158634715,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.0020654911838790934,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall': 0.0003354579000335458,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage': 0.0023576351106274936,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall': 0.0019145646001914565,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'user_rank_auc_roc': 0.5048937153127335,
 'user_rank_roc_figure': <Figure size 640x480 with 1 Axes>}

