In [1]:
import sys
import os
sys.path.append(os.path.join(os.path.abspath(''), '../'))

from evaluation.evaluation_harness import ColdFineTuneEvaluator, include_learnable_users, include_all_users
from pprint import pprint

from dataset.data_loader import DataLoader, playtime_forever_users_games_edge_scoring_function, LinearNormalizer, GaussianNormalizer, PercentileNormalizer, never_remove_edge, remove_zero_playtime_edge
from models.random_model import RandomModel
from models.common_neighbors_model import CommonNeighbors
from models.popularity_model import GamePopularityModel
from models.ncf_model import NCFModel

In [2]:
def get_evaluator_metrics(evaluator):
    """Run every plot and metric computation this notebook reports on `evaluator`.

    The evaluator methods are invoked one after another in a fixed order.
    Whatever each method returns is discarded, and this function itself
    returns nothing.
    """
    # (method name, positional arguments), listed in execution order.
    metric_steps = (
        ('plot_top_N_recall_percentiles', (10,)),
        ('plot_log_user_percentile_positional_error', ()),
        ('compute_mean_positional_error', ()),
        ('compute_top_N_recall', (10,)),
        ('plot_roc_curve', ()),
        ('compute_auc_roc', ()),
        ('plot_user_rank_roc_curve', ()),
        ('compute_user_rank_auc_roc', ()),
        ('plot_top_N_hit_percentage_percentiles', (10,)),
        ('compute_top_N_hit_percentage', (10,)),
        ('plot_top_N_hit_percentage_percentiles', (50,)),
        ('compute_top_N_hit_percentage', (50,)),
        ('compute_top_N_recall', (50,)),
        ('plot_top_N_recall_percentiles', (50,)),
    )
    # The attribute is looked up lazily, right before each call.
    for method_name, positional_args in metric_steps:
        getattr(evaluator, method_name)(*positional_args)

In [3]:
# Data loader whose user-game edges are scored by the playtime_forever scoring function.
data_loader = DataLoader(
    cache_local_dataset=True,
    users_games_edge_scoring_function=playtime_forever_users_games_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    users_games_edge_score_normalizers=[GaussianNormalizer(1.0, 1.0)],
    interactions_score_normalizers=[GaussianNormalizer(0.0, 1.0)],
)

# Random user split with a fixed seed.
data_loader.load_random_user_train_tune_test_split(
    train_user_percentage=0.8,
    test_user_percentage=0.2,
    fine_tune_edge_percentage=0.8,
    test_edge_percentage=0.2,
    seed=0,
)

# Save the loader parameters under a fixed name, overwriting any earlier save.
data_loader.save_data_loader_parameters(
    'test_evaluation_cold_fine_tune_data_loader',
    overwrite=True,
)

# Evaluator shared by the model cells that use `data_loader`.
evaluator = ColdFineTuneEvaluator(
    data_loader,
    top_N_games_to_eval=500,
    num_users_to_eval=2000,
    user_eval_include_function=include_all_users,
    seed=842,
    debug=True,
)

In [4]:
def random_model_initializer():
    """Return a freshly constructed RandomModel."""
    return RandomModel()


# Evaluate the random baseline and save its metrics.
evaluator.reset(
    random_model_initializer,
    network_save_file='evaluation_cold_fine_tune_test_random_model',
)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(
    'evaluation_cold_fine_tune_test_random_model',
    overwrite=True,
)

# Report the model name followed by the collected metrics.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_random_model
Doen loading model. evaluation_cold_fine_tune_test_random_model
Getting predictions.


User Predictions:   0%|          | 0/10000 [00:00<?, ?it/s]

User Predictions: 100%|██████████| 10000/10000 [01:09<00:00, 144.21it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
random
{'auc_roc': 0.45207676373583294,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 22.313775691198884,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.0021662468513853903,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.0003026274061862229,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.0022367758186397986,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.00016507406390252522,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank_auc_roc': 0.50737975044

In [5]:
def popularity_model_initializer():
    """Return a freshly constructed GamePopularityModel."""
    return GamePopularityModel()


# Evaluate the popularity baseline and save its metrics.
evaluator.reset(
    popularity_model_initializer,
    network_save_file='evaluation_cold_fine_tune_test_popularity_model',
)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(
    'evaluation_cold_fine_tune_test_popularity_model',
    overwrite=True,
)

# Report the model name followed by the collected metrics.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_popularity_model
Doen loading model. evaluation_cold_fine_tune_test_popularity_model
Getting predictions.


User Predictions:   0%|          | 0/10000 [00:00<?, ?it/s]

User Predictions: 100%|██████████| 10000/10000 [00:49<00:00, 202.05it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
game_popularity
{'auc_roc': 0.5434632565331029,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 19.448392931882402,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.1930478589420655,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.03319263461806597,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.0975577810466077,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.011203037338410287,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank_auc_roc': 0.6602946798030

In [6]:
def common_neighbors_initializer():
    """Return a freshly constructed CommonNeighbors model."""
    return CommonNeighbors()


# Evaluate the common-neighbors baseline and save its metrics.
evaluator.reset(
    common_neighbors_initializer,
    network_save_file='evaluation_cold_fine_tune_test_common_neighbors',
)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(
    'evaluation_cold_fine_tune_test_common_neighbors',
    overwrite=True,
)

# Report the model name followed by the collected metrics.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_common_neighbors
Doen loading model. evaluation_cold_fine_tune_test_common_neighbors
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [28:39<00:00,  5.81it/s]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
common_neighbors
{'auc_roc': 0.8103142364776144,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 17.84838093007182,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.18401682439537329,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.03152327223388177,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.10077812828601472,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.013212992970555096,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank_auc_roc': 0.65313359725

In [5]:
save_file_name = 'evaluation_cold_fine_tune_test_cf'


def cf_initializer():
    """Build the NCFModel with model_type='cf' used in this cell.

    `save_file_name` is read from the notebook namespace when this is called.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=50,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='cf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Evaluate the model and save its metrics under the '_slow_fine_tune' suffix.
evaluator.reset(cf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(f'{save_file_name}_slow_fine_tune', overwrite=True)

# Report the model name followed by the collected metrics.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_cf
Doen loading model. evaluation_cold_fine_tune_test_cf
Getting predictions.


User Predictions: 100%|██████████| 2000/2000 [1:07:07<00:00,  2.01s/it]


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_cf
{'auc_roc': 0.23020168331572918,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 18.927944128694964,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.011886304909560724,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.0012026957803463849,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.0049095607235142115,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.00017332119134147258,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_ran

In [4]:
save_file_name = 'evaluation_cold_fine_tune_test_gcf'


def gcf_initializer():
    """Build the NCFModel with model_type='gcf' used in this cell.

    `save_file_name` is read from the notebook namespace when this is called.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=50,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='gcf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Evaluate the model and save its metrics under the '_slow_fine_tune' suffix.
evaluator.reset(gcf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(f'{save_file_name}_slow_fine_tune', overwrite=True)

# Report the model name followed by the collected metrics.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_gcf
Doen loading model. evaluation_cold_fine_tune_test_gcf
Getting predictions.


  from .autonotebook import tqdm as notebook_tqdm
User Predictions: 100%|██████████| 2000/2000 [1:47:55<00:00,  3.24s/it]  


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_gcf
{'auc_roc': 0.5601055859791808,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 18.653088116758376,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.026098191214470285,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.0029113679729486754,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.011007751937984495,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.00046696722503536647,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank

In [6]:
save_file_name = 'evaluation_cold_fine_tune_test_mlp'


def mlp_initializer():
    """Build the NCFModel with model_type='mlp' used in this cell.

    `save_file_name` is read from the notebook namespace when this is called.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=50,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='mlp',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Evaluate the model and save its metrics under the '_slow_fine_tune' suffix.
evaluator.reset(mlp_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(f'{save_file_name}_slow_fine_tune', overwrite=True)

# Report the model name followed by the collected metrics.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_mlp
Doen loading model. evaluation_cold_fine_tune_test_mlp
Getting predictions.


User Predictions: 100%|██████████| 2000/2000 [4:01:51<00:00,  7.26s/it]  


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_mlp
{'auc_roc': 0.6409064510317832,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 17.14653920050041,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.01937984496124031,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.0021831252592160746,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.013229974160206718,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.0005071645364094822,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank_au

In [7]:
save_file_name = 'evaluation_cold_fine_tune_test_ncf'


def ncf_initializer():
    """Build the NCFModel with model_type='ncf' used in this cell.

    `save_file_name` is read from the notebook namespace when this is called.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=25,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='ncf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Evaluate the model and save its metrics under the '_slow_fine_tune' suffix.
evaluator.reset(ncf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator)
evaluator.save_metrics(f'{save_file_name}_slow_fine_tune', overwrite=True)

# Report the model name followed by the collected metrics.
print(evaluator.model.name())
pprint(evaluator.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_ncf
Doen loading model. evaluation_cold_fine_tune_test_ncf
Getting predictions.


User Predictions: 100%|██████████| 2000/2000 [4:33:13<00:00,  8.20s/it]  


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_ncf
{'auc_roc': 0.6190740017852419,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 16.724733370460733,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.019121447028423774,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.0018053794055044243,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.00950904392764858,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.00031062772842589567,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank_

In [4]:
# Same loader arguments as the `data_loader` cell, plus the listed game_embeddings.
data_loader_embeddings = DataLoader(
    cache_local_dataset=True,
    users_games_edge_scoring_function=playtime_forever_users_games_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    users_games_edge_score_normalizers=[GaussianNormalizer(1.0, 1.0)],
    interactions_score_normalizers=[GaussianNormalizer(0.0, 1.0)],
    game_embeddings=['numReviews', 'avgReviewScore', 'price', 'numFollowers'],
)

# Random user split with a fixed seed.
data_loader_embeddings.load_random_user_train_tune_test_split(
    train_user_percentage=0.8,
    test_user_percentage=0.2,
    fine_tune_edge_percentage=0.8,
    test_edge_percentage=0.2,
    seed=0,
)

# Save the loader parameters under a fixed name, overwriting any earlier save.
data_loader_embeddings.save_data_loader_parameters(
    'test_evaluation_cold_fine_tune_data_loader_embed_all_except_tags_genres',
    overwrite=True,
)

# Evaluator shared by the model cells that use `data_loader_embeddings`.
evaluator_embeddings = ColdFineTuneEvaluator(
    data_loader_embeddings,
    top_N_games_to_eval=500,
    num_users_to_eval=2000,
    user_eval_include_function=include_all_users,
    seed=842,
    debug=True,
)

In [9]:
save_file_name = 'evaluation_cold_fine_tune_test_cf_embed_all_except_tags_genres'


def cf_initializer():
    """Build the NCFModel with model_type='cf' used in this cell.

    `save_file_name` is read from the notebook namespace when this is called.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=48,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='cf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Evaluate the model and save its metrics under the '_slow_fine_tune' suffix.
evaluator_embeddings.reset(cf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator_embeddings)
evaluator_embeddings.save_metrics(f'{save_file_name}_slow_fine_tune', overwrite=True)

# Report the model name followed by the collected metrics.
print(evaluator_embeddings.model.name())
pprint(evaluator_embeddings.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_cf_embed_all_except_tags_genres
Doen loading model. evaluation_cold_fine_tune_test_cf_embed_all_except_tags_genres
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [6:11:41<00:00,  2.23s/it]  


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_cf
{'auc_roc': 0.20411834562316492,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 20.136622416828143,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.007153652392947104,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.0007851368512369114,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.003486146095717884,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.00017364198582233418,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank

In [10]:
save_file_name = 'evaluation_cold_fine_tune_test_gcf_embed_all_except_tags_genres'


def gcf_initializer():
    """Build the NCFModel with model_type='gcf' used in this cell.

    `save_file_name` is read from the notebook namespace when this is called.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=48,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[],
        seed=0,
        model_type='gcf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Evaluate the model and save its metrics under the '_slow_fine_tune' suffix.
evaluator_embeddings.reset(gcf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator_embeddings)
evaluator_embeddings.save_metrics(f'{save_file_name}_slow_fine_tune', overwrite=True)

# Report the model name followed by the collected metrics.
print(evaluator_embeddings.model.name())
pprint(evaluator_embeddings.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_gcf_embed_all_except_tags_genres
Doen loading model. evaluation_cold_fine_tune_test_gcf_embed_all_except_tags_genres
Getting predictions.


User Predictions: 100%|██████████| 10000/10000 [9:59:19<00:00,  3.60s/it]  


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_gcf
{'auc_roc': 0.5581116157635702,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 17.167554327220834,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.033350125944584384,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.0038249895019742547,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.015012594458438288,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.0007813805102008473,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank_

In [5]:
save_file_name = 'evaluation_cold_fine_tune_test_mlp_embed_all_except_tags_genres'


def mlp_initializer():
    """Build the NCFModel with model_type='mlp' used in this cell.

    `save_file_name` is read from the notebook namespace when this is called.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=50,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='mlp',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Evaluate the model and save its metrics under the '_slow_fine_tune' suffix.
evaluator_embeddings.reset(mlp_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator_embeddings)
evaluator_embeddings.save_metrics(f'{save_file_name}_slow_fine_tune', overwrite=True)

# Report the model name followed by the collected metrics.
print(evaluator_embeddings.model.name())
pprint(evaluator_embeddings.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_mlp_embed_all_except_tags_genres
Doen loading model. evaluation_cold_fine_tune_test_mlp_embed_all_except_tags_genres
Getting predictions.


  from .autonotebook import tqdm as notebook_tqdm
User Predictions: 100%|██████████| 2000/2000 [4:00:12<00:00,  7.21s/it]  


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_mlp
{'auc_roc': 0.6671853385527783,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 16.74877080371823,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.027906976744186046,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.002839505430764515,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.014108527131782947,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.0005730721015568332,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank_au

In [6]:
save_file_name = 'evaluation_cold_fine_tune_test_ncf_embed_all_except_tags_genres'


def ncf_initializer():
    """Build the NCFModel with model_type='ncf' used in this cell.

    `save_file_name` is read from the notebook namespace when this is called.
    """
    return NCFModel(
        num_epochs=40,
        embedding_size=24,
        batch_percent=0.1,
        learning_rate=1e-2,
        weight_decay=1e-6,
        mlp_hidden_layer_sizes=[128, 128, 128],
        seed=0,
        model_type='ncf',
        fine_tune_num_epochs=1,
        fine_tune_learning_rate=1e-1,
        fine_tune_weight_decay=1e-5,
        save_file_name=save_file_name,
        nn_save_name='best',
    )


# Evaluate the model and save its metrics under the '_slow_fine_tune' suffix.
evaluator_embeddings.reset(ncf_initializer, network_save_file=save_file_name)
get_evaluator_metrics(evaluator_embeddings)
evaluator_embeddings.save_metrics(f'{save_file_name}_slow_fine_tune', overwrite=True)

# Report the model name followed by the collected metrics.
print(evaluator_embeddings.model.name())
pprint(evaluator_embeddings.metrics)
print()

Loading model: evaluation_cold_fine_tune_test_ncf_embed_all_except_tags_genres
Doen loading model. evaluation_cold_fine_tune_test_ncf_embed_all_except_tags_genres
Getting predictions.


User Predictions: 100%|██████████| 2000/2000 [4:41:12<00:00,  8.44s/it]  


Done getting predictions.
Appending dataframe information.
Ranking top N.
Done getting edge results.
neural_collborative_filtering_ncf
{'auc_roc': 0.671017974411372,
 'log_positional_error_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'mean_positional_error': 15.637717927782791,
 'roc_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage': 0.026098191214470285,
 'top_10_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_hit_percentage_variance': 0.0027045961972960515,
 'top_10_recall': nan,
 'top_10_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_10_recall_variance': nan,
 'top_50_hit_percentage': 0.014056847545219638,
 'top_50_hit_percentage_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_hit_percentage_variance': 0.00043912023710270925,
 'top_50_recall': nan,
 'top_50_recall_user_percentiles_figure': <Figure size 640x480 with 1 Axes>,
 'top_50_recall_variance': nan,
 'user_rank_