In [1]:
import sys
import cython
# Put the sibling RecSysRep checkout on the import path.
# The membership guard keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path.
_recsys_repo_path = '../RecSysRep/'
if _recsys_repo_path not in sys.path:
    sys.path.append(_recsys_repo_path)

In [2]:
import Basics.Load as ld

# Load the full user-rating matrix (URM) and the four item-content matrices
# (genre, subgenre, channel, event) through the project loader.
# NOTE(review): judging by the loader name these are presumably scipy COO
# matrices — confirm in Basics.Load.
(
    URM_all,
    ICM_genre_all,
    ICM_subgenre_all,
    ICM_channel_all,
    ICM_event_all,
) = ld.getCOOs()

In [3]:
import os

# Folder where the hyperparameter search writes its metadata and models.
output_folder_path = "result_experiments/FunkSVD"

# exist_ok=True makes this a no-op when the folder is already there and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total configurations to try, 30% of which are sampled at
# random before the Bayesian optimiser takes over.
n_cases = 10  # using 10 as an example
n_random_starts = int(n_cases*0.3)

# Objective of the search: metric name and recommendation-list cutoff.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# FunkSVD Model

In [4]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Two nested 80/20 hold-outs:
#   URM_all              -> URM_train_validation + URM_test
#   URM_train_validation -> URM_train            + URM_validation
# NOTE(review): no random seed is set here, so the split presumably changes
# on every run — confirm how the helper draws its sample.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.8,
)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train_validation,
    train_percentage=0.8,
)



In [5]:
# Evaluate at the cutoff the search optimises. Reusing cutoff_to_optimize
# (instead of a second hard-coded 10) means that changing the constant cannot
# leave the evaluators computing metrics at a different cutoff.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[cutoff_to_optimize])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[cutoff_to_optimize])

EvaluatorHoldout: Ignoring 13640 ( 0.1%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 13643 ( 0.1%) Users that have less than 1 test interactions


In [6]:
# Load Cython's IPython extension into this kernel.
# NOTE(review): no %%cython cell is visible in this part of the notebook —
# confirm the extension is still needed.
%load_ext Cython

In [10]:
from skopt.space import Real, Integer, Categorical

# Search space handed to the Bayesian optimiser. "epochs" is a single-value
# categorical: it only acts as an upper bound because early stopping (below)
# is enabled for every fit.
hyperparameters_range_dictionary = dict(
    epochs=Categorical([500]),
    num_factors=Integer(1, 200),
    sgd_mode=Categorical(["sgd", "adagrad", "adam"]),
    # Powers of two from 1 to 1024.
    batch_size=Categorical([2 ** exponent for exponent in range(11)]),
    item_reg=Real(low=1e-5, high=1e-2, prior='log-uniform'),
    user_reg=Real(low=1e-5, high=1e-2, prior='log-uniform'),
    learning_rate=Real(low=1e-4, high=1e-1, prior='log-uniform'),
)

# Early-stopping options forwarded to fit(): validate every 5 epochs on the
# validation evaluator, tracking the metric the search optimises.
earlystopping_keywargs = dict(
    validation_every_n=5,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric=metric_to_optimize,
)

In [11]:
from Recommenders.MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_FunkSVD_Cython
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Model under tuning; its RECOMMENDER_NAME is reused later for output files.
recommender_class = MatrixFactorization_FunkSVD_Cython

# Bayesian search driver, given both the validation and the test evaluator.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
)

In [12]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
  
# Constructor/fit arguments used for every configuration tried by the search.
# The model is built on URM_train only.
recommender_input_args = SearchInputRecommenderArgs(
    # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    # Additional hyperparameters for the fit function
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

In [13]:
# Same arguments as the search runs, except the model is built on
# train + validation data; passed to search() as the "last test" setup.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    # For a CBF model simply put [URM_train_validation, ICM_train]
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_validation],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    # Additional hyperparameters for the fit function
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

In [None]:
# Collect the search options in one place, then launch the (long-running)
# Bayesian search.
search_settings = dict(
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

hyperparameterSearch.search(recommender_input_args, **search_settings)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'epochs': 500, 'num_factors': 30, 'sgd_mode': 'adam', 'batch_size': 4, 'item_reg': 0.00047145504831292573, 'user_reg': 0.00027979309553109516, 'learning_rate': 0.0007989901095792431}
MatrixFactorization_FunkSVD_Cython_Recommender: URM Detected 2 ( 0.0%) items with no interactions.
FUNK_SVD: Processed 3382828 (100.0%) in 13.31 sec. MSE loss 1.18E-03. Sample per second: 254144
FUNK_SVD: Epoch 1 of 500. Elapsed time 12.94 sec
FUNK_SVD: Processed 3382828 (100.0%) in 10.16 sec. MSE loss 7.09E-05. Sample per second: 332865
FUNK_SVD: Epoch 2 of 500. Elapsed time 22.79 sec
FUNK_SVD: Processed 3382828 (100.0%) in 10.31 sec. MSE loss 2.92E-05. Sample per second: 328006
FUNK_SVD: Epoch 3 of 500. Elapsed time 32.94 sec
FUNK_SVD: Processed 3382828 (100.0%) in 10.47 sec. MSE loss 1.56E-05. Sample per second: 323082
FUNK_SVD: Epoch 4 of 500. Elapsed time 43.10 sec
FUNK_SVD: Processed 3382828 (100.0%) i

FUNK_SVD: Epoch 26 of 500. Elapsed time 5.11 min
FUNK_SVD: Processed 3382828 (100.0%) in 10.44 sec. MSE loss 1.14E-06. Sample per second: 324090
FUNK_SVD: Epoch 27 of 500. Elapsed time 5.28 min
FUNK_SVD: Processed 3382828 (100.0%) in 10.65 sec. MSE loss 1.23E-06. Sample per second: 317607
FUNK_SVD: Epoch 28 of 500. Elapsed time 5.45 min
FUNK_SVD: Processed 3382828 (100.0%) in 11.00 sec. MSE loss 1.27E-06. Sample per second: 307612
FUNK_SVD: Epoch 29 of 500. Elapsed time 5.63 min
FUNK_SVD: Processed 3382828 (100.0%) in 11.46 sec. MSE loss 1.30E-06. Sample per second: 295197
FUNK_SVD: Validation begins...
EvaluatorHoldout: Processed 13640 (100.0%) in 7.20 sec. Users per second: 1896
FUNK_SVD: CUTOFF: 10 - PRECISION: 0.0001540, PRECISION_RECALL_MIN_DEN: 0.0001540, RECALL: 0.0000240, MAP: 0.0000274, MAP_MIN_DEN: 0.0000274, MRR: 0.0002738, NDCG: 0.0001228, F1: 0.0000416, HIT_RATE: 0.0015396, ARHR_ALL_HITS: 0.0002738, NOVELTY: 0.0107889, AVERAGE_POPULARITY: 0.0010871, DIVERSITY_MEAN_INTER_LI

FUNK_SVD: Processed 3383296 (100.0%) in 40.34 sec. MSE loss 4.04E-01. Sample per second: 83866
FUNK_SVD: Epoch 16 of 500. Elapsed time 11.21 min
FUNK_SVD: Processed 3383296 (100.0%) in 40.57 sec. MSE loss 3.90E-01. Sample per second: 83389
FUNK_SVD: Epoch 17 of 500. Elapsed time 11.88 min
FUNK_SVD: Processed 3383296 (100.0%) in 40.32 sec. MSE loss 3.78E-01. Sample per second: 83912
FUNK_SVD: Epoch 18 of 500. Elapsed time 12.54 min
FUNK_SVD: Processed 3383296 (100.0%) in 40.30 sec. MSE loss 3.66E-01. Sample per second: 83960
FUNK_SVD: Epoch 19 of 500. Elapsed time 13.20 min
FUNK_SVD: Processed 3383296 (100.0%) in 40.21 sec. MSE loss 3.55E-01. Sample per second: 84140
FUNK_SVD: Validation begins...
EvaluatorHoldout: Processed 13640 (100.0%) in 7.76 sec. Users per second: 1758
FUNK_SVD: CUTOFF: 10 - PRECISION: 0.0185411, PRECISION_RECALL_MIN_DEN: 0.0186186, RECALL: 0.0033492, MAP: 0.0061898, MAP_MIN_DEN: 0.0062367, MRR: 0.0558891, NDCG: 0.0193875, F1: 0.0056736, HIT_RATE: 0.1673021, ARHR_

In [5]:
from Recommenders.DataIO import DataIO

# Read back the metadata archive the search wrote to output_folder_path.
metadata_file_name = "{}_metadata.zip".format(recommender_class.RECOMMENDER_NAME)

data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(metadata_file_name)

# Show which entries are available.
search_metadata.keys()

dict_keys(['exception_list', 'result_on_test_df', 'result_on_last', 'time_on_train_total', 'hyperparameters_df', 'time_df', 'algorithm_name_recommender', 'metric_to_optimize', 'algorithm_name_search', 'hyperparameters_best_index', 'cutoff_to_optimize', 'time_on_validation_total', 'result_on_validation_best', 'hyperparameters_best', 'time_on_last_df', 'time_on_validation_avg', 'time_on_test_avg', 'time_on_test_total', 'result_on_test_best', 'result_on_validation_df', 'time_on_train_avg'])

In [6]:
# "hyperparameters_best" holds a plain dict (hyperparameter name -> value),
# not a DataFrame, so bind it to an accurately named variable.
hyperparameters_best = search_metadata["hyperparameters_best"]

# Backward-compatible alias for any cell that still uses the old name.
hyperparameters_df = hyperparameters_best

hyperparameters_best

{'epochs': 10,
 'num_factors': 22,
 'sgd_mode': 'adagrad',
 'batch_size': 256,
 'item_reg': 0.00017575118842880344,
 'user_reg': 3.0144468387522633e-05,
 'learning_rate': 0.0010590667750450561}

In [8]:
# This is the *test* results table; the previous variable name claimed it
# held validation results.
result_on_test_df = search_metadata["result_on_test_df"]

# Legacy alias kept so cells that still reference the old name keep working.
# NOTE: despite its name it contains TEST results.
result_on_validation_df = result_on_test_df

result_on_test_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_ITEM_CORRECT,COVERAGE_USER,COVERAGE_USER_CORRECT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.16094,0.161367,0.024822,0.073238,0.073412,0.350664,0.166972,0.04301,0.768086,0.503847,...,0.019602,0.999487,0.767692,0.006387,7.253745,0.99074,0.025771,0.585432,2.906355,0.03165
1,10,,,,,,,,,,,...,,,,,,,,,,
2,10,,,,,,,,,,,...,,,,,,,,,,
3,10,,,,,,,,,,,...,,,,,,,,,,
4,10,,,,,,,,,,,...,,,,,,,,,,
5,10,,,,,,,,,,,...,,,,,,,,,,
6,10,,,,,,,,,,,...,,,,,,,,,,
7,10,,,,,,,,,,,...,,,,,,,,,,
8,10,,,,,,,,,,,...,,,,,,,,,,
9,10,,,,,,,,,,,...,,,,,,,,,,


In [18]:
from Recommenders.MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_FunkSVD_Cython
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Build the recommender on every known interaction, then restore the
# parameters saved by the search (save_model="last").
recommender_object = MatrixFactorization_FunkSVD_Cython(URM_all.tocsr())

best_model_file_name = "{}_best_model_last.zip".format(recommender_object.RECOMMENDER_NAME)
recommender_object.load_model(output_folder_path, file_name=best_model_file_name)

MatrixFactorization_FunkSVD_Cython_Recommender: Loading model from file 'result_experiments/FunkSVDMatrixFactorization_FunkSVD_Cython_Recommender_best_model_last.zip'
MatrixFactorization_FunkSVD_Cython_Recommender: Loading complete


In [19]:
recommender = recommender_object # <-----
K = 10  # number of items recommended per user in the submission

# Take the tuned values straight from the search metadata instead of
# copy-pasting them as literals, so this cell cannot drift from the search.
# The dict supplies: epochs, num_factors, sgd_mode, batch_size, item_reg,
# user_reg and learning_rate.
best_hyperparameters = dict(search_metadata["hyperparameters_best"])

# NOTE(review): fitting here presumably replaces the parameters restored by
# load_model() in the previous cell — confirm the reload is still wanted.
recommender.fit(
    **best_hyperparameters,
    use_bias=True,
    use_embeddings=True,
    negative_interactions_quota=0.0,
    dropout_quota=None,
    init_mean=0.0,
    init_std_dev=0.1,
    bias_reg=0.0,
    positive_reg=0.0,
    negative_reg=0.0,
    random_seed=None,
)



FUNK_SVD: Processed 5285888 (100.0%) in 11.32 sec. MSE loss 5.53E-01. Sample per second: 467005
FUNK_SVD: Epoch 1 of 10. Elapsed time 10.47 sec
FUNK_SVD: Processed 5285888 (100.0%) in 10.74 sec. MSE loss 2.95E-01. Sample per second: 492381
FUNK_SVD: Epoch 2 of 10. Elapsed time 20.89 sec
FUNK_SVD: Processed 5285888 (100.0%) in 11.33 sec. MSE loss 1.87E-01. Sample per second: 466534
FUNK_SVD: Epoch 3 of 10. Elapsed time 31.48 sec
FUNK_SVD: Processed 5285888 (100.0%) in 10.84 sec. MSE loss 1.25E-01. Sample per second: 487617
FUNK_SVD: Epoch 4 of 10. Elapsed time 41.99 sec
FUNK_SVD: Processed 5285888 (100.0%) in 11.07 sec. MSE loss 8.73E-02. Sample per second: 477525
FUNK_SVD: Epoch 5 of 10. Elapsed time 52.22 sec
FUNK_SVD: Processed 5285888 (100.0%) in 10.12 sec. MSE loss 6.25E-02. Sample per second: 522184
FUNK_SVD: Epoch 6 of 10. Elapsed time 1.04 min
FUNK_SVD: Processed 5285888 (100.0%) in 10.08 sec. MSE loss 4.59E-02. Sample per second: 524516
FUNK_SVD: Epoch 7 of 10. Elapsed time 1.2

In [20]:
import os
from datetime import datetime

import pandas as pd

# Target users for the submission; the first CSV column is read as int.
user_test_path = '../data/data_target_users_test.csv'
user_test_dataframe = pd.read_csv(filepath_or_buffer=user_test_path,
                                  sep=",",
                                  dtype={0: int})

subm_set = user_test_dataframe.to_numpy().T[0]

# One row per target user: the id plus a space-separated list of K item ids.
subm_res = {"user_id": [], "item_list": []}

for user_id in subm_set:
    subm_res["user_id"].append(user_id)
    res = recommender.recommend(user_id, K)
    res = ' '.join(map(str, res))
    # Print the first few users as a quick sanity check of the output format.
    if user_id < 3:
        print(user_id)
        print(res)
    subm_res["item_list"].append(res)

submission = pd.DataFrame.from_dict(subm_res)

now = datetime.now()  # current date and time, used to timestamp the file

# Make sure the destination folder exists; otherwise to_csv raises only
# after the whole recommendation loop has already run.
os.makedirs('../subs', exist_ok=True)
submission.to_csv('../subs/submission {:%Y_%m_%d %H_%M_%S}.csv'.format(now), index=False)

0
8635 10370 1259 13914 15778 17135 1619 13229 12457 13178
1
8207 6011 14917 2665 16365 14400 9776 15119 3829 12943
2
16364 12492 16365 4582 640 5925 16846 13971 5273 1259
