In [1]:
import os

# Switch the working directory to the project root so that the relative paths
# used later in this notebook (e.g. the "logs/" output folder) resolve there.
# The location can be overridden with the RECSYS_PROJECT_ROOT environment
# variable; when it is unset the original hard-coded path is used unchanged.
PROJECT_ROOT = os.environ.get('RECSYS_PROJECT_ROOT', '/Users/gabriele/PycharmProjects/RecSys')
os.chdir(PROJECT_ROOT)

## Best model up to today

In [2]:
from skopt.space import Real

from src.Recommenders.SLIM.SLIMElasticNetRecommender import MultiThreadSLIM_SLIMElasticNetRecommender
from src.Recommenders.MatrixFactorization.IALSRecommenderLinear import IALSRecommender
from src.Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Utils.utils import create_URM, create_ICM, combine_matrices
from src.Hybrids.MergingModelsByScores import MergeThreeModelsByScores
from src.Evaluation.Evaluator import EvaluatorHoldout
from src.Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from src.HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
from src.HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Build the user-interaction matrix (URM) and the item-content matrix (ICM).
URM = create_URM()
ICM = create_ICM()

# Two nested hold-outs, each keeping 85% on the "train" side:
#   URM                  -> URM_train_validation + URM_test
#   URM_train_validation -> URM_train            + URM_validation
HOLDOUT_TRAIN_SHARE = 0.85
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM, train_percentage=HOLDOUT_TRAIN_SHARE
)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train_validation, train_percentage=HOLDOUT_TRAIN_SHARE
)

# Both evaluators use a single cutoff of 10.
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

# Two stacked matrices are produced:
#   combined_matrices - ICM combined with the training split only (used to tune the hybrid)
#   ICM_combined      - ICM combined with the complete URM (used to train the single recommenders)
# NOTE(review): ICM_combined includes the validation and test interactions, so
# metrics of models fitted on it are presumably optimistic - confirm this is intended.
combined_matrices = combine_matrices(ICM=ICM, URM=URM_train)
ICM_combined = combine_matrices(ICM=ICM, URM=URM)

# Fit the three base recommenders on the fully stacked matrix (ICM_combined).
# Each model's fit arguments are collected in a dict and unpacked into fit().

# --- RP3beta ---
rp3beta_fit_params = dict(
    topK=181,
    alpha=0.5709402717259106,
    beta=0.3566066452521787,
    implicit=True,
    normalize_similarity=True,
)
RP3Beta_recommender = RP3betaRecommender(URM_train=ICM_combined)
RP3Beta_recommender.fit(**rp3beta_fit_params)

# --- IALS ---
ials_fit_params = dict(
    epochs=50,
    num_factors=40,
    alpha=0.7917156202136513,
    reg=1.0,
)
IALS_recommender = IALSRecommender(URM_train=ICM_combined)
IALS_recommender.fit(**ials_fit_params)

# --- SLIM ElasticNet (multi-threaded implementation) ---
slim_fit_params = dict(
    topK=420,
    l1_ratio=0.06061216785568925,
    alpha=0.00554982938879437,
    workers=10,
)
SLIME_recommender = MultiThreadSLIM_SLIMElasticNetRecommender(URM_train=ICM_combined)
SLIME_recommender.fit(**slim_fit_params)

# Bayesian search over the three merge weights of the hybrid
# (MergeThreeModelsByScores); each weight is searched in [0, 1].
hyperparameters_range_dictionary = {
    "alpha": Real(0.0, 1.0),
    "beta": Real(0.0, 1.0),
    "gamma": Real(0.0, 1.0)
}

# Already-fitted base models, in the order they are handed to the hybrid.
recommenders = [RP3Beta_recommender, IALS_recommender, SLIME_recommender]

# Constructor arguments used for every configuration tried by the search:
# the matrix stacked on the training split plus the fitted base models.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[combined_matrices, recommenders],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={}
)

# NOTE(review): this object is never passed to search() below, and unlike the
# other constructor calls in this notebook it omits the `recommenders`
# argument. It is kept as-is; complete the arguments before wiring it in.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_validation],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={}
)

tuning_class = SearchBayesianSkopt(recommender_class=MergeThreeModelsByScores,
                                   evaluator_validation=evaluator_validation,
                                   evaluator_test=evaluator_test)

n_cases = 70
# A count of random starts must be an integer: `n_cases * 0.3` alone yields
# the float 21.0, so convert it explicitly (30% of the cases, i.e. 21).
n_random_starts = int(round(n_cases * 0.3))
output_folder_path = "logs/"

# Optimise MAP at cutoff 10; save_model="best" is forwarded unchanged.
tuning_class.search(recommender_input_args=recommender_input_args,
                    hyperparameter_search_space=hyperparameters_range_dictionary,
                    metric_to_optimize="MAP",
                    cutoff_to_optimize=10,
                    n_cases=n_cases,
                    n_random_starts=n_random_starts,
                    output_folder_path=output_folder_path,
                    output_file_name_root=MergeThreeModelsByScores.RECOMMENDER_NAME,
                    save_model="best"
                    )



EvaluatorHoldout: Ignoring 13640 ( 0.1%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 13628 ( 0.2%) Users that have less than 1 test interactions
IALSRecommenderLinear: Epoch 1 of 50. Elapsed time 4.81 sec
IALSRecommenderLinear: Epoch 2 of 50. Elapsed time 9.72 sec
IALSRecommenderLinear: Epoch 3 of 50. Elapsed time 14.52 sec
IALSRecommenderLinear: Epoch 4 of 50. Elapsed time 19.41 sec
IALSRecommenderLinear: Epoch 5 of 50. Elapsed time 24.23 sec
IALSRecommenderLinear: Epoch 6 of 50. Elapsed time 29.13 sec
IALSRecommenderLinear: Epoch 7 of 50. Elapsed time 34.08 sec
IALSRecommenderLinear: Epoch 8 of 50. Elapsed time 38.98 sec
IALSRecommenderLinear: Epoch 9 of 50. Elapsed time 44.02 sec
IALSRecommenderLinear: Epoch 10 of 50. Elapsed time 49.07 sec
IALSRecommenderLinear: Epoch 11 of 50. Elapsed time 54.23 sec
IALSRecommenderLinear: Epoch 12 of 50. Elapsed time 59.52 sec
IALSRecommenderLinear: Epoch 13 of 50. Elapsed time 1.08 min
IALSRecommenderLinear: Epoch 14 

100%|████████████████████████████████████▉| 18032/18059 [11:17<00:00, 47.69it/s]

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.2560066241646047, 'beta': 0.6883207296131294, 'gamma': 0.12426050049505158}


100%|████████████████████████████████████▉| 18056/18059 [11:30<00:00, 47.69it/s]

EvaluatorHoldout: Processed 13628 (100.0%) in 15.90 sec. Users per second: 857
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.2560066241646047, 'beta': 0.6883207296131294, 'gamma': 0.12426050049505158} - results: PRECISION: 0.2366818, PRECISION_RECALL_MIN_DEN: 0.2410143, RECALL: 0.0678853, MAP: 0.1151143, MAP_MIN_DEN: 0.1169211, MRR: 0.4615044, NDCG: 0.2451573, F1: 0.1055085, HIT_RATE: 0.9125330, ARHR_ALL_HITS: 0.7266670, NOVELTY: 0.0055543, AVERAGE_POPULARITY: 0.5331289, DIVERSITY_MEAN_INTER_LIST: 0.9111257, DIVERSITY_HERFINDAHL: 0.9911059, COVERAGE_ITEM: 0.0603023, COVERAGE_ITEM_CORRECT: 0.0467911, COVERAGE_USER: 0.9983883, COVERAGE_USER_CORRECT: 0.9110623, DIVERSITY_GINI: 0.0112226, SHANNON_ENTROPY: 7.8944083, RATIO_DIVERSITY_HERFINDAHL: 0.9914788, RATIO_DIVERSITY_GINI: 0.0433335, RATIO_SHANNON_ENTROPY: 0.6344814, RATIO_AVERAGE_POPULARITY: 2.6745651, RATIO_NOVELTY: 0.0290592, 

EvaluatorHoldout: Processed 13640 (100.0%) in 15.35 sec. Users per second: 889
SearchBa

100%|████████████████████████████████████▉| 18056/18059 [13:11<00:00, 22.81it/s]


EvaluatorHoldout: Processed 13628 (100.0%) in 14.82 sec. Users per second: 920
SearchBayesianSkopt: Config 5 is suboptimal. Config: {'alpha': 0.9591375723058375, 'beta': 0.937272953577882, 'gamma': 0.16668445772644883} - results: PRECISION: 0.2319563, PRECISION_RECALL_MIN_DEN: 0.2365460, RECALL: 0.0666116, MAP: 0.1117690, MAP_MIN_DEN: 0.1136449, MRR: 0.4535462, NDCG: 0.2399024, F1: 0.1035006, HIT_RATE: 0.9079836, ARHR_ALL_HITS: 0.7094458, NOVELTY: 0.0055303, AVERAGE_POPULARITY: 0.5567556, DIVERSITY_MEAN_INTER_LIST: 0.8809192, DIVERSITY_HERFINDAHL: 0.9880855, COVERAGE_ITEM: 0.0625173, COVERAGE_ITEM_CORRECT: 0.0472894, COVERAGE_USER: 0.9983883, COVERAGE_USER_CORRECT: 0.9065201, DIVERSITY_GINI: 0.0095612, SHANNON_ENTROPY: 7.5680914, RATIO_DIVERSITY_HERFINDAHL: 0.9884572, RATIO_DIVERSITY_GINI: 0.0369186, RATIO_SHANNON_ENTROPY: 0.6082550, RATIO_AVERAGE_POPULARITY: 2.7930944, RATIO_NOVELTY: 0.0289338, 

Iteration No: 6 ended. Evaluation done at random point.
Time taken: 14.9323
Function valu

EvaluatorHoldout: Processed 13640 (100.0%) in 14.59 sec. Users per second: 935
SearchBayesianSkopt: Config evaluated with evaluator_test. Config: {'alpha': 0.3012224242038139, 'beta': 0.3026659768966358, 'gamma': 0.9863694858776371} - results:
CUTOFF: 10 - PRECISION: 0.2886290, PRECISION_RECALL_MIN_DEN: 0.2916898, RECALL: 0.0700466, MAP: 0.1488949, MAP_MIN_DEN: 0.1501573, MRR: 0.5039556, NDCG: 0.2935601, F1: 0.1127341, HIT_RATE: 0.9475073, ARHR_ALL_HITS: 0.8621199, NOVELTY: 0.0055982, AVERAGE_POPULARITY: 0.5209277, DIVERSITY_MEAN_INTER_LIST: 0.9151762, DIVERSITY_HERFINDAHL: 0.9915109, COVERAGE_ITEM: 0.0961847, COVERAGE_ITEM_CORRECT: 0.0639570, COVERAGE_USER: 0.9992674, COVERAGE_USER_CORRECT: 0.9468132, DIVERSITY_GINI: 0.0133687, SHANNON_ENTROPY: 8.0758927, RATIO_DIVERSITY_HERFINDAHL: 0.9918840, RATIO_DIVERSITY_GINI: 0.0516203, RATIO_SHANNON_ENTROPY: 0.6490675, RATIO_AVERAGE_POPULARITY: 2.6133554, RATIO_NOVELTY: 0.0292888, 


SearchBayesianSkopt: Saving model in logs/MergeThreeModelsByS

EvaluatorHoldout: Processed 13628 (100.0%) in 14.51 sec. Users per second: 939
SearchBayesianSkopt: Config 16 is suboptimal. Config: {'alpha': 0.8645695722986306, 'beta': 0.6205299508238166, 'gamma': 0.6386209222312726} - results: PRECISION: 0.2357793, PRECISION_RECALL_MIN_DEN: 0.2396959, RECALL: 0.0669469, MAP: 0.1127541, MAP_MIN_DEN: 0.1143284, MRR: 0.4470002, NDCG: 0.2411799, F1: 0.1042836, HIT_RATE: 0.9122395, ARHR_ALL_HITS: 0.7078506, NOVELTY: 0.0055448, AVERAGE_POPULARITY: 0.5516047, DIVERSITY_MEAN_INTER_LIST: 0.8858161, DIVERSITY_HERFINDAHL: 0.9885751, COVERAGE_ITEM: 0.0769699, COVERAGE_ITEM_CORRECT: 0.0523839, COVERAGE_USER: 0.9983883, COVERAGE_USER_CORRECT: 0.9107692, DIVERSITY_GINI: 0.0104275, SHANNON_ENTROPY: 7.6673594, RATIO_DIVERSITY_HERFINDAHL: 0.9889471, RATIO_DIVERSITY_GINI: 0.0402634, RATIO_SHANNON_ENTROPY: 0.6162332, RATIO_AVERAGE_POPULARITY: 2.7672537, RATIO_NOVELTY: 0.0290095, 

Iteration No: 17 ended. Evaluation done at random point.
Time taken: 14.6245
Function va

Iteration No: 22 ended. Search finished for the next optimal point.
Time taken: 14.8871
Function value obtained: -0.1179
Current minimum: -0.1184
Iteration No: 23 started. Searching for the next optimal point.
SearchBayesianSkopt: Testing config: {'alpha': 0.04173341909688567, 'beta': 0.027391228828595578, 'gamma': 0.017275387351494368}
EvaluatorHoldout: Processed 13628 (100.0%) in 14.48 sec. Users per second: 941
SearchBayesianSkopt: Config 22 is suboptimal. Config: {'alpha': 0.04173341909688567, 'beta': 0.027391228828595578, 'gamma': 0.017275387351494368} - results: PRECISION: 0.2331890, PRECISION_RECALL_MIN_DEN: 0.2374387, RECALL: 0.0664997, MAP: 0.1111299, MAP_MIN_DEN: 0.1127748, MRR: 0.4446336, NDCG: 0.2387828, F1: 0.1034874, HIT_RATE: 0.9090109, ARHR_ALL_HITS: 0.7007453, NOVELTY: 0.0055340, AVERAGE_POPULARITY: 0.5591173, DIVERSITY_MEAN_INTER_LIST: 0.8758602, DIVERSITY_HERFINDAHL: 0.9875796, COVERAGE_ITEM: 0.0723185, COVERAGE_ITEM_CORRECT: 0.0507780, COVERAGE_USER: 0.9983883, COVE

Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 29.3926
Function value obtained: -0.1184
Current minimum: -0.1184
Iteration No: 29 started. Searching for the next optimal point.
SearchBayesianSkopt: Testing config: {'alpha': 0.039593913171246616, 'beta': 0.9941098665149061, 'gamma': 0.6645113221514666}
EvaluatorHoldout: Processed 13628 (100.0%) in 14.53 sec. Users per second: 938
SearchBayesianSkopt: Config 28 is suboptimal. Config: {'alpha': 0.039593913171246616, 'beta': 0.9941098665149061, 'gamma': 0.6645113221514666} - results: PRECISION: 0.2431832, PRECISION_RECALL_MIN_DEN: 0.2471325, RECALL: 0.0691044, MAP: 0.1178172, MAP_MIN_DEN: 0.1194274, MRR: 0.4595376, NDCG: 0.2494045, F1: 0.1076253, HIT_RATE: 0.9173026, ARHR_ALL_HITS: 0.7341333, NOVELTY: 0.0055806, AVERAGE_POPULARITY: 0.5142339, DIVERSITY_MEAN_INTER_LIST: 0.9276123, DIVERSITY_HERFINDAHL: 0.9927544, COVERAGE_ITEM: 0.0724293, COVERAGE_ITEM_CORRECT: 0.0503350, COVERAGE_USER: 0.9983883, COVERAGE_U

Iteration No: 33 ended. Search finished for the next optimal point.
Time taken: 29.4505
Function value obtained: -0.1186
Current minimum: -0.1186
Iteration No: 34 started. Searching for the next optimal point.
SearchBayesianSkopt: Testing config: {'alpha': 0.5065649301697975, 'beta': 0.09057052425591851, 'gamma': 0.0021712658455640414}
EvaluatorHoldout: Processed 13628 (100.0%) in 14.50 sec. Users per second: 940
SearchBayesianSkopt: Config 33 is suboptimal. Config: {'alpha': 0.5065649301697975, 'beta': 0.09057052425591851, 'gamma': 0.0021712658455640414} - results: PRECISION: 0.2168917, PRECISION_RECALL_MIN_DEN: 0.2228273, RECALL: 0.0634811, MAP: 0.1023698, MAP_MIN_DEN: 0.1052424, MRR: 0.4316853, NDCG: 0.2251098, F1: 0.0982158, HIT_RATE: 0.8900059, ARHR_ALL_HITS: 0.6620483, NOVELTY: 0.0055328, AVERAGE_POPULARITY: 0.5794726, DIVERSITY_MEAN_INTER_LIST: 0.8298078, DIVERSITY_HERFINDAHL: 0.9829747, COVERAGE_ITEM: 0.0833933, COVERAGE_ITEM_CORRECT: 0.0551526, COVERAGE_USER: 0.9983883, COVERA

Iteration No: 39 ended. Search finished for the next optimal point.
Time taken: 29.5311
Function value obtained: -0.1187
Current minimum: -0.1187
Iteration No: 40 started. Searching for the next optimal point.
SearchBayesianSkopt: Testing config: {'alpha': 0.0, 'beta': 1.0, 'gamma': 1.0}
EvaluatorHoldout: Processed 13628 (100.0%) in 14.55 sec. Users per second: 937
SearchBayesianSkopt: Config 39 is suboptimal. Config: {'alpha': 0.0, 'beta': 1.0, 'gamma': 1.0} - results: PRECISION: 0.2438656, PRECISION_RECALL_MIN_DEN: 0.2477059, RECALL: 0.0690298, MAP: 0.1180898, MAP_MIN_DEN: 0.1196763, MRR: 0.4578615, NDCG: 0.2496831, F1: 0.1076014, HIT_RATE: 0.9175961, ARHR_ALL_HITS: 0.7339271, NOVELTY: 0.0055895, AVERAGE_POPULARITY: 0.5108856, DIVERSITY_MEAN_INTER_LIST: 0.9291400, DIVERSITY_HERFINDAHL: 0.9929072, COVERAGE_ITEM: 0.0776344, COVERAGE_ITEM_CORRECT: 0.0528268, COVERAGE_USER: 0.9983883, COVERAGE_USER_CORRECT: 0.9161172, DIVERSITY_GINI: 0.0130151, SHANNON_ENTROPY: 8.1535851, RATIO_DIVERSITY

Iteration No: 45 ended. Search finished for the next optimal point.
Time taken: 29.7280
Function value obtained: -0.1187
Current minimum: -0.1187
Iteration No: 46 started. Searching for the next optimal point.
SearchBayesianSkopt: Testing config: {'alpha': 0.0, 'beta': 0.2703351681126784, 'gamma': 1.0}
EvaluatorHoldout: Processed 13628 (100.0%) in 14.61 sec. Users per second: 933
SearchBayesianSkopt: New best config found. Config 45: {'alpha': 0.0, 'beta': 0.2703351681126784, 'gamma': 1.0} - results: PRECISION: 0.2450323, PRECISION_RECALL_MIN_DEN: 0.2487904, RECALL: 0.0690874, MAP: 0.1187130, MAP_MIN_DEN: 0.1202312, MRR: 0.4577386, NDCG: 0.2503777, F1: 0.1077847, HIT_RATE: 0.9178896, ARHR_ALL_HITS: 0.7352856, NOVELTY: 0.0056253, AVERAGE_POPULARITY: 0.5005800, DIVERSITY_MEAN_INTER_LIST: 0.9318927, DIVERSITY_HERFINDAHL: 0.9931824, COVERAGE_ITEM: 0.0991749, COVERAGE_ITEM_CORRECT: 0.0629603, COVERAGE_USER: 0.9983883, COVERAGE_USER_CORRECT: 0.9164103, DIVERSITY_GINI: 0.0147629, SHANNON_ENTR

Iteration No: 51 ended. Search finished for the next optimal point.
Time taken: 14.8846
Function value obtained: -0.1185
Current minimum: -0.1187
Iteration No: 52 started. Searching for the next optimal point.
SearchBayesianSkopt: Testing config: {'alpha': 0.6024086967798707, 'beta': 0.2538249863369079, 'gamma': 0.7660249894943786}
EvaluatorHoldout: Processed 13628 (100.0%) in 14.54 sec. Users per second: 937
SearchBayesianSkopt: Config 51 is suboptimal. Config: {'alpha': 0.6024086967798707, 'beta': 0.2538249863369079, 'gamma': 0.7660249894943786} - results: PRECISION: 0.2386997, PRECISION_RECALL_MIN_DEN: 0.2424617, RECALL: 0.0675407, MAP: 0.1136491, MAP_MIN_DEN: 0.1151590, MRR: 0.4432039, NDCG: 0.2425217, F1: 0.1052895, HIT_RATE: 0.9148077, ARHR_ALL_HITS: 0.7079243, NOVELTY: 0.0055722, AVERAGE_POPULARITY: 0.5413985, DIVERSITY_MEAN_INTER_LIST: 0.8930067, DIVERSITY_HERFINDAHL: 0.9892941, COVERAGE_ITEM: 0.0918656, COVERAGE_ITEM_CORRECT: 0.0588072, COVERAGE_USER: 0.9983883, COVERAGE_USER_

Iteration No: 58 ended. Search finished for the next optimal point.
Time taken: 14.9825
Function value obtained: -0.1138
Current minimum: -0.1187
Iteration No: 59 started. Searching for the next optimal point.
SearchBayesianSkopt: Testing config: {'alpha': 0.1273118559213315, 'beta': 0.5976700374977204, 'gamma': 1.0}
EvaluatorHoldout: Processed 13628 (100.0%) in 14.56 sec. Users per second: 936
SearchBayesianSkopt: Config 58 is suboptimal. Config: {'alpha': 0.1273118559213315, 'beta': 0.5976700374977204, 'gamma': 1.0} - results: PRECISION: 0.2440637, PRECISION_RECALL_MIN_DEN: 0.2477990, RECALL: 0.0689399, MAP: 0.1181563, MAP_MIN_DEN: 0.1197137, MRR: 0.4567121, NDCG: 0.2495700, F1: 0.1075114, HIT_RATE: 0.9175227, ARHR_ALL_HITS: 0.7330541, NOVELTY: 0.0055935, AVERAGE_POPULARITY: 0.5147047, DIVERSITY_MEAN_INTER_LIST: 0.9241395, DIVERSITY_HERFINDAHL: 0.9924072, COVERAGE_ITEM: 0.0852207, COVERAGE_ITEM_CORRECT: 0.0564262, COVERAGE_USER: 0.9983883, COVERAGE_USER_CORRECT: 0.9160440, DIVERSITY_

EvaluatorHoldout: Processed 13640 (100.0%) in 14.69 sec. Users per second: 929
SearchBayesianSkopt: Config evaluated with evaluator_test. Config: {'alpha': 0.0, 'beta': 0.3023745211799622, 'gamma': 1.0} - results:
CUTOFF: 10 - PRECISION: 0.2912170, PRECISION_RECALL_MIN_DEN: 0.2942925, RECALL: 0.0705663, MAP: 0.1511817, MAP_MIN_DEN: 0.1524617, MRR: 0.5096775, NDCG: 0.2967496, F1: 0.1136045, HIT_RATE: 0.9474340, ARHR_ALL_HITS: 0.8730996, NOVELTY: 0.0056226, AVERAGE_POPULARITY: 0.5014181, DIVERSITY_MEAN_INTER_LIST: 0.9316227, DIVERSITY_HERFINDAHL: 0.9931554, COVERAGE_ITEM: 0.0990088, COVERAGE_ITEM_CORRECT: 0.0661720, COVERAGE_USER: 0.9992674, COVERAGE_USER_CORRECT: 0.9467399, DIVERSITY_GINI: 0.0146451, SHANNON_ENTROPY: 8.2804482, RATIO_DIVERSITY_HERFINDAHL: 0.9935291, RATIO_DIVERSITY_GINI: 0.0565491, RATIO_SHANNON_ENTROPY: 0.6655078, RATIO_AVERAGE_POPULARITY: 2.5154805, RATIO_NOVELTY: 0.0294166, 


SearchBayesianSkopt: Saving model in logs/MergeThreeModelsByScore

MergeThreeModelsByScore:

In [3]:
# Build the final hybrid on the fully stacked matrix, using the weights of the
# last configuration the search log reports as evaluated with evaluator_test.
best_merge_weights = {"alpha": 0.0, "beta": 0.3023745211799622, "gamma": 1.0}
recommender = MergeThreeModelsByScores(ICM_combined, recommenders)
recommender.fit(**best_merge_weights)

In [5]:
from Utils.utils import create_submission, write_submission

# Create the submission from the fitted hybrid and hand it to write_submission
# together with a name describing the model combination.
SUBMISSION_NAME = "SLIME_RP3_IALS_Stacked_hybridBeta"
submission = create_submission(recommender)
write_submission(submission, SUBMISSION_NAME)