# Hybrid

In [1]:
## Allow more than one output for a single code cell:
## with 'all', every bare expression statement in a cell is displayed, not only the last one.
## Later cells rely on this to show a DataFrame, some prints and another DataFrame from one cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import scipy.sparse as sps
import numpy as np
import os

from skopt.space import Real, Integer, Categorical

## Set the numpy random seed
## Only NumPy's global RNG is seeded here; any other random source (e.g. Python's `random`
## module or library-internal generators) is not affected by this call.
SEED = 42
np.random.seed(SEED)

In [3]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Evaluation.Evaluator import EvaluatorHoldout

from Recommenders.Recommender_import_list import *
from Recommenders.Hybrid.DifferentLossScoresHybridRecommender import DifferentLossScoresHybridRecommender

from Recommenders.DataIO import DataIO

In [4]:
## Utility Functions
from Dataset.load_data import load_data
from Dataset.write_submission import write_submission
from Dataset.load_test_user_array import load_test_user_array

In [5]:
# Load the full interaction matrix and a dict of item-content matrices keyed by name
# (the keys 'ICM_subgenre' and 'ICM_channel' are read from it when building the augmented URM).
URM_all, ICM_dict = load_data()

In [6]:
# Hold out part of the interactions for validation (train_percentage = 0.80).
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage = 0.80,
)

# Augmented training matrix: the training URM with the transposed ICMs stacked underneath it.
# NOTE(review): this presumably means each content feature acts as an extra pseudo-user row
# over the same item columns — confirm the ICMs are (items x features).
# The 'genre' and 'event' ICMs are intentionally left out of the stack.
URM_aug_train = sps.vstack(
    (
        URM_train.copy().tocoo(),
        ICM_dict['ICM_subgenre'].T.tocoo(),
        ICM_dict['ICM_channel'].T.tocoo(),
    ),
    format='csr',
)




In [7]:
# Evaluator over the validation split, reporting metrics at cutoff 10 only.
# NOTE(review): exclude_seen = True presumably drops items the user already has in the
# training data from the recommendations — confirm against EvaluatorHoldout.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10], exclude_seen = True)

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [8]:
# User ids to produce the final submission for.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
test_UserID_array = load_test_user_array()

In [9]:
# Inspect the saved search results stored under ScoresHybrid_RP3beta_SLIMElasticNet.
output_folder_path = "result_experiments/ScoresHybrid_RP3beta_SLIMElasticNet/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("RP3betaRecommender_SLIMElasticNetRecommender_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# Attach the validation MAP to each tested configuration (values are copied positionally).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

# Configurations scoring above this MAP delimit the alpha range worth exploring.
threshold = 0.2492

# Filter once and reuse the result. Series.max()/min() skip NaN rows, whereas the builtin
# max()/min() are order-dependent when NaN is present and raise ValueError when no row
# passes the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_map)
# Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,alpha,MAP
1,0.179126,0.249305
3,0.097846,0.249249
4,0.085722,0.249238
6,0.093221,0.249264
12,0.058649,0.249263
19,0.068366,0.249322
20,0.090797,0.249294
23,0.10836,0.249229
24,0.169875,0.24937
25,0.160786,0.24936


Max value for the range:  0.18459510477167498
Min value for the range:  0.05864858110109953
Best MAP:  0.2493893873197331


Unnamed: 0,alpha,MAP
38,0.126547,0.249389


In [10]:
# Inspect the saved search results stored under ScoresHybrid_RP3beta_IALS.
output_folder_path = "result_experiments/ScoresHybrid_RP3beta_IALS/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("RP3betaRecommender_IALSRecommender_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# Attach the validation MAP to each tested configuration (values are copied positionally).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

# Configurations scoring above this MAP delimit the alpha range worth exploring.
threshold = 0.242

# Filter once and reuse the result. Series.max()/min() skip NaN rows, whereas the builtin
# max()/min() are order-dependent when NaN is present and raise ValueError when no row
# passes the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_map)
# Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,alpha,MAP
15,0.3,0.2421
17,0.3,0.2421
19,0.3,0.2421
22,0.3,0.2421
25,0.3,0.2421
27,0.3,0.2421
29,0.3,0.2421
30,0.3,0.2421
39,0.296169,0.242097
44,0.298094,0.242135


Max value for the range:  0.3
Min value for the range:  0.29616943093704623
Best MAP:  0.2421349590204923


Unnamed: 0,alpha,MAP
44,0.298094,0.242135


In [11]:
# Inspect the saved search results stored under ScoresHybrid_RP3beta_IALS_separate_weights.
output_folder_path = "result_experiments/ScoresHybrid_RP3beta_IALS_separate_weights/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("RP3betaRecommender_IALSRecommender_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# Attach the validation MAP to each tested configuration (values are copied positionally).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

# Configurations scoring above this MAP delimit the alpha range worth exploring.
threshold = 0.2432

# Filter once and reuse the result. Series.max()/min() skip NaN rows, whereas the builtin
# max()/min() are order-dependent when NaN is present and raise ValueError when no row
# passes the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
# Only the alpha range is summarised here, even though this search stores more weights.
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_map)
# Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,alpha,beta,gamma,delta,MAP
4,0.54826,0.559405,0,0,0.243257
30,1.0,1.0,0,0,0.243406
38,0.996756,0.92316,0,0,0.243237
78,0.855813,0.887564,0,0,0.243212
83,0.345041,0.320944,0,0,0.243259
87,1.0,0.963778,0,0,0.243264


Max value for the range:  1.0
Min value for the range:  0.34504058756186956
Best MAP:  0.24340612633738892


Unnamed: 0,alpha,beta,gamma,delta,MAP
30,1.0,1.0,0,0,0.243406


In [12]:
# Inspect the saved search results stored under ScoresHybrid_DifferentLossSLIM_RP3beta.
output_folder_path = "result_experiments/ScoresHybrid_DifferentLossSLIM_RP3beta/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("DifferentLossScoresHybridRecommender_RP3betaRecommender_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# Attach the validation MAP to each tested configuration (values are copied positionally).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

# A threshold of 0.0 keeps every configuration that has a positive MAP.
threshold = 0.0

# Filter once and reuse the result. Series.max()/min() skip NaN rows, whereas the builtin
# max()/min() are order-dependent when NaN is present and raise ValueError when no row
# passes the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_map)
# Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,alpha,MAP
0,0.653104,0.227285
1,0.254516,0.227149
2,0.116613,0.227141
3,0.899746,0.227913
4,0.756824,0.227376
5,0.012251,0.227149


Max value for the range:  0.8997460478162632
Min value for the range:  0.012250596716553989
Best MAP:  0.22791306365723676


Unnamed: 0,alpha,MAP
3,0.899746,0.227913


In [21]:
# Inspect the saved search results stored under ScoresHybrid_SLIMElasticNet_IALS.
output_folder_path = "result_experiments/ScoresHybrid_SLIMElasticNet_IALS/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("SLIMElasticNetRecommender_IALSRecommender_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# Attach the validation MAP to each tested configuration (values are copied positionally).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

# Configurations scoring above this MAP delimit the alpha range worth exploring.
threshold = 0.2501

# Filter once and reuse the result. Series.max()/min() skip NaN rows, whereas the builtin
# max()/min() are order-dependent when NaN is present and raise ValueError when no row
# passes the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_map)
# Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,alpha,MAP
35,0.759515,0.250106
43,0.79719,0.250127
44,0.789777,0.250129
55,0.793885,0.250101
58,0.785876,0.250173
60,0.783476,0.250139
64,0.784601,0.250165
67,0.784386,0.250169
68,0.784194,0.250169
70,0.784073,0.250167


Max value for the range:  0.7971900785890443
Min value for the range:  0.7595145328142148
Best MAP:  0.25017529411491135


Unnamed: 0,alpha,MAP
88,0.784515,0.250175


## Try various combinations of the DiffLoss hybrid with the other recommenders

In [22]:
# Folder where the results of this hyperparameter search are written.
output_folder_path = "result_experiments/ScoresHybrid_DiffLossSLIM_IALS/"

# Create the folder (and any missing parents) if it does not exist yet.
# exist_ok=True makes this idempotent and avoids the check-then-create race of
# testing os.path.exists() before calling os.makedirs().
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations, 30% of which are random starts.
n_cases = 100  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)
# Metric and cutoff the search optimises.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [23]:
print('Fitting ...')

# --- RP3beta: fitted from scratch on the augmented URM ---------------------------------
# NOTE(review): RP3beta is not one of the recommenders passed to the hybrid search in the
# next cell ([DiffLoss, IALS]) — confirm this fit is still needed.
RP3beta = RP3betaRecommender(URM_aug_train)
RP3beta.fit(
    topK = 51,
    alpha = 0.620106,
    beta = 0.566617,
    normalize_similarity = True,
)

# Disabled alternatives kept for reference:
#   TopPop(URM_train).fit()
#   UserKNNCFRecommender(URM_train).fit(topK= 777, shrink = 0, similarity = 'tversky', normalize = True,
#                                       tversky_alpha = 2.0, tversky_beta = 1.2531451455933782)

# --- SLIM ElasticNet: restored from a previously saved best model ----------------------
# (to refit instead: SLIMElasticNet.fit(topK = 673, l1_ratio = 0.001155, alpha = 0.073102))
SLIMElasticNet = MultiThreadSLIM_SLIMElasticNetRecommender(URM_aug_train)
SLIMElasticNet.load_model(
    folder_path = 'result_experiments/SLIM_elasticNet_AUG_subgenre_channel_topK_categorical/',
    file_name = 'SLIMElasticNetRecommender_best_model.zip',
)

# Early-stopping settings; only consumed by the disabled IALS.fit call noted further down.
earlystopping_keywargs = dict(
    validation_every_n = 1,
    stop_on_validation = True,
    evaluator_object = evaluator_validation,
    lower_validations_allowed = 20,
    validation_metric = metric_to_optimize,
)

# --- SLIM BPR: restored from a previously saved best model -----------------------------
SLIM_BPR = SLIM_BPR_Cython(URM_aug_train)
SLIM_BPR.load_model(
    folder_path = 'result_experiments/SLIM_BPR_AUG_subgenre_channel/',
    file_name = 'SLIM_BPR_Recommender_best_model.zip',
)

# --- DiffLoss: hybrid of the two SLIM models, with a fixed norm and alpha --------------
DiffLoss = DifferentLossScoresHybridRecommender(URM_aug_train, [SLIMElasticNet, SLIM_BPR])
DiffLoss.fit(norm = np.inf, alpha = 0.826909)

# --- IALS: restored from a previously saved best model ---------------------------------
# NOTE(review): the model file lives in 'IALS_NO_AUG' while the recommender is built on
# URM_aug_train — confirm the saved factors are compatible with the augmented matrix.
# (to refit instead: IALS.fit(num_factors = 52, epochs = 300, confidence_scaling = 'linear',
#  alpha = 1.3346069348060516, epsilon = 0.17158990394166584, reg = 0.0005841170332713697,
#  **earlystopping_keywargs))
IALS = IALSRecommender(URM_aug_train)
IALS.load_model(
    folder_path = 'result_experiments/IALS_NO_AUG/',
    file_name = 'IALSRecommender_best_model.zip',
)

print('Fitting Ended')

Fitting ...
SLIMElasticNetRecommender: Loading model from file 'result_experiments/SLIM_elasticNet_AUG_subgenre_channel_topK_categorical/SLIMElasticNetRecommender_best_model.zip'
SLIMElasticNetRecommender: Loading complete
SLIM_BPR_Recommender: Loading model from file 'result_experiments/SLIM_BPR_AUG_subgenre_channel/SLIM_BPR_Recommender_best_model.zip'
SLIM_BPR_Recommender: Loading complete
IALSRecommender: Loading model from file 'result_experiments/IALS_NO_AUG/IALSRecommender_best_model.zip'
IALSRecommender: Loading complete
Fitting Ended


In [24]:
from functools import partial
import os
import multiprocessing

from HyperparameterTuning.run_hyperparameter_search_ScoresHybrid import runHyperparameterSearch_ScoresHybrid

# Launch the hyperparameter search for the [DiffLoss, IALS] scores hybrid.
# resume_from_saved = False: the search does not resume from previously saved results.
runHyperparameterSearch_ScoresHybrid(
    [DiffLoss, IALS],
    # data
    URM_train = URM_aug_train,
    URM_train_last_test = None,
    ICM_object = None,
    ICM_name = '',
    # search budget and objective
    n_cases = n_cases,
    n_random_starts = n_random_starts,
    metric_to_optimize = metric_to_optimize,
    cutoff_to_optimize = cutoff_to_optimize,
    # evaluation (the same holdout evaluator is used for early stopping and validation; no test evaluator)
    evaluator_validation_earlystopping = evaluator_validation,
    evaluator_validation = evaluator_validation,
    evaluator_test = None,
    # output and execution options
    output_folder_path = output_folder_path,
    resume_from_saved = False,
    similarity_type_list = None,
    parallelizeKNN = True,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.1446018485160938}
EvaluatorHoldout: Processed 13646 (100.0%) in 37.80 sec. Users per second: 361
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.1446018485160938} - results: PRECISION: 0.3833358, PRECISION_RECALL_MIN_DEN: 0.3848390, RECALL: 0.0681410, MAP: 0.2339243, MAP_MIN_DEN: 0.2346333, MRR: 0.6343746, NDCG: 0.3975659, F1: 0.1157131, HIT_RATE: 0.9659241, ARHR_ALL_HITS: 1.1975065, NOVELTY: 0.0056407, AVERAGE_POPULARITY: 0.4756843, DIVERSITY_MEAN_INTER_LIST: 0.9515626, DIVERSITY_HERFINDAHL: 0.9951493, COVERAGE_ITEM: 0.0734260, COVERAGE_ITEM_CORRECT: 0.0610222, COVERAGE_USER: 0.9997070, COVERAGE_USER_CORRECT: 0.9656410, DIVERSITY_GINI: 0.0163044, SHANNON_ENTROPY: 8.5454943, RATIO_DIVERSITY_HERFINDAHL: 0.9955271, RATIO_DIVERSITY_GINI: 0.0639261, RATIO_SHANNON_ENTROPY: 0.6878146, RATIO_AVERAGE_POPULARITY: 2.3854442, RATIO_NOVELTY: 0.0268287, 

Iteration No: 1 

KeyboardInterrupt: 