# Different Loss Score Hybrid

In [1]:
## Allow more than one output for a single code cell:
## with 'all', the value of every top-level expression statement in a cell is displayed, not only the last one.
## Side effect: bare string literals used as block comments are echoed as cell output too.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import scipy.sparse as sps
import numpy as np
import os

from skopt.space import Real, Integer, Categorical

## Seed numpy's global (legacy) random generator so that code drawing from it is repeatable across runs
SEED = 42
np.random.seed(seed=SEED)

In [22]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Evaluation.Evaluator import EvaluatorHoldout

from Recommenders.Recommender_import_list import *
from Recommenders.Hybrid.DifferentLossScoresHybridRecommender import DifferentLossScoresHybridRecommender

from Recommenders.DataIO import DataIO

In [4]:
## Utility Functions
from Dataset.load_data import load_data
from Dataset.write_submission import write_submission
from Dataset.load_test_user_array import load_test_user_array

In [5]:
## Load the dataset: URM_all (presumably the full user-item interaction matrix -- confirm in Dataset.load_data)
## and ICM_dict, which is indexed by ICM name further down (e.g. 'ICM_subgenre', 'ICM_channel').
URM_all, ICM_dict = load_data()

In [6]:
## 80/20 split of the interactions into train and validation
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

## Augmented training matrix: the transposed subgenre and channel ICMs are stacked
## below the train URM (in this order). The genre and event ICMs are currently left out.
URM_aug_train = sps.vstack(
    [URM_train.copy().tocoo()]
    + [ICM_dict[icm_name].T.tocoo() for icm_name in ('ICM_subgenre', 'ICM_channel')],
    format='csr',
)




In [7]:
## Hold-out evaluator on the validation split, with a single cutoff of 10 and exclude_seen enabled
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list = [10],
    exclude_seen = True,
)

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [8]:
## Presumably the ids of the users to recommend for in the submission -- confirm in Dataset.load_test_user_array
test_UserID_array = load_test_user_array()

## Optimization

In [9]:
## Inspect the saved hyperparameter-search metadata stored in this folder
output_folder_path = "result_experiments/DifferentLossScoresHybrid_SLIMElasticNet_SLIMBPR/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("SLIMElasticNetRecommender_SLIM_BPR_Recommender_metadata.zip")
#search_metadata.keys()

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to each configuration. The values are copied positionally
## (index labels are ignored), so both frames must list the configurations in the same order.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.2505

## Select the configurations above the threshold once and reuse the selection.
## pandas reductions are used instead of the builtin max()/min(): they skip NaN rows
## (the builtins return a result that depends on where the NaN sits) and they yield NaN
## instead of raising ValueError when no configuration exceeds the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_MAP = hyperparameters_df['MAP'].max()

above_threshold_df
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_MAP)
## Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,norm,alpha,MAP
0,1.0,0.689094,0.250504
31,inf,0.829324,0.250559
43,inf,0.813892,0.250533
64,inf,0.812183,0.250529
67,inf,0.810747,0.250502
73,inf,0.810928,0.25051
82,inf,0.809496,0.250501
91,1.0,0.714934,0.250584


Max value for the range:  0.8293235105898995
Min value for the range:  0.6890941949275555
Best MAP:  0.25058410337118475


Unnamed: 0,norm,alpha,MAP
91,1.0,0.714934,0.250584


In [10]:
## Inspect the saved hyperparameter-search metadata stored in this folder
output_folder_path = "result_experiments/DifferentLossScoresHybrid_SLIMElasticNet_SLIMBPR_separate_weights/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("SLIMElasticNetRecommender_SLIM_BPR_Recommender_metadata.zip")
#search_metadata.keys()

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to each configuration. The values are copied positionally
## (index labels are ignored), so both frames must list the configurations in the same order.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.2505

## Select the configurations above the threshold once and reuse the selection.
## pandas reductions are used instead of the builtin max()/min(): they skip NaN rows
## (the builtins return a result that depends on where the NaN sits) and they yield NaN
## instead of raising ValueError when no configuration exceeds the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_MAP = hyperparameters_df['MAP'].max()

above_threshold_df
## Only the range of `alpha` is reported here
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_MAP)
## Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,norm,alpha,beta,gamma,delta,MAP
3,2.0,0.479011,0.104115,0,0,0.250566
22,2.0,0.923438,0.191761,0,0,0.250551
28,inf,0.33118,0.068812,0,0,0.250583
80,inf,0.99593,0.257393,0,0,0.250507
86,2.0,0.999,0.290241,0,0,0.250527


Max value for the range:  0.999
Min value for the range:  0.3311796359004038
Best MAP:  0.25058295471200487


Unnamed: 0,norm,alpha,beta,gamma,delta,MAP
28,inf,0.33118,0.068812,0,0,0.250583


In [11]:
## Inspect the saved hyperparameter-search metadata stored in this folder
output_folder_path = "result_experiments/DifferentLossScoresHybrid_SLIMElasticNet_SLIMBPR_separate_weights_load/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("SLIMElasticNetRecommender_SLIM_BPR_Recommender_metadata.zip")
#search_metadata.keys()

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to each configuration. The values are copied positionally
## (index labels are ignored), so both frames must list the configurations in the same order.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.2505

## Select the configurations above the threshold once and reuse the selection.
## pandas reductions are used instead of the builtin max()/min(): they skip NaN rows
## (the builtins return a result that depends on where the NaN sits) and they yield NaN
## instead of raising ValueError when no configuration exceeds the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_MAP = hyperparameters_df['MAP'].max()

above_threshold_df
## Only the range of `alpha` is reported here
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_MAP)
## Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,norm,alpha,beta,gamma,delta,MAP
74,1.0,0.999,0.438261,0,0,0.250502
75,inf,0.999,0.26941,0,0,0.250532
81,1.0,0.276294,0.124128,0,0,0.250522
84,-inf,0.831087,0.217555,0,0,0.250524


Max value for the range:  0.999
Min value for the range:  0.27629417607486695
Best MAP:  0.2505315820206637


Unnamed: 0,norm,alpha,beta,gamma,delta,MAP
75,inf,0.999,0.26941,0,0,0.250532


In [12]:
## Inspect the saved hyperparameter-search metadata stored in this folder
output_folder_path = "result_experiments/DifferentLossScoresHybrid_SLIMElasticNet_SLIMBPR_load/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("SLIMElasticNetRecommender_SLIM_BPR_Recommender_metadata.zip")
#search_metadata.keys()

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to each configuration. The values are copied positionally
## (index labels are ignored), so both frames must list the configurations in the same order.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.2505

## Select the configurations above the threshold once and reuse the selection.
## pandas reductions are used instead of the builtin max()/min(): they skip NaN rows
## (the builtins return a result that depends on where the NaN sits) and they yield NaN
## instead of raising ValueError when no configuration exceeds the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_MAP = hyperparameters_df['MAP'].max()

above_threshold_df
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_MAP)
## Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,norm,alpha,MAP
17,1.0,0.667851,0.25051
27,2.0,0.785279,0.25051
33,inf,0.827963,0.250583
35,inf,0.787912,0.250509
68,inf,0.811425,0.250508
69,inf,0.812357,0.250525
70,inf,0.811928,0.250513
77,inf,0.826667,0.250575
79,inf,0.826739,0.250572
83,inf,0.826909,0.250588


Max value for the range:  0.8280829208840986
Min value for the range:  0.6678510490224594
Best MAP:  0.25058841302410984


Unnamed: 0,norm,alpha,MAP
83,inf,0.826909,0.250588


In [18]:
## Inspect the saved hyperparameter-search metadata stored in this folder
output_folder_path = "result_experiments/DifferentLossScoresHybrid_SLIMElasticNet_IALS/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("SLIMElasticNetRecommender_IALSRecommender_metadata.zip")
#search_metadata.keys()

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to each configuration. The values are copied positionally
## (index labels are ignored), so both frames must list the configurations in the same order.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

## Note: this cell uses a lower threshold than the SLIM_BPR cells (0.2502 instead of 0.2505)
threshold = 0.2502

## Select the configurations above the threshold once and reuse the selection.
## pandas reductions are used instead of the builtin max()/min(): they skip NaN rows
## (the builtins return a result that depends on where the NaN sits) and they yield NaN
## instead of raising ValueError when no configuration exceeds the threshold.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
best_MAP = hyperparameters_df['MAP'].max()

above_threshold_df
print('Max value for the range: ', above_threshold_df['alpha'].max())
print('Min value for the range: ', above_threshold_df['alpha'].min())
print('Best MAP: ', best_MAP)
## Row(s) achieving the best MAP
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,norm,alpha,MAP
87,inf,0.712836,0.250207
91,inf,0.712971,0.250202
93,inf,0.713169,0.250212
94,inf,0.713247,0.250212
97,inf,0.713347,0.25021
98,inf,0.713387,0.25021


Max value for the range:  0.7133869818889053
Min value for the range:  0.7128361826446833
Best MAP:  0.25021211227663814


Unnamed: 0,norm,alpha,MAP
93,inf,0.713169,0.250212


In [20]:
## Folder where the results of the hyperparameter search are written
output_folder_path = "result_experiments/DifferentLossScoresHybrid_DiffLoss_IALS/"

# Create the directory if it does not exist yet; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test
os.makedirs(output_folder_path, exist_ok=True)

## Search budget: total number of configurations and how many of them are random starts
n_cases = 50  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)

## Metric and cutoff the search optimizes
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [23]:
## Load the pre-trained base recommenders from disk and build the SLIMElasticNet + SLIM_BPR hybrid.
## The commented-out fit(...) calls record the hyperparameters to use when training from scratch
## instead of loading a saved model.
print('Fitting ...')

## Alternative base recommenders, kept for reference; they are not part of the current hybrid
#RP3beta = RP3betaRecommender(URM_aug_train)
#RP3beta.fit(topK = 51, alpha = 0.620106, beta = 0.566617, normalize_similarity = True)

#Top = TopPop(URM_train)
#Top.fit()

#UserKNNCF = UserKNNCFRecommender(URM_train)
#UserKNNCF.fit(topK= 777, shrink = 0, similarity = 'tversky', normalize = True, tversky_alpha = 2.0, tversky_beta = 1.2531451455933782)

SLIMElasticNet = MultiThreadSLIM_SLIMElasticNetRecommender(URM_aug_train)
SLIMElasticNet.load_model(folder_path = 'result_experiments/SLIM_elasticNet_AUG_subgenre_channel_topK_categorical/', file_name = 'SLIMElasticNetRecommender_best_model.zip')
#SLIMElasticNet.fit(topK = 673, l1_ratio = 0.001155, alpha = 0.073102)

## Early-stopping settings; in this cell they are only referenced by the commented-out fit(...) calls
earlystopping_keywargs = {"validation_every_n": 1,
                          "stop_on_validation": True,
                          "evaluator_object": evaluator_validation,
                          "lower_validations_allowed": 20,
                          "validation_metric": metric_to_optimize,
                         }

SLIM_BPR = SLIM_BPR_Cython(URM_aug_train)
SLIM_BPR.load_model(folder_path = 'result_experiments/SLIM_BPR_AUG_subgenre_channel/', file_name = 'SLIM_BPR_Recommender_best_model.zip')
## Kept as '#' comments rather than a triple-quoted string: a bare string literal is an expression
## statement and would be echoed as cell output when ast_node_interactivity is set to 'all'.
#SLIM_BPR.fit(topK = 262, epochs = 315, symmetric = True, lambda_i = 0.000022, lambda_j = 0.000333, learning_rate = 0.037189, sgd_mode='adagrad',
#             positive_threshold_BPR = None,
#             train_with_sparse_weights = None,
#             allow_train_with_sparse_weights = False,
#             **earlystopping_keywargs)

## NOTE(review): the recommender is built on URM_aug_train but the model is loaded from a folder
## named 'IALS_NO_AUG' -- confirm that the saved model is compatible with the augmented matrix.
IALS = IALSRecommender(URM_aug_train)
IALS.load_model(folder_path = 'result_experiments/IALS_NO_AUG/', file_name = 'IALSRecommender_best_model.zip')
#IALS.fit(num_factors = 52, epochs = 300, confidence_scaling = 'linear', alpha = 1.3346069348060516, epsilon = 0.17158990394166584, reg = 0.0005841170332713697, **earlystopping_keywargs)

## norm and alpha match the best row displayed for the
## 'DifferentLossScoresHybrid_SLIMElasticNet_SLIMBPR_load' search (norm = inf, alpha = 0.826909)
DiffLoss = DifferentLossScoresHybridRecommender(URM_aug_train, [SLIMElasticNet, SLIM_BPR])
DiffLoss.fit(norm = np.inf, alpha = 0.826909)

print('Fitting Ended')

Fitting ...
SLIMElasticNetRecommender: Loading model from file 'result_experiments/SLIM_elasticNet_AUG_subgenre_channel_topK_categorical/SLIMElasticNetRecommender_best_model.zip'
SLIMElasticNetRecommender: Loading complete
SLIM_BPR_Recommender: Loading model from file 'result_experiments/SLIM_BPR_AUG_subgenre_channel/SLIM_BPR_Recommender_best_model.zip'
SLIM_BPR_Recommender: Loading complete


"\nSLIM_BPR.fit(topK = 262, epochs = 315, symmetric = True, lambda_i = 0.000022, lambda_j = 0.000333, learning_rate = 0.037189, sgd_mode='adagrad',\n             positive_threshold_BPR = None,\n             train_with_sparse_weights = None,\n             allow_train_with_sparse_weights = False,\n             **earlystopping_keywargs)\n"

IALSRecommender: Loading model from file 'result_experiments/IALS_NO_AUG/IALSRecommender_best_model.zip'
IALSRecommender: Loading complete
Fitting Ended


In [24]:
from functools import partial
import os, multiprocessing

from HyperparameterTuning.run_hyperparameter_search_DifferentLossScoresHybrid import runHyperparameterSearch_DifferentLossScoresHybrid

## Search the hybrid weights for the pair [DiffLoss, IALS] on the augmented URM.
## The same validation evaluator is used both for early stopping and for scoring;
## no test evaluator and no "last test" URM are provided, and the search starts from scratch.
search_kwargs = dict(
    URM_train = URM_aug_train,
    ICM_object = None,
    ICM_name = '',
    URM_train_last_test = None,
    metric_to_optimize = metric_to_optimize,
    cutoff_to_optimize = cutoff_to_optimize,
    n_cases = n_cases,
    n_random_starts = n_random_starts,
    evaluator_validation_earlystopping = evaluator_validation,
    evaluator_validation = evaluator_validation,
    evaluator_test = None,
    output_folder_path = output_folder_path,
    resume_from_saved = False,
    similarity_type_list = None,
    parallelizeKNN = True,
)

runHyperparameterSearch_DifferentLossScoresHybrid([DiffLoss, IALS], **search_kwargs)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'norm': 2.0, 'alpha': 0.21574855997490533}
EvaluatorHoldout: Processed 13646 (100.0%) in 1.96 min. Users per second: 116
SearchBayesianSkopt: New best config found. Config 0: {'norm': 2.0, 'alpha': 0.21574855997490533} - results: PRECISION: 0.3939836, PRECISION_RECALL_MIN_DEN: 0.3955222, RECALL: 0.0701882, MAP: 0.2431386, MAP_MIN_DEN: 0.2438800, MRR: 0.6449510, NDCG: 0.4085909, F1: 0.1191498, HIT_RATE: 0.9699546, ARHR_ALL_HITS: 1.2306057, NOVELTY: 0.0055998, AVERAGE_POPULARITY: 0.4990493, DIVERSITY_MEAN_INTER_LIST: 0.9399903, DIVERSITY_HERFINDAHL: 0.9939921, COVERAGE_ITEM: 0.0695498, COVERAGE_ITEM_CORRECT: 0.0563154, COVERAGE_USER: 0.9997070, COVERAGE_USER_CORRECT: 0.9696703, DIVERSITY_GINI: 0.0141399, SHANNON_ENTROPY: 8.3111915, RATIO_DIVERSITY_HERFINDAHL: 0.9943695, RATIO_DIVERSITY_GINI: 0.0554394, RATIO_SHANNON_ENTROPY: 0.6689559, RATIO_AVERAGE_POPULARITY: 2.5026142, RATIO_NOVELTY: 0.