# RP3beta

## Import

In [1]:
## Allow more than one output for a single code cell
## With 'all', every top-level expression statement in a cell is displayed, not only the last one;
## later cells rely on this to show a DataFrame in the middle of a cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import scipy.sparse as sps
import numpy as np
import os

from skopt.space import Real, Integer, Categorical

## Seed NumPy's global random generator so that runs are repeatable
SEED = 42
np.random.seed(seed=SEED)

## Display the current working directory (relative paths below are resolved against it)
os.getcwd()

'/home/alessio/Scrivania/RecSysChallenge2021'

In [3]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Evaluation.Evaluator import EvaluatorHoldout

from Recommenders.Recommender_import_list import *

Tensorflow is not available


In [4]:
## Utility Functions
from Dataset.load_data import load_data
from Dataset.write_submission import write_submission
from Dataset.load_test_user_array import load_test_user_array

## Data Loading and Split

In [5]:
# Load the dataset through the project helper; its two return values are unpacked here.
# NOTE(review): ICM_dict is indexed below with keys such as 'ICM_genre' and each entry is
# transposed and converted with .tocoo(), so it is presumably a dict of sparse item-feature
# matrices -- confirm against Dataset.load_data.
URM_all, ICM_dict = load_data()

In [None]:
# Hold out 20% of the interactions for validation
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# Feature matrices appended below the training URM; 'ICM_event' is deliberately not included.
# NOTE(review): assumes each ICM is laid out items x features, so its transpose has the
# same number of columns as the URM -- confirm against Dataset.load_data.
stacked_ICM_names = ['ICM_genre', 'ICM_subgenre', 'ICM_channel']
matrices_to_stack = [URM_train.tocoo()]
matrices_to_stack.extend(ICM_dict[ICM_name].T.tocoo() for ICM_name in stacked_ICM_names)

# Vertical stack: the transposed ICMs become additional rows of the training matrix
URM_aug_train = sps.vstack(matrices_to_stack, format='csr')

# Validation is computed at cutoff 10 on the held-out interactions
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10], exclude_seen = True)

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [None]:
# Load the test user IDs through the project helper.
# NOTE(review): presumably the users for which the submission must contain recommendations --
# confirm against Dataset.load_test_user_array.
test_UserID_array = load_test_user_array()

## Optimization

In [None]:
# Folder where the hyperparameter search reads and writes its results
output_folder_path = "result_experiments/RP3beta_augmented_all_no_event_improved_range_topK_categorical/"

# Create the folder (and any missing parents). exist_ok=True makes this idempotent and
# avoids the race between a separate existence check and the creation.
os.makedirs(output_folder_path, exist_ok=True)

n_cases = 100  # total configurations to evaluate (50 with 30% random is a good number)
n_random_starts = int(n_cases*0.3)  # 30% of n_cases, passed to the search as n_random_starts
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [None]:
from functools import partial
import os, multiprocessing

from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Arguments of the search, grouped by role
search_kwargs = dict(
    # training data: the URM with the transposed ICMs stacked below it
    URM_train = URM_aug_train,
    URM_train_last_test = None,
    # objective and search budget
    metric_to_optimize = metric_to_optimize,
    cutoff_to_optimize = cutoff_to_optimize,
    n_cases = n_cases,
    n_random_starts = n_random_starts,
    # the same holdout evaluator serves for early stopping and validation; no test evaluator
    evaluator_validation_earlystopping = evaluator_validation,
    evaluator_validation = evaluator_validation,
    evaluator_test = None,
    # results are written to output_folder_path; resume_from_saved continues a saved search
    output_folder_path = output_folder_path,
    resume_from_saved = True,
    similarity_type_list = None,
    parallelizeKNN = True,
)

runHyperparameterSearch_Collaborative(RP3betaRecommender, **search_kwargs)

SearchBayesianSkopt: Extending previous number of cases from 50 to 100.

SearchBayesianSkopt: Resuming 'RP3betaRecommender'... Loaded 50 configurations.
Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 1.3059
Function value obtained: -0.1390
Current minimum: -0.2264
Iteration No: 2 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 70, 'alpha': 0.5181509793978214, 'beta': 0.3580157120684137, 'normalize_similarity': True}
EvaluatorHoldout: Processed 13646 (100.0%) in 11.05 sec. Users per second: 1235
SearchBayesianSkopt: Config 50 is suboptimal. Config: {'topK': 70, 'alpha': 0.5181509793978214, 'beta': 0.3580157120684137, 'normalize_similarity': True} - results: PRECISION: 0.3631101, PRECISION_RECALL_MIN_DEN: 0.3645224, RECALL: 0.0637845, MAP: 0.2215760, MAP_MIN_DEN: 0.2221982, MRR: 0.6311031, NDCG: 0.3815157, F1: 0.1085082, HIT_RATE: 0.9620402, ARHR_ALL_HITS: 1.1622

In [10]:
from Recommenders.DataIO import DataIO

# Inspect a saved search: which topK values reach a MAP above `threshold`,
# and which configuration obtained the best MAP.
output_folder_path = "result_experiments/RP3beta_augmented_all/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(RP3betaRecommender.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# MAP is attached by position.
# NOTE(review): assumes one validation row per configuration, in the same order -- confirm.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.22

# Filter once and reuse the selection
above_threshold = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
above_threshold

if above_threshold.empty:
    # The builtin max()/min() would raise ValueError on an empty selection
    print('No configuration has a MAP above the threshold: ', threshold)
else:
    print('Max value for the range: ', above_threshold['topK'].max())
    print('Min value for the range: ', above_threshold['topK'].min())

# Series.max() skips NaN; the builtin max() can return NaN depending on where the NaNs are,
# which would then match no row in the final selection.
best_MAP = hyperparameters_df['MAP'].max()
print('Best MAP: ', best_MAP)
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

FileNotFoundError: [Errno 2] No such file or directory: 'result_experiments/RP3beta_augmented_all/RP3betaRecommender_metadata.zip'

In [None]:
from Recommenders.DataIO import DataIO

# Inspect a saved search: which topK values reach a MAP above `threshold`,
# and which configuration obtained the best MAP.
output_folder_path = "result_experiments/RP3beta_augmented_all_event_preprocessed/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(RP3betaRecommender.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# MAP is attached by position.
# NOTE(review): assumes one validation row per configuration, in the same order -- confirm.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.219

# Filter once and reuse the selection
above_threshold = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
above_threshold

if above_threshold.empty:
    # The builtin max()/min() would raise ValueError on an empty selection
    print('No configuration has a MAP above the threshold: ', threshold)
else:
    print('Max value for the range: ', above_threshold['topK'].max())
    print('Min value for the range: ', above_threshold['topK'].min())

# Series.max() skips NaN; the builtin max() can return NaN depending on where the NaNs are,
# which would then match no row in the final selection.
best_MAP = hyperparameters_df['MAP'].max()
print('Best MAP: ', best_MAP)
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,topK,alpha,beta,normalize_similarity,MAP
13,122,0.449787,0.395651,True,0.220561
38,93,0.20678,0.380367,True,0.21995
46,94,0.0,0.390115,True,0.219409


Max value for the range:  122
Min value for the range:  93
Best MAP:  0.2205607899518212


Unnamed: 0,topK,alpha,beta,normalize_similarity,MAP
13,122,0.449787,0.395651,True,0.220561


In [16]:
from Recommenders.DataIO import DataIO

# Inspect a saved search: which topK values reach a MAP above `threshold`,
# and which configuration obtained the best MAP.
output_folder_path = "result_experiments/RP3beta_augmented_all_no_event/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(RP3betaRecommender.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# MAP is attached by position.
# NOTE(review): assumes one validation row per configuration, in the same order -- confirm.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.224

# Filter once and reuse the selection
above_threshold = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
above_threshold

if above_threshold.empty:
    # The builtin max()/min() would raise ValueError on an empty selection
    print('No configuration has a MAP above the threshold: ', threshold)
else:
    print('Max value for the range: ', above_threshold['topK'].max())
    print('Min value for the range: ', above_threshold['topK'].min())

# Series.max() skips NaN; the builtin max() can return NaN depending on where the NaNs are,
# which would then match no row in the final selection.
best_MAP = hyperparameters_df['MAP'].max()
print('Best MAP: ', best_MAP)
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,topK,alpha,beta,normalize_similarity,MAP
30,62,0.0,0.533488,True,0.224454
34,85,1.0,0.543791,True,0.224019
40,56,0.48885,0.543524,True,0.226386
44,37,0.0,0.536916,True,0.224072


Max value for the range:  85
Min value for the range:  37
Best MAP:  0.22638588201903473


Unnamed: 0,topK,alpha,beta,normalize_similarity,MAP
40,56,0.48885,0.543524,True,0.226386


In [20]:
from Recommenders.DataIO import DataIO

# Inspect a saved search: which topK values reach a MAP above `threshold`,
# and which configuration obtained the best MAP.
output_folder_path = "result_experiments/RP3beta_augmented_all_no_event_improved_range/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(RP3betaRecommender.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# MAP is attached by position.
# NOTE(review): assumes one validation row per configuration, in the same order -- confirm.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.226

# Filter once and reuse the selection
above_threshold = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
above_threshold

if above_threshold.empty:
    # The builtin max()/min() would raise ValueError on an empty selection
    print('No configuration has a MAP above the threshold: ', threshold)
else:
    print('Max value for the range: ', above_threshold['topK'].max())
    print('Min value for the range: ', above_threshold['topK'].min())

# Series.max() skips NaN; the builtin max() can return NaN depending on where the NaNs are,
# which would then match no row in the final selection.
best_MAP = hyperparameters_df['MAP'].max()
print('Best MAP: ', best_MAP)
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,topK,alpha,beta,normalize_similarity,MAP
47,61,0.297542,0.563447,True,0.226389
57,55,0.885547,0.547405,True,0.226444
60,59,0.581572,0.540392,True,0.226535
61,50,0.690909,0.569618,True,0.226775
62,56,0.639613,0.577889,True,0.226722
64,65,0.53406,0.576047,True,0.226314
81,60,0.740864,0.559624,True,0.226896
85,55,0.533698,0.562787,True,0.226601
90,60,0.788324,0.551148,True,0.226615
95,58,0.737131,0.568683,True,0.226612


Max value for the range:  65
Min value for the range:  50
Best MAP:  0.22714496253335392


Unnamed: 0,topK,alpha,beta,normalize_similarity,MAP
98,56,0.624694,0.568462,True,0.227145


In [None]:
from Recommenders.DataIO import DataIO

# Inspect a saved search: which topK values reach a MAP above `threshold`,
# and which configuration obtained the best MAP.
output_folder_path = "result_experiments/RP3beta_augmented_all_no_event_topK_categorical/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(RP3betaRecommender.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# MAP is attached by position.
# NOTE(review): assumes one validation row per configuration, in the same order -- confirm.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.226

# Filter once and reuse the selection
above_threshold = hyperparameters_df[hyperparameters_df['MAP'] > threshold]
above_threshold

if above_threshold.empty:
    # The builtin max()/min() would raise ValueError on an empty selection
    print('No configuration has a MAP above the threshold: ', threshold)
else:
    print('Max value for the range: ', above_threshold['topK'].max())
    print('Min value for the range: ', above_threshold['topK'].min())

# Series.max() skips NaN; the builtin max() can return NaN depending on where the NaNs are,
# which would then match no row in the final selection.
best_MAP = hyperparameters_df['MAP'].max()
print('Best MAP: ', best_MAP)
hyperparameters_df[hyperparameters_df['MAP'] == best_MAP]

Unnamed: 0,topK,alpha,beta,normalize_similarity,MAP
25,66,0.667368,0.555375,True,0.226365
30,51,0.478767,0.574051,True,0.226439
34,51,0.531833,0.54934,True,0.2263
36,51,0.743044,0.563758,True,0.226602
37,51,0.650132,0.57081,True,0.226861
38,51,0.651618,0.575714,True,0.22728
41,51,0.642757,0.58423,True,0.22683
45,51,0.582075,0.5893,True,0.226184
48,66,0.614196,0.52372,True,0.226039


Max value for the range:  66
Min value for the range:  51
Best MAP:  0.227279727881186


Unnamed: 0,topK,alpha,beta,normalize_similarity,MAP
38,51,0.651618,0.575714,True,0.22728
