# SLIM-BPR

## Import

In [1]:
## Allow more than one output for a single code cell:
## with 'all', IPython renders the value of every top-level expression statement,
## not only the last one. The analysis cells below rely on this to show a
## filtered DataFrame in the middle of a cell and another one at its end.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import scipy.sparse as sps
import numpy as np
import os

from skopt.space import Real, Integer, Categorical

## Set the numpy random seed (global legacy RNG) so that runs are repeatable.
## NOTE(review): presumably the random hold-out split below draws from this
## global RNG -- confirm, otherwise the split is not reproducible.
SEED = 42
np.random.seed(SEED)

## Display the working directory: the relative paths used later
## (e.g. "result_experiments/...") are resolved against it.
os.getcwd()

'/home/jupyter/RecSysChallenge2021'

In [3]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Evaluation.Evaluator import EvaluatorHoldout

from Recommenders.Recommender_import_list import *

from Recommenders.DataIO import DataIO

In [4]:
## Utility Functions
from Dataset.load_data import load_data
from Dataset.write_submission import write_submission
from Dataset.load_test_user_array import load_test_user_array

## Data Loading and Split

In [5]:
## Load the dataset: URM_all is the matrix that gets split into train/validation
## below; ICM_dict maps names such as 'ICM_subgenre' and 'ICM_channel' to matrices.
## NOTE(review): presumably URM = user-item interactions and ICM = item-feature
## content matrices -- confirm against Dataset.load_data.
URM_all, ICM_dict = load_data()

In [6]:
## Hold out 20% of the interactions for validation
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

## Augmented training matrix: the train URM with the transposed ICMs stacked
## underneath it as extra rows. Only subgenre and channel are used here
## ('ICM_genre' and 'ICM_event' are deliberately left out of this experiment).
stacked_blocks = [URM_train.copy().tocoo()]
stacked_blocks.extend(ICM_dict[icm_name].T.tocoo() for icm_name in ('ICM_subgenre', 'ICM_channel'))
URM_aug_train = sps.vstack(stacked_blocks, format='csr')

## Validation is measured at cutoff 10, skipping items already seen in training
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10], exclude_seen = True)

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [7]:
## NOTE(review): presumably the array of user IDs to recommend for in the final
## submission -- it is not used by any cell visible in this notebook; confirm.
test_UserID_array = load_test_user_array()

## Optimization

In [8]:
output_folder_path = "result_experiments/SLIM_BPR_AUG_subgenre_channel/"

## Load the metadata saved in this folder by a previous hyperparameter search
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(SLIM_BPR_Cython.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to the hyperparameters of each case.
## The values are copied positionally (the index of the result frame is dropped).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.20

## Select the cases above the threshold once and reuse the selection
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]

## Series.max() skips NaN, whereas the builtin max() returns NaN or a wrong
## value depending on where the NaN entries sit in the column
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
if above_threshold_df.empty:
    ## max()/min() of an empty selection would raise ValueError
    print('No configuration with MAP above threshold: ', threshold)
else:
    print('Max value for the range: ', max(above_threshold_df.topK))
    print('Min value for the range: ', min(above_threshold_df.topK))
print('Best MAP: ', best_map)
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,topK,epochs,symmetric,sgd_mode,lambda_i,lambda_j,learning_rate,MAP
3,418,365,False,adagrad,0.000574,0.000426,0.013204,0.202714
6,262,315,True,adagrad,2.2e-05,0.000333,0.037189,0.230958
8,348,155,True,adam,0.005378,0.000109,0.018956,0.208146
9,344,115,True,adam,0.007422,8e-05,0.016553,0.202981


Max value for the range:  418
Min value for the range:  262
Best MAP:  0.23095772294456904


Unnamed: 0,topK,epochs,symmetric,sgd_mode,lambda_i,lambda_j,learning_rate,MAP
6,262,315,True,adagrad,2.2e-05,0.000333,0.037189,0.230958


In [9]:
output_folder_path = "result_experiments/SLIM_BPR_AUG_subgenre_channel_improved_range/"

## Load the metadata saved in this folder by a previous hyperparameter search
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(SLIM_BPR_Cython.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to the hyperparameters of each case.
## The values are copied positionally (the index of the result frame is dropped).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.20

## Select the cases above the threshold once and reuse the selection
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]

## Series.max() skips NaN, whereas the builtin max() returns NaN or a wrong
## value depending on where the NaN entries sit in the column
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
if above_threshold_df.empty:
    ## max()/min() of an empty selection would raise ValueError
    print('No configuration with MAP above threshold: ', threshold)
else:
    print('Max value for the range: ', max(above_threshold_df.topK))
    print('Min value for the range: ', min(above_threshold_df.topK))
print('Best MAP: ', best_map)
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,topK,epochs,symmetric,sgd_mode,lambda_i,lambda_j,learning_rate,MAP
0,216,445,False,sgd,0.009199,0.00013,0.000788,0.201837
3,227,65,False,adam,2.7e-05,0.000321,0.011407,0.202417
4,262,550,False,adagrad,0.007106,0.00919,0.028988,0.220181
5,287,105,True,adam,0.000596,0.009792,0.010926,0.205473


Max value for the range:  287
Min value for the range:  216
Best MAP:  0.22018115954672438


Unnamed: 0,topK,epochs,symmetric,sgd_mode,lambda_i,lambda_j,learning_rate,MAP
4,262,550,False,adagrad,0.007106,0.00919,0.028988,0.220181


In [11]:
output_folder_path = "result_experiments/SLIM_BPR_AUG_subgenre_channel_categorical_topK/"

## Load the metadata saved in this folder by a previous hyperparameter search
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(SLIM_BPR_Cython.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to the hyperparameters of each case.
## The values are copied positionally (the index of the result frame is dropped).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

## A threshold of 0 keeps every case that has a positive MAP
threshold = 0

## Select the cases above the threshold once and reuse the selection
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]

## Series.max() skips NaN, whereas the builtin max() returns NaN or a wrong
## value depending on where the NaN entries sit in the column
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
if above_threshold_df.empty:
    ## max()/min() of an empty selection would raise ValueError
    print('No configuration with MAP above threshold: ', threshold)
else:
    print('Max value for the range: ', max(above_threshold_df.topK))
    print('Min value for the range: ', min(above_threshold_df.topK))
print('Best MAP: ', best_map)
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,topK,epochs,symmetric,sgd_mode,lambda_i,lambda_j,learning_rate,MAP
0,262,90,True,adam,0.000429,0.004976,0.019826,0.205104
2,262,145,True,adam,0.001232,0.005572,0.023837,0.202914
4,262,40,True,sgd,5.1e-05,0.000462,0.049378,0.177011
5,262,85,True,adam,0.004321,0.006096,0.003203,0.201931
7,262,590,True,adagrad,4.1e-05,0.000975,0.001094,0.209877
8,262,75,True,adam,1.1e-05,0.008512,0.000995,0.199914
9,262,70,True,adam,0.001028,0.003379,0.043781,0.203011
11,262,145,True,adam,0.007848,0.005507,0.00783,0.205867
12,262,230,True,adagrad,3e-05,0.000215,0.043239,0.228072
13,262,285,True,adagrad,0.000181,5e-05,0.058408,0.222782


Max value for the range:  262
Min value for the range:  262
Best MAP:  0.22807226781962908


Unnamed: 0,topK,epochs,symmetric,sgd_mode,lambda_i,lambda_j,learning_rate,MAP
12,262,230,True,adagrad,3e-05,0.000215,0.043239,0.228072


In [8]:
output_folder_path = "result_experiments/SLIM_BPR_AUG_subgenre_channel_categorical_topK_300_epochs/"

## Create the output directory if it does not exist yet.
## exist_ok=True makes the call idempotent and avoids the check-then-create race.
os.makedirs(output_folder_path, exist_ok=True)

## Search budget: total number of cases, 30% of which are random starts
n_cases = 20  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)

## Metric and cutoff the search optimizes on the validation set
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [None]:
from functools import partial
import os, multiprocessing

from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

## Settings for the SLIM BPR hyperparameter search.
## The model is trained on the augmented URM and scored on the hold-out
## validation evaluator, which is also the one passed for early stopping.
## No last-test URM and no test evaluator are supplied.
search_kwargs = dict(
    URM_train = URM_aug_train,
    URM_train_last_test = None,
    metric_to_optimize = metric_to_optimize,
    cutoff_to_optimize = cutoff_to_optimize,
    n_cases = n_cases,
    n_random_starts = n_random_starts,
    evaluator_validation_earlystopping = evaluator_validation,
    evaluator_validation = evaluator_validation,
    evaluator_test = None,
    output_folder_path = output_folder_path,
    resume_from_saved = True,
    similarity_type_list = None,
    parallelizeKNN = True,
)

runHyperparameterSearch_Collaborative(SLIM_BPR_Cython, **search_kwargs)

In [None]:
from Recommenders.DataIO import DataIO

recommender_class = SLIM_BPR_Cython

## Load the metadata written by the search into output_folder_path
data_loader = DataIO(folder_path = output_folder_path)

search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")
result_on_validation_df = search_metadata["result_on_validation_df"]

## Coerce to numeric and use Series.max(), which skips NaN: the builtin max()
## is order-dependent when the column contains NaN (e.g. a NaN in first position
## makes it return NaN), which would hide the real best score.
best_map = pd.to_numeric(result_on_validation_df['MAP'], errors='coerce').max()
print('Best asymmetric MAP: ', best_map)