# IALS implicit

## Import

In [1]:
## Allow more than one output for a single code cell:
## with ast_node_interactivity set to 'all', every bare expression statement in a
## cell is displayed, not only the last one. Later cells rely on this to show
## several DataFrames from a single cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import scipy.sparse as sps
import numpy as np
import os

from skopt.space import Real, Integer, Categorical

## Set the numpy random seed
## Only NumPy's global random state is seeded here; no other random source is touched.
SEED = 42
np.random.seed(SEED)

## Display the current working directory.
## NOTE(review): the project-local imports in the following cells are presumably
## resolved relative to this directory (repository root) -- confirm.
os.getcwd()

'/home/alessio/Scrivania/RecSysChallenge2021'

In [4]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Evaluation.Evaluator import EvaluatorHoldout

from Recommenders.Recommender_import_list import *

from Recommenders.DataIO import DataIO

In [4]:
## Utility Functions
from Dataset.load_data import load_data
from Dataset.write_submission import write_submission
from Dataset.load_test_user_array import load_test_user_array

## Data Loading and Split

In [5]:
## Load the dataset: URM_all (presumably the full user-item interaction matrix -- confirm
## against load_data) and ICM_dict, a dict of matrices indexed by feature name
## (keys such as 'ICM_subgenre' and 'ICM_channel' are read from it further down).
URM_all, ICM_dict = load_data()

In [16]:
## Random holdout split of the interactions: 80% for training, the rest for validation
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage = 0.80,
)

## Blocks stacked vertically to build the augmented training matrix: the training
## interactions first, then the transposed ICMs appended as extra rows.
## The genre and event ICMs are currently disabled.
aug_blocks = [
    URM_train.copy().tocoo(),
    #ICM_dict['ICM_genre'].T.tocoo(),
    ICM_dict['ICM_subgenre'].T.tocoo(),
    #ICM_dict['ICM_event'].T.tocoo(),
    ICM_dict['ICM_channel'].T.tocoo(),
]
URM_aug_train = sps.vstack(aug_blocks, format='csr')

## Holdout evaluator on the validation split, reporting metrics at cutoff 10
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list = [10],
    exclude_seen = True,
)

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [6]:
## Load the array of test user IDs.
## NOTE(review): presumably the users a submission must be produced for
## (see the write_submission import) -- confirm against load_test_user_array.
test_UserID_array = load_test_user_array()

## Optimization

In [10]:
output_folder_path = "result_experiments/IALS_NO_AUG/"

## Load the metadata archive saved for IALSRecommender in this folder
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(IALSRecommender.RECOMMENDER_NAME + "_metadata.zip")

#search_metadata.keys()

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to each tested configuration (aligned by position)
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.23

## Build the "MAP above threshold" selection once and reuse it for every statistic
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]

## pandas reductions skip NaN entries and return NaN on an empty selection, whereas
## the built-in max()/min() give order-dependent results with NaN and raise
## ValueError on an empty sequence
best_map = hyperparameters_df['MAP'].max()

## Configurations above the threshold
above_threshold_df
print('Max value for the range: ', above_threshold_df.num_factors.max())
print('Min value for the range: ', above_threshold_df.num_factors.min())
print('Best MAP: ', best_map)
## Configuration(s) reaching the best MAP (ties are all shown)
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,num_factors,epochs,confidence_scaling,alpha,epsilon,reg,MAP
17,52,15,linear,1.334607,0.17159,0.000584,0.233916
25,61,45,log,4.43449,10.0,0.000609,0.230052
36,36,35,linear,0.394312,0.275704,0.000606,0.232647
37,57,30,log,0.802954,0.514866,0.000552,0.233861


Max value for the range:  61
Min value for the range:  36
Best MAP:  0.23391613682944476


Unnamed: 0,num_factors,epochs,confidence_scaling,alpha,epsilon,reg,MAP
17,52,15,linear,1.334607,0.17159,0.000584,0.233916


In [17]:
output_folder_path = "result_experiments/IALS_AUG_rangeImproved/" # range num_factors 20->100

## Create the output directory if it is missing; exist_ok avoids the
## check-then-create race of os.path.exists() followed by os.makedirs()
os.makedirs(output_folder_path, exist_ok = True)

n_cases = 20  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)  # 30% of the cases, passed to the search as n_random_starts
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [18]:
from functools import partial
import os, multiprocessing

from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative
from Recommenders.MatrixFactorization.IALSRecommender_implicit import IALSRecommender_implicit
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender

## Hyperparameter search for IALSRecommender on the augmented training matrix
runHyperparameterSearch_Collaborative(
    IALSRecommender,
    # Data: the augmented matrix is used for training, no separate last-test matrix
    URM_train = URM_aug_train,
    URM_train_last_test = None,
    # Evaluation: the same holdout evaluator is passed both as the early-stopping
    # evaluator and as the validation evaluator; no test evaluator is given
    evaluator_validation = evaluator_validation,
    evaluator_validation_earlystopping = evaluator_validation,
    evaluator_test = None,
    # Objective
    metric_to_optimize = metric_to_optimize,
    cutoff_to_optimize = cutoff_to_optimize,
    # Search budget
    n_cases = n_cases,
    n_random_starts = n_random_starts,
    # Persistence: results go to output_folder_path, resuming a saved search if present
    output_folder_path = output_folder_path,
    resume_from_saved = True,
    # NOTE(review): presumably only relevant to KNN recommenders -- confirm
    similarity_type_list = None,
    parallelizeKNN = True,
)

SearchBayesianSkopt: Resuming 'IALSRecommender' Failed, no such file exists.

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'num_factors': 83, 'epochs': 50, 'confidence_scaling': 'linear', 'alpha': 0.006717929382722986, 'epsilon': 0.02924040065342526, 'reg': 0.0013821558766662337}
IALSRecommender: Epoch 1 of 50. Elapsed time 21.87 sec
IALSRecommender: Epoch 2 of 50. Elapsed time 41.37 sec
IALSRecommender: Epoch 3 of 50. Elapsed time 58.32 sec
IALSRecommender: Epoch 4 of 50. Elapsed time 1.26 min
IALSRecommender: Validation begins...
EvaluatorHoldout: Processed 13646 (100.0%) in 12.91 sec. Users per second: 1057
IALSRecommender: CUTOFF: 10 - PRECISION: 0.3439763, PRECISION_RECALL_MIN_DEN: 0.3450910, RECALL: 0.0593893, MAP: 0.2020635, MAP_MIN_DEN: 0.2025931, MRR: 0.5944092, NDCG: 0.3575433, F1: 0.1012904, HIT_RATE: 0.9529532, ARHR_ALL_HITS: 1.0798703, NOVELTY: 0.0056634, AVERAGE_POPULARITY: 0.4445944, DIVERSITY_MEAN_INTER_LIST: 0.9644

In [7]:
output_folder_path = "result_experiments/IALS_AUG_rangeImproved/"

## Load the metadata archive saved for IALSRecommender in this folder
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(IALSRecommender.RECOMMENDER_NAME + "_metadata.zip")

#search_metadata.keys()

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

## Attach the validation MAP to each tested configuration (aligned by position)
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.23

## Build the "MAP above threshold" selection once and reuse it for every statistic
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]

## pandas reductions skip NaN entries and return NaN on an empty selection, whereas
## the built-in max()/min() give order-dependent results with NaN and raise
## ValueError on an empty sequence
best_map = hyperparameters_df['MAP'].max()

## Configurations above the threshold
above_threshold_df
print('Max value for the range: ', above_threshold_df.num_factors.max())
print('Min value for the range: ', above_threshold_df.num_factors.min())
print('Best MAP: ', best_map)
## Configuration(s) reaching the best MAP (ties are all shown)
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,num_factors,epochs,confidence_scaling,alpha,epsilon,reg,MAP
12,20,25,linear,0.001,5.532497,0.002147,0.23045
14,32,35,linear,0.010477,0.373426,0.005897,0.231824
15,37,45,log,0.001946,0.001079,0.000179,0.232057
17,20,10,log,0.009769,0.001,1e-05,0.230765
18,44,45,linear,0.0719,0.001554,0.005092,0.230649
19,48,35,log,0.010645,0.001,1e-05,0.230431


Max value for the range:  48
Min value for the range:  20
Best MAP:  0.23205652450046393


Unnamed: 0,num_factors,epochs,confidence_scaling,alpha,epsilon,reg,MAP
15,37,45,log,0.001946,0.001079,0.000179,0.232057


In [15]:
## Early stopping: validate every 5 epochs on the holdout evaluator; presumably up to
## 10 non-improving validations are tolerated (lower_validations_allowed) -- confirm
earlystopping_keywargs = dict(
    validation_every_n = 5,
    stop_on_validation = True,
    evaluator_object = evaluator_validation,
    lower_validations_allowed = 10,
    validation_metric = metric_to_optimize,
)

## Hyperparameters of the best-MAP configuration from the IALS_NO_AUG search,
## with epochs raised to 300 and early stopping enabled
ials_hyperparameters = dict(
    num_factors = 52,
    epochs = 300,
    confidence_scaling = 'linear',
    alpha = 1.3346069348060516,
    epsilon = 0.17158990394166584,
    reg = 0.0005841170332713697,
)

## Train on the plain (non-augmented) training matrix
recommender_obj = IALSRecommender(URM_train)
recommender_obj.fit(**ials_hyperparameters, **earlystopping_keywargs)

IALSRecommender: Epoch 1 of 300. Elapsed time 7.72 sec
IALSRecommender: Epoch 2 of 300. Elapsed time 15.29 sec
IALSRecommender: Epoch 3 of 300. Elapsed time 24.37 sec
IALSRecommender: Epoch 4 of 300. Elapsed time 33.70 sec
IALSRecommender: Validation begins...
EvaluatorHoldout: Processed 13646 (100.0%) in 11.85 sec. Users per second: 1152
IALSRecommender: CUTOFF: 10 - PRECISION: 0.3791807, PRECISION_RECALL_MIN_DEN: 0.3807608, RECALL: 0.0678468, MAP: 0.2298383, MAP_MIN_DEN: 0.2305708, MRR: 0.6312621, NDCG: 0.3933336, F1: 0.1150989, HIT_RATE: 0.9690752, ARHR_ALL_HITS: 1.1847845, NOVELTY: 0.0056274, AVERAGE_POPULARITY: 0.4824054, DIVERSITY_MEAN_INTER_LIST: 0.9510018, DIVERSITY_HERFINDAHL: 0.9950932, COVERAGE_ITEM: 0.0718201, COVERAGE_ITEM_CORRECT: 0.0595825, COVERAGE_USER: 0.9997070, COVERAGE_USER_CORRECT: 0.9687912, DIVERSITY_GINI: 0.0160187, SHANNON_ENTROPY: 8.5223161, RATIO_DIVERSITY_HERFINDAHL: 0.9954773, RATIO_DIVERSITY_GINI: 0.0646122, RATIO_SHANNON_ENTROPY: 0.6878396, RATIO_AVERAGE

### Try with URM_aug and see if it performs better
### Final training with validation at every epoch and a patience of 50

In [20]:
## Early stopping: validate after every epoch on the holdout evaluator; presumably up to
## 50 non-improving validations are tolerated (lower_validations_allowed) -- confirm
earlystopping_keywargs = dict(
    validation_every_n = 1,
    stop_on_validation = True,
    evaluator_object = evaluator_validation,
    lower_validations_allowed = 50,
    validation_metric = metric_to_optimize,
)

## Same hyperparameters as the best-MAP configuration of the IALS_NO_AUG search,
## with epochs raised to 300 and early stopping enabled
ials_hyperparameters = dict(
    num_factors = 52,
    epochs = 300,
    confidence_scaling = 'linear',
    alpha = 1.3346069348060516,
    epsilon = 0.17158990394166584,
    reg = 0.0005841170332713697,
)

## Train on the augmented training matrix (interactions stacked with the ICMs)
recommender_obj = IALSRecommender(URM_aug_train)
recommender_obj.fit(**ials_hyperparameters, **earlystopping_keywargs)

IALSRecommender: Validation begins...
EvaluatorHoldout: Processed 13646 (100.0%) in 12.58 sec. Users per second: 1085
IALSRecommender: CUTOFF: 10 - PRECISION: 0.2463139, PRECISION_RECALL_MIN_DEN: 0.2467125, RECALL: 0.0380161, MAP: 0.1339501, MAP_MIN_DEN: 0.1341011, MRR: 0.4762564, NDCG: 0.2581426, F1: 0.0658664, HIT_RATE: 0.8560750, ARHR_ALL_HITS: 0.7869499, NOVELTY: 0.0054478, AVERAGE_POPULARITY: 0.5542007, DIVERSITY_MEAN_INTER_LIST: 0.9267974, DIVERSITY_HERFINDAHL: 0.9926730, COVERAGE_ITEM: 0.0511102, COVERAGE_ITEM_CORRECT: 0.0303450, COVERAGE_USER: 0.9997070, COVERAGE_USER_CORRECT: 0.8558242, DIVERSITY_GINI: 0.0090816, SHANNON_ENTROPY: 7.7505407, RATIO_DIVERSITY_HERFINDAHL: 0.9930498, RATIO_DIVERSITY_GINI: 0.0356069, RATIO_SHANNON_ENTROPY: 0.6238299, RATIO_AVERAGE_POPULARITY: 2.7791856, RATIO_NOVELTY: 0.0259112, 

IALSRecommender: New best model found! Updating.
IALSRecommender: Epoch 1 of 300. Elapsed time 22.43 sec
IALSRecommender: Validation begins...
EvaluatorHoldout: Processed 