In [1]:
import os
from typing import Tuple, Callable, Dict, Optional, List

import numpy as np
import pandas as pd
import scipy.sparse as sp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
from Recommenders.DataIO import DataIO
from skopt.space import Real, Integer, Categorical
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender,MultiThreadSLIM_SLIMElasticNetRecommender



# Column names imposed on the interactions frame after loading; assigning them
# replaces whatever header the CSV file provides.
columns_name = ['user_id','item_id','impression_list','data']

# Folder that holds the CSV inputs. The default is the original location, so
# behaviour is unchanged on the author's machine; set RECSYS_DATA_DIR to run
# the notebook elsewhere without editing this cell.
DATA_DIR = os.environ.get('RECSYS_DATA_DIR', '/Users/loren/Documents/GitHub/RecSys/dati')

# Item feature files ("length" and "type") and the user/item interaction log.
dataset_l = pd.read_csv(os.path.join(DATA_DIR, 'data_ICM_length.csv'), sep=',')
dataset_t = pd.read_csv(os.path.join(DATA_DIR, 'data_ICM_type.csv'), sep=',')
dataset = pd.read_csv(os.path.join(DATA_DIR, 'interactions_and_impressions.csv'), sep=',')
# Users listed in the "target users" file.
test_users = pd.read_csv(os.path.join(DATA_DIR, 'data_target_users_test.csv'), sep=',')

# Raises ValueError if the file does not have exactly len(columns_name) columns.
dataset.columns = columns_name

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [2]:
# Sanity check on the item identifiers: how many distinct ids there are, plus
# the smallest and largest id. The tuple is the cell's displayed result.
unique_item_ids = dataset.item_id.unique()
(len(unique_item_ids), unique_item_ids.min(), unique_item_ids.max())

(24507, 0, 24506)

In [3]:
# Project helpers that turn the loaded frames into matrices. Only
# build_URM_ICM_onlyURM_item is called in this cell; the other two names are
# imported as in the original cell.
from Data_manager.Build_Matrix.Data_import import (
    build_URM_ICM,
    build_URM_impression,
    build_URM_ICM_onlyURM_item,
)

# Inputs: interactions frame, "type" feature frame, "length" feature frame.
# NOTE(review): presumably URM is the user-item interaction matrix and ICM the
# item-feature matrix restricted to items present in the URM -- confirm against
# the helper's implementation.
URM, ICM = build_URM_ICM_onlyURM_item(dataset, dataset_t, dataset_l)

In [4]:
# Fix the seed before the random holdout so that URM_train / URM_validation /
# URM_test are the same on every "Restart & Run All". This matters here because
# the search below resumes from results saved in earlier sessions and a saved
# model is later reloaded against URM_train.
# NOTE(review): assumes the split helper draws from NumPy's global RNG --
# confirm; if it uses its own generator the seed has to be passed there instead.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# First split: train_percentage = 0.80 goes to train+validation, the rest to test.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM, train_percentage = 0.80
)
# Second split: the train+validation part is split again with the same ratio.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train_validation, train_percentage = 0.80
)

# One evaluator per held-out matrix, both configured with cutoff_list=[10].
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 771 ( 1.9%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 316 ( 0.8%) Users that have less than 1 test interactions


In [5]:
import os

# Folder where the search writes its metadata and saved models.
output_folder_path = "result_experiments_SLIMElasticNetRecommender_2"

# Create the folder if needed; exist_ok avoids the check-then-create race and
# makes the cell safe to re-run.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total configurations to evaluate and how many of them are
# sampled at random before the Bayesian model takes over (30% -> 3 of 10).
n_cases = 10
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# Search space for the three tuned hyperparameters. l1_ratio is sampled
# log-uniformly, alpha uniformly, both in [1e-4, 0.01].
hyperparameters_range_dictionary = {
                "topK": Integer(500, 2000),
                "l1_ratio": Real(low = 1e-4, high = 0.01, prior = 'log-uniform'),
                "alpha": Real(low = 1e-4, high = 0.01, prior = 'uniform'),
            }

# Early-stopping settings. They are NOT passed to the search in this cell
# (EARLYSTOPPING_KEYWORD_ARGS below is an empty dict); the dict is kept for
# recommenders that support early stopping.
# "evaluator_object" now references the evaluator built on URM_validation
# instead of the raw URM_validation matrix, which is not an evaluator.
earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_validation,
                              "lower_validations_allowed": 5,
                              "validation_metric": metric_to_optimize,
                              }

# Arguments used to build and fit each candidate model: the recommender is
# constructed with URM_train only, with no extra fit or early-stopping arguments.
recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = {},
                EARLYSTOPPING_KEYWORD_ARGS = {},
            )

# The class (not an instance) handed to the search.
recommender_class = SLIMElasticNetRecommender


# Search object wired to the validation evaluator and the test evaluator.
hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                        evaluator_validation=evaluator_validation,
                                        evaluator_test=evaluator_test)

In [6]:
# Keyword arguments for the search, gathered in one place for readability.
search_keyword_args = dict(
    hyperparameter_search_space = hyperparameters_range_dictionary,
    n_cases = n_cases,
    n_random_starts = n_random_starts,
    save_model = "last",
    # Where to save the results
    output_folder_path = output_folder_path,
    # How to call the files
    output_file_name_root = recommender_class.RECOMMENDER_NAME,
    metric_to_optimize = metric_to_optimize,
    cutoff_to_optimize = cutoff_to_optimize,
    # Continue from configurations already stored in output_folder_path.
    resume_from_saved = True,
    # NOTE(review): the "last test" arguments are the same object as the search
    # arguments, so the final model is also constructed with URM_train only.
    # Consider whether it should use URM_train_validation -- confirm intent.
    recommender_input_args_last_test = recommender_input_args,
)

# Launch (or resume) the search; kept as the cell's last expression.
hyperparameterSearch.search(recommender_input_args, **search_keyword_args)

SearchBayesianSkopt: Resuming 'SLIMElasticNetRecommender'... Loaded 8 configurations.
Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.6260
Function value obtained: -0.0223
Current minimum: -0.0223
Iteration No: 2 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 1592, 'l1_ratio': 0.00010331259115778786, 'alpha': 0.0015810250823676266}
SLIMElasticNetRecommender: Processed 1701 ( 6.9%) in 5.00 min. Items per second: 5.67
SLIMElasticNetRecommender: Processed 3853 (15.7%) in 10.00 min. Items per second: 6.42
SLIMElasticNetRecommender: Processed 6355 (25.9%) in 15.00 min. Items per second: 7.06
SLIMElasticNetRecommender: Processed 8850 (36.1%) in 49.66 min. Items per second: 2.97
SLIMElasticNetRecommender: Processed 9736 (39.7%) in 54.66 min. Items per second: 2.97
SLIMElasticNetRecommender: Processed 12116 (49.4%) in 59.67 min. Items per second: 3.38
SLIMElasticNetR

In [8]:
# Build a recommender on the training matrix and restore a model saved to disk.
# Note that, despite its name, recommender_class is bound to an *instance* here.
recommender_class = SLIMElasticNetRecommender(URM_train)

output_folder_path = "/Users/loren/Documents/GitHub/RecSys/result_experiments_SLIMElasticNetRecommender_2/"
saved_model_file_name = "SLIMElasticNetRecommender_best_model_last.zip"

recommender_class.load_model(output_folder_path, file_name = saved_model_file_name)

SLIMElasticNetRecommender: Loading model from file '/Users/loren/Documents/GitHub/RecSys/result_experiments_SLIMElasticNetRecommender_2/SLIMElasticNetRecommender_best_model_last.zip'
SLIMElasticNetRecommender: Loading complete


In [9]:
# Evaluate whatever recommender_class is currently bound to with the validation
# evaluator. The call is the cell's last expression, so its return value is
# what the notebook displays below.
evaluator_validation.evaluateRecommender(recommender_class)

EvaluatorHoldout: Processed 41313 (100.0%) in 1.04 min. Users per second: 665


(       PRECISION PRECISION_RECALL_MIN_DEN    RECALL       MAP MAP_MIN_DEN  \
 cutoff                                                                      
 10      0.060272                 0.082384  0.071952  0.028806    0.039067   
 
              MRR      NDCG        F1  HIT_RATE ARHR_ALL_HITS  ...  \
 cutoff                                                        ...   
 10      0.186115  0.083852  0.065596  0.383439      0.229785  ...   
 
        COVERAGE_USER COVERAGE_USER_HIT USERS_IN_GT DIVERSITY_GINI  \
 cutoff                                                              
 10          0.992409          0.380528    0.992409       0.020074   
 
        SHANNON_ENTROPY RATIO_DIVERSITY_HERFINDAHL RATIO_DIVERSITY_GINI  \
 cutoff                                                                   
 10             9.00217                   0.995167             0.039842   
 
        RATIO_SHANNON_ENTROPY RATIO_AVERAGE_POPULARITY RATIO_NOVELTY  
 cutoff                                   

In [11]:
# Inspect the metadata file written by the hyperparameter search.
from Recommenders.DataIO import DataIO
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
import pandas as pd

# Re-bind the name to the class itself; its RECOMMENDER_NAME is the file-name root.
recommender_class = SLIMElasticNetRecommender
output_folder_path = "result_experiments_SLIMElasticNetRecommender_2"

metadata_file_name = recommender_class.RECOMMENDER_NAME + "_metadata.zip"
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(metadata_file_name)

# Mean of the 'train' column of the stored timing table.
time_df = search_metadata["time_df"]
print('mean-time:', time_df['train'].mean(), sep='\n')

# Stored test results and the result of the "last" model.
result_on_test_df = search_metadata["result_on_test_df"]
result_best_on_test = search_metadata["result_on_last"]

print('BEST_HYPERPARAMETERS: ', search_metadata["hyperparameters_best"], sep='\n')
print(pd.DataFrame(result_on_test_df))
print(result_best_on_test)

mean-time:
2781.210033893585
BEST_HYPERPARAMETERS: 
{'topK': 760, 'l1_ratio': 0.0018938490805141548, 'alpha': 0.0027914541410741936}
         PRECISION PRECISION_RECALL_MIN_DEN    RECALL       MAP MAP_MIN_DEN  \
  cutoff                                                                      
0 10      0.059746                 0.081714  0.071245  0.028574    0.038615   
1 10           NaN                      NaN       NaN       NaN         NaN   
2 10      0.059768                 0.081999  0.071651  0.028639    0.038793   
3 10           NaN                      NaN       NaN       NaN         NaN   
4 10           NaN                      NaN       NaN       NaN         NaN   
5 10           NaN                      NaN       NaN       NaN         NaN   
6 10           NaN                      NaN       NaN       NaN         NaN   
7 10      0.059627                 0.081176  0.070624  0.028728    0.038658   
8 10           NaN                      NaN       NaN       NaN         NaN  