In [1]:

import os 
import sys
# Make the repository root the working directory and put it on sys.path, so
# the project-relative imports and data paths used below resolve when the
# notebook is launched from any sub-directory of the repository.
_project_root_name = 'RecSysChallenge2023-Team'
_launch_dir = os.getcwd()
while os.path.basename(os.getcwd()) != _project_root_name:
    _parent_dir = os.path.dirname(os.getcwd())
    if _parent_dir == os.getcwd():
        # We are at the filesystem root: going up again is a no-op, so without
        # this check the loop would never terminate. Restore the directory we
        # started from and fail with an actionable message instead.
        os.chdir(_launch_dir)
        raise RuntimeError(
            f"Could not find a '{_project_root_name}' directory above "
            f"'{_launch_dir}'; run this notebook from inside the repository."
        )
    os.chdir(_parent_dir)
sys.path.insert(1, os.getcwd())

################################# IMPORT HyperTuning  #################################
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
from skopt.space import Real, Integer, Categorical

################################# IMPORT RECOMMENDERS #################################
#from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.SLIM.SLIMElasticNetRecommender import MultiThreadSLIM_SLIMElasticNetRecommender
from Recommenders.BaseRecommender import BaseRecommender
from Recommenders.IR_feature_weighting import okapi_BM_25

################################## IMPORT LIBRARIES ##################################
import scipy.sparse as sps
from tqdm import tqdm
import pandas as pd
import numpy as np
import similaripy
import math 
import os

import Daniele.Utils.MyDataManager as dm
import Daniele.Utils.MatrixManipulation as mm
import Daniele.Utils.SaveSparceMatrix as ssm


from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout
from sklearn.preprocessing import normalize

import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand instead of reserving it up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # The memory-growth setting has to be identical on every GPU, so it is
        # applied to all of them.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as error:
        # Raised when the GPUs were already initialised before this point;
        # report it and carry on with the existing configuration.
        print(error)

# Load the two user-rating matrices and the two item-content matrices through
# the project's data manager (presumably "views"/"open" interactions, going by
# the accessor names -- confirm against MyDataManager).
URMv = dm.getURMviews()
URMo = dm.getURMopen()
ICMt = dm.getICMt()
ICMl = dm.getICMl()

path_save = "Daniele/Recommenders/SSLIM_Elastic/saved_models"
# exist_ok=True replaces the separate exists() check and its check-then-create race.
os.makedirs(path_save, exist_ok=True)

# Location of every matrix cached on disk. Descriptive names are used instead
# of re-binding `dir`, which shadowed the builtin of the same name.
train_path = os.path.join(path_save, "train.csv")
validation_path = os.path.join(path_save, "URMv_validation.csv")
test_path = os.path.join(path_save, "test.csv")
urm_def_nn_path = os.path.join(path_save, "urm_def_nn.csv")
urm_def_val_nn_path = os.path.join(path_save, "urm_def_val_nn.csv")
cached_paths = (
    train_path,
    validation_path,
    test_path,
    urm_def_nn_path,
    urm_def_val_nn_path,
)

# The cache is only trusted when ALL five files are present. Checking just
# train.csv (written first) meant that a run interrupted while saving left a
# partial cache, and every later run then failed while reading a missing file.
if all(os.path.exists(cached_path) for cached_path in cached_paths):
    URMv_train = ssm.readMatrix(train_path)
    URMv_test = ssm.readMatrix(test_path)
    URMv_validation = ssm.readMatrix(validation_path)
    urm_def_nn = ssm.readMatrix(urm_def_nn_path)
    urm_def_val_nn = ssm.readMatrix(urm_def_val_nn_path)
else:
    # 80/20 split into (train + validation) / test, then 80/20 again into
    # train / validation.
    URMv_train_val, URMv_test = split_train_in_two_percentage_global_sample(URMv, train_percentage = 0.80)
    URMv_train, URMv_validation = split_train_in_two_percentage_global_sample(URMv_train_val, train_percentage = 0.80)

    # Explicit URM built from the train split only: used while tuning.
    urm_def_nn = mm.defaultExplicitURM(urmv=URMv_train,urmo=URMo,icml=ICMl,icmt=ICMt, add_aug=True,appendICM=False,normalize=False)
    # Explicit URM built from train + validation: used for the final test fit.
    urm_def_val_nn = mm.defaultExplicitURM(urmv=URMv_train_val,urmo=URMo,icml=ICMl,icmt=ICMt, add_aug=True,appendICM=False,normalize=False)

    # Everything is (re)written together so the five files always come from
    # the same random split.
    # NOTE(review): assumes ssm.saveMatrix overwrites an existing file, which
    # matters only when recovering from a partial cache -- confirm.
    ssm.saveMatrix(train_path, URMv_train)
    ssm.saveMatrix(validation_path, URMv_validation)
    ssm.saveMatrix(test_path, URMv_test)
    ssm.saveMatrix(urm_def_nn_path, urm_def_nn)
    ssm.saveMatrix(urm_def_val_nn_path, urm_def_val_nn)




"""
################################### USER GROUP ######################################
profile_length = np.ediff1d(sps.csr_matrix(URMv_train).indptr)
sorted_users = np.argsort(profile_length)
users_in_group = sorted_users[0:int(10*(URMv_train.shape[0]/20))]
users_not_in_group_flag = np.isin(sorted_users, users_in_group, invert=True)
users_not_in_group = sorted_users[users_not_in_group_flag]
"""


# Hold-out evaluators, both built with the cutoff list [10]. Restricting them
# to a user group (ignore_users=users_not_in_group) is currently disabled.
evaluator_validation = EvaluatorHoldout(URMv_validation, [10])
evaluator_test = EvaluatorHoldout(URMv_test, [10])

metric_to_optimize = "MAP_MIN_DEN"
recommender_class = MultiThreadSLIM_SLIMElasticNetRecommender

hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
)


def _constructor_only_args(urm):
    """Return search input args whose only content is *urm* as constructor argument.

    Every other argument group (constructor keywords, fit positional/keyword,
    early-stopping keywords) is left empty.
    """
    return SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[urm],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={},
        EARLYSTOPPING_KEYWORD_ARGS={},
    )


# Matrix derived from the train split: the one each candidate is fitted on.
recommender_input_args = _constructor_only_args(urm_def_nn)

# Matrix derived from train + validation: passed to the search as the
# "last test" arguments.
recommender_input_args_last_test = _constructor_only_args(urm_def_val_nn)

# Search space explored by the optimiser. `workers` has a single admissible
# value (2), so it is forwarded to the recommender unchanged rather than tuned.
hyperparameters_range_dictionary = dict(
    topK=Integer(5, 750),
    l1_ratio=Real(low=1e-5, high=1e-2, prior='log-uniform'),
    alpha=Real(low=1, high=4.0, prior='uniform'),
    workers=Categorical([2]),
)


output_folder_path = "Daniele/Recommenders/SSLIM_Elastic/result_experiments/"

# The results folder must exist before the search starts writing into it.
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Search budget (the author also left a smaller alternative: n_cases = 200).
n_cases = 1000
random_start_fraction = 0.3
n_random_starts = int(n_cases * random_start_fraction)
cutoff_to_optimize = 10

# Keyword options of the search, gathered in one place for readability.
search_options = dict(
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="no",
    output_folder_path=output_folder_path,        # where the results are saved
    output_file_name_root="explicit_nn_matrix",   # prefix of the result files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    resume_from_saved=False,
)

hyperparameterSearch.search(recommender_input_args, **search_options)

/Users/daniele/Desktop/RecSys/RecSysChallenge2023-Team
1 Physical GPUs, 1 Logical GPUs
Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-12-29 11:01:30.448114: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-12-29 11:01:30.448263: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


EvaluatorHoldout: Ignoring 2706 ( 6.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 1548 ( 3.7%) Users that have less than 1 test interactions
Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 72, 'l1_ratio': 0.009779101088427757, 'alpha': 2.603816404657903, 'workers': 2}
SLIMElasticNetRecommender: URM Detected 3461 (12.4%) items with no interactions.


100%|██████████| 27968/27968 [22:27<00:00, 20.75it/s] 


EvaluatorHoldout: Processed 38923 (100.0%) in 8.52 sec. Users per second: 4568
SearchBayesianSkopt: New best config found. Config 0: {'topK': 72, 'l1_ratio': 0.009779101088427757, 'alpha': 2.603816404657903, 'workers': 2} - results: PRECISION: 0.0047684, PRECISION_RECALL_MIN_DEN: 0.0098381, RECALL: 0.0095362, MAP: 0.0029707, MAP_MIN_DEN: 0.0060991, MRR: 0.0272829, NDCG: 0.0114026, F1: 0.0063577, HIT_RATE: 0.0438044, ARHR_ALL_HITS: 0.0284738, NOVELTY: 0.0050940, AVERAGE_POPULARITY: 0.1009872, DIVERSITY_MEAN_INTER_LIST: 0.4836400, DIVERSITY_HERFINDAHL: 0.9483628, COVERAGE_ITEM: 0.0037185, COVERAGE_ITEM_HIT: 0.0008224, ITEMS_IN_GT: 0.8417120, COVERAGE_USER: 0.9349972, COVERAGE_USER_HIT: 0.0409570, USERS_IN_GT: 0.9349972, DIVERSITY_GINI: 0.0006735, SHANNON_ENTROPY: 4.5556004, RATIO_DIVERSITY_HERFINDAHL: 0.9487003, RATIO_DIVERSITY_GINI: 0.0015676, RATIO_SHANNON_ENTROPY: 0.3393344, RATIO_AVERAGE_POPULARITY: 1.9219525, RATIO_NOVELTY: 0.3915411, 



  return np.sum(np.divide(np.power(2, scores) - 1, np.log2(np.arange(scores.shape[0], dtype=np.float64) + 2)),


EvaluatorHoldout: Processed 40081 (100.0%) in 8.73 sec. Users per second: 4589
SearchBayesianSkopt: Config evaluated with evaluator_test. Config: {'topK': 72, 'l1_ratio': 0.009779101088427757, 'alpha': 2.603816404657903, 'workers': 2} - results:
CUTOFF: 10 - PRECISION: 0.0058507, PRECISION_RECALL_MIN_DEN: 0.0099325, RECALL: 0.0093412, MAP: 0.0036036, MAP_MIN_DEN: 0.0060644, MRR: 0.0327882, NDCG: 0.0133257, F1: 0.0071949, HIT_RATE: 0.0532671, ARHR_ALL_HITS: 0.0343742, NOVELTY: 0.0050989, AVERAGE_POPULARITY: 0.1000198, DIVERSITY_MEAN_INTER_LIST: 0.4821179, DIVERSITY_HERFINDAHL: 0.9482106, COVERAGE_ITEM: 0.0037185, COVERAGE_ITEM_HIT: 0.0009654, ITEMS_IN_GT: 0.8586241, COVERAGE_USER: 0.9628144, COVERAGE_USER_HIT: 0.0512864, USERS_IN_GT: 0.9628144, DIVERSITY_GINI: 0.0006716, SHANNON_ENTROPY: 4.5515149, RATIO_DIVERSITY_HERFINDAHL: 0.9485481, RATIO_DIVERSITY_GINI: 0.0015633, RATIO_SHANNON_ENTROPY: 0.3390301, RATIO_AVERAGE_POPULARITY: 1.9035424, RATIO_NOVELTY: 0.3919193, 


Iteration No: 1 end

 92%|█████████▏| 25832/27968 [12:54<07:46,  4.58it/s] 

KeyboardInterrupt: 

 92%|█████████▏| 25856/27968 [13:12<07:40,  4.58it/s]