In [1]:
import os
os.chdir( "../")

In [2]:
import pandas as pd
import numpy as np

path = "Dataset/data_train.csv"
df = pd.read_csv(filepath_or_buffer=path,
                               sep=",",
                               header=1,
                               engine='python',
                               names=['UserID', 'ItemID', 'Interaction'])


df

Unnamed: 0,UserID,ItemID,Interaction
0,1,15,1.0
1,1,16,1.0
2,1,133,1.0
3,1,161,1.0
4,1,187,1.0
...,...,...,...
478724,13024,13605,1.0
478725,13024,13823,1.0
478726,13024,15122,1.0
478727,13024,18185,1.0


In [3]:
user_ids = df["UserID"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
item_ids = df["ItemID"].unique().tolist()
item2item_encoded = {x: i for i, x in enumerate(item_ids)}
item_encoded2item = {i: x for i, x in enumerate(item_ids)}
df["User"] = df["UserID"].map(user2user_encoded)
df["Item"] = df["ItemID"].map(item2item_encoded)

num_users = len(user2user_encoded)
num_items = len(item_encoded2item)
df["Interaction"] = df["Interaction"].values.astype(np.float32)

# min and max ratings will be used to normalize the ratings later
min_rating = 0.0
max_rating = max(df["Interaction"])

print(
    "Number of users: {}, Number of Items: {}, Min rating: {}, Max rating: {}".format(
        num_users, num_items, min_rating, max_rating
    )
)

Number of users: 12638, Number of Items: 22222, Min rating: 0.0, Max rating: 1.0


In [4]:
userId_unique = df["UserID"].unique()
itemId_unique = df["ItemID"].unique()

In [5]:
from sklearn.model_selection import train_test_split
import numpy as np
import scipy.sparse as sps
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample



urm_all = sps.coo_matrix((df["Interaction"].values, 
                          (df["User"].values, df["Item"].values)))

urm_train_validation, urm_test = split_train_in_two_percentage_global_sample(urm_all, train_percentage = 0.80,seed=55)
urm_train, urm_validation = split_train_in_two_percentage_global_sample(urm_train_validation, train_percentage = 0.80,seed=55)



In [6]:
num_users = len(userId_unique)
num_items = len(itemId_unique)

In [7]:
from Recommenders.Recommender_import_list import *
from Evaluation.Evaluator import EvaluatorHoldout
evaluator_validation = EvaluatorHoldout(urm_validation, cutoff_list=[10], ignore_users=[])



EvaluatorHoldout: Ignoring 2608 (20.6%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 0 Users


In [8]:
from Evaluation.Evaluator import EvaluatorHoldout
evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10], ignore_users=[])

EvaluatorHoldout: Ignoring 2144 (17.0%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 0 Users


In [4]:
import os

output_folder_path = "result_experiments_parallel/"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)
    
n_cases = 200  # using 10 as an example
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"   
cutoff_to_optimize = 10

In [10]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt


In [11]:
earlystopping_keywargs = {"validation_every_n": 5,
                          "stop_on_validation": True,
                          "evaluator_object": evaluator_validation,
                          "lower_validations_allowed": 5,
                          "validation_metric": metric_to_optimize,
                          }

In [12]:
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [urm_train],     # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = earlystopping_keywargs,     # Additional hyperparameters for the fit function
)

In [13]:
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [urm_train_validation],     # For a CBF model simply put [URM_train_validation, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = earlystopping_keywargs,     # Additional hyperparameters for the fit function
)

In [14]:
#hyperparameterSearch = SearchBayesianSkopt(recommender_class,
#                                         evaluator_validation=evaluator_validation,
#                                         evaluator_test=evaluator_test)

In [15]:
similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

In [16]:
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative, runHyperparameterSearch_Content
from Recommenders.NonPersonalizedRecommender import TopPop, Random
from Recommenders.GraphBased.P3alphaRecommender import P3alphaRecommender
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
import os, multiprocessing
from functools import partial


runHyperparameterSearch_Collaborative_partial = partial(runHyperparameterSearch_Collaborative,
       URM_train = urm_train,
       URM_train_last_test = urm_train_validation,
       metric_to_optimize = metric_to_optimize,
       cutoff_to_optimize = cutoff_to_optimize,
       evaluator_validation_earlystopping = evaluator_validation,
       evaluator_validation = evaluator_validation,
       evaluator_test = evaluator_test,
       output_folder_path = output_folder_path,
       parallelizeKNN = True,
       allow_weighting = True,
       resume_from_saved = True,
       save_model = "best",
       similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"],
       n_cases = n_cases,
       n_random_starts = n_random_starts)


In [17]:
SLIMEN = {'topK': 530, 'l1_ratio': 0.05017569359096808, 'alpha': 0.001}
P3ALPHA = {'topK': 400, 'alpha': 1.6632815179401539, 'normalize_similarity': True}
RP3 = {'topK': 71, 'alpha': 0.31274648571776065, 'beta': 0.3586324430664178, 'normalize_similarity': True}

In [18]:
#runHyperparameterSearch_Collaborative(recommender_class=RP3betaRecommender,URM_train = urm_train,
#       URM_train_last_test = urm_train_validation,
#       metric_to_optimize = metric_to_optimize,
#       cutoff_to_optimize = cutoff_to_optimize,
#       evaluator_validation_earlystopping = evaluator_validation,
#       evaluator_validation = evaluator_validation,
#       output_folder_path = output_folder_path,
#       evaluator_test = evaluator_test,
#       allow_weighting = True,
#       parallelizeKNN = True,
#       resume_from_saved = True,
#       save_model = "best",
#       similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"],
#       n_cases = n_cases,
#       n_random_starts = n_random_starts)

In [19]:
collaborative_algorithm_list = [
    SLIMElasticNetRecommender,
    P3alphaRecommender,
    RP3betaRecommender
]

In [20]:
#pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
#pool.map(runHyperparameterSearch_Collaborative_partial, collaborative_algorithm_list)

In [21]:
from skopt.space import Real, Integer, Categorical


hyperparameters_range_dictionary = {
    "topK": Integer(5, 300),
    "alpha": Real(low = 0.2651626829923486, high = 0.37644615066224263, prior = 'uniform'),
    "beta": Real(low = 0, high = 1.35
, prior = 'uniform'),
    "normalize_similarity": Categorical([True, False]),
}

In [22]:
recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [urm_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = {},
                EARLYSTOPPING_KEYWORD_ARGS = {},
            )

In [23]:
hyperparameterSearch = SearchBayesianSkopt(RP3betaRecommender,
                                         evaluator_validation=evaluator_validation,
                                         evaluator_test=evaluator_test)

In [24]:
hyperparameterSearch.search(recommender_input_args,
                               hyperparameter_search_space= hyperparameters_range_dictionary,
                               n_cases = 500,
                               n_random_starts = n_random_starts,
                               resume_from_saved = True,
                               save_model = "best",
                               evaluate_on_test = "best",
                               max_total_time = None,
                               output_folder_path = output_folder_path,
                               output_file_name_root = RP3betaRecommender.RECOMMENDER_NAME+ "_last_try",
                               metric_to_optimize = metric_to_optimize,
                               cutoff_to_optimize = cutoff_to_optimize,
                               recommender_input_args_last_test = recommender_input_args_last_test)

SearchBayesianSkopt: Resuming 'RP3betaRecommender_last_try'... Loaded 6 configurations.
Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.0002
Function value obtained: -0.0005
Current minimum: -0.0291
Iteration No: 2 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 201, 'alpha': 0.30334227053358803, 'beta': 0.40658420763691366, 'normalize_similarity': False}
RP3betaRecommender: URM Detected 470 ( 3.7%) users with no interactions.
RP3betaRecommender: URM Detected 329 ( 1.5%) items with no interactions.
RP3betaRecommender: Similarity column 22222 (100.0%), 2953.49 column/sec. Elapsed time 7.52 sec
EvaluatorHoldout: Processed 10030 (100.0%) in 6.70 sec. Users per second: 1497
SearchBayesianSkopt: Config 6 is suboptimal. Config: {'topK': 201, 'alpha': 0.30334227053358803, 'beta': 0.40658420763691366, 'normalize_similarity': False} - results: PRECISION: 0.0549053, PRE

In [25]:
from Recommenders.DataIO import DataIO


data_loader = DataIO(folder_path = output_folder_path)

search_metadata = data_loader.load_data(RP3betaRecommender.RECOMMENDER_NAME + "_lower_topK" + "_metadata.zip")

In [6]:
from Recommenders.DataIO import DataIO
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender


data_loader = DataIO(folder_path = output_folder_path)

search_metadata = data_loader.load_data(SLIMElasticNetRecommender.RECOMMENDER_NAME + "_metadata.zip")

In [7]:


df = search_metadata["hyperparameters_df"]
df["result"] = pd.DataFrame(search_metadata["result_on_validation_df"]["MAP"].to_numpy(),columns=["MAP"])["MAP"]
df

Unnamed: 0,topK,l1_ratio,alpha,result
0,977,0.000019,0.564221,0.017875
1,127,0.000024,0.422338,0.021034
2,452,0.01146,0.969692,0.007588
3,730,0.003337,0.366839,0.017683
4,859,0.001629,0.345141,0.018318
...,...,...,...,...
195,392,0.011957,0.001,0.029264
196,5,0.000142,0.001,0.025104
197,355,0.00001,0.142083,0.023117
198,491,0.000111,0.001,0.02849


In [10]:
df[df["result"] == df["result"].max()]

Unnamed: 0,topK,l1_ratio,alpha,result
124,530,0.050176,0.001,0.030464


In [27]:
_feature_importance(df.drop(columns=["beta"]))

NameError: name '_feature_importance' is not defined