In [1]:
import numpy
print(numpy.__version__)

1.24.3


In [2]:
import time
import pandas as pd
import numpy as np
import scipy.sparse as sps
import random as rnd

from scipy.sparse import *

In [8]:
urm_path = './content/data_train.csv'
urm_all_df = pd.read_csv(filepath_or_buffer=urm_path,
                                sep=",",
                                header=0,
                                dtype={0:int, 1:int, 2:float},
                                engine='python')

urm_all_df.columns = ["UserID", "ItemID", "Interaction"]
print ("The number of interactions is {}".format(len(urm_all_df)))


The number of interactions is 478730


In [9]:
userID_unique = urm_all_df["UserID"].unique()
itemID_unique = urm_all_df["ItemID"].unique()

n_users = len(userID_unique)
n_items = len(itemID_unique)
n_interactions = len(urm_all_df)

In [10]:
urm_all = sps.coo_matrix((urm_all_df["Interaction"].values,
                          (urm_all_df["UserID"].values, urm_all_df["ItemID"].values)))

urm_all

<13025x22348 sparse matrix of type '<class 'numpy.float64'>'
	with 478730 stored elements in COOrdinate format>

In [11]:
URM_all = urm_all

### Step 1: Split the data and create the evaluator objects

In [4]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [12]:
from Evaluation.Evaluator import EvaluatorHoldout

URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.8)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train_validation, train_percentage = 0.8)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 2974 (22.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2625 (20.2%) Users that have less than 1 test interactions


### Step 2: Define hyperparameter set for the desired model, in this case rp3beta

In [13]:
from skopt.space import Real, Integer, Categorical

hyperparameters_range_dictionary = {
    "topK": Integer(0, 500),
    "l2_norm": Real(0.00001, 0.9999)
}

### Step 3: Create SearchBayesianSkopt object, providing the desired recommender class and evaluator objects

In [14]:
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

recommender_class = EASE_R_Recommender

hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                         evaluator_validation=evaluator_validation,
                                         evaluator_test=evaluator_test)

Tensorflow is not available


### Step 4: Provide the data needed to create an instance of the model, one trained only on URM_train, the other on URM_train_validation

In [15]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
  
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],     # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = {},
)

In [16]:
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train_validation],     # For a CBF model simply put [URM_train_validation, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = {},
)

### Step 5: Create a result folder and select the number of cases (50 with 30% random is a good number)

In [18]:
import os

output_folder_path = "result_experiments/easer"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)
    
n_cases = 10  # using 10 as an example
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"   
cutoff_to_optimize = 10

In [21]:
!scikit-optimize --version

'scikit-optimize' is not recognized as an internal or external command,
operable program or batch file.


In [22]:
!numpy --version

'numpy' is not recognized as an internal or external command,
operable program or batch file.


### Step 5: Run!

In [19]:
hyperparameterSearch.search(recommender_input_args,
                       recommender_input_args_last_test = recommender_input_args_last_test,
                       hyperparameter_search_space = hyperparameters_range_dictionary,
                       n_cases = n_cases,
                       n_random_starts = n_random_starts,
                       save_model = "last",
                       output_folder_path = output_folder_path, # Where to save the results
                       output_file_name_root = recommender_class.RECOMMENDER_NAME, # How to call the files
                       metric_to_optimize = metric_to_optimize,
                       cutoff_to_optimize = cutoff_to_optimize,
                      )

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 172, 'l2_norm': 0.5920600794727431}
EASE_R_Recommender: URM Detected 836 ( 6.4%) users with no interactions.
EASE_R_Recommender: URM Detected 441 ( 2.0%) items with no interactions.
EASE_R_Recommender: Fitting model... 
SearchBayesianSkopt: Search for 'EASE_R_Recommender' interrupted due to MemoryError.
SearchBayesianSkopt: Search interrupted due to ValueError. The evaluated configurations may have had all the same value.



Traceback (most recent call last):
  File "d:\Programmazione\RecSys\Challenge\RecSys_Course_AT_PoliMi\HyperparameterTuning\SearchBayesianSkopt.py", line 340, in search
    self.result = gp_minimize(self._objective_function_list_input,
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\skopt\optimizer\gp.py", line 259, in gp_minimize
    return base_minimize(
           ^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\skopt\optimizer\base.py", line 300, in base_minimize
    result = optimizer.tell(next_x, next_y)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\skopt\optimizer\optimizer.py", line 483, in tell
    self._check_y_is_valid(x, y)
  File "d:\Anaconda\Lib\site-packages\skopt\optimizer\optimizer.py", line 636, in _check_y_is_valid
    raise ValueError("`func` should return a scalar")
ValueError: `func` should return a scalar


### Check the best model

In [None]:
from Recommenders.DataIO import DataIO

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")

search_metadata.keys()

In [None]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

In [None]:
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

In [None]:
result_best_on_test = search_metadata["result_on_last"]
result_best_on_test

In [None]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

In [None]:
time_df = search_metadata["time_df"]
time_df

In [None]:
exception_list = search_metadata["exception_list"]
exception_list