In [None]:
!cp -r ../input/recsys-repo/RecSys_Course_AT_PoliMi-master/* ./
%config Completer.use_jedi = False
import gc
import pandas as pd
import numpy as np
from scipy import sparse as sps
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Recommenders.BaseRecommender import BaseRecommender

In [None]:
#!python run_compile_all_cython.py

In [None]:
# Full interaction data: a three-column CSV (user id, item id, interaction value) with a header row.
URM_all_path = "/kaggle/input/urm-true-binary/URM_True_Binary.csv"
URM_all_dataframe = pd.read_csv(
    URM_all_path,
    sep=",",
    header=0,
    dtype={0: int, 1: int, 2: float},
)
URM_all_dataframe.columns = ["UserID", "ItemID", "Data"]

# Assemble the user x item matrix in COO form (shape inferred from the largest ids),
# then convert to CSR to obtain fast access to rows (users).
URM_all = sps.coo_matrix(
    (
        URM_all_dataframe["Data"].values,
        (URM_all_dataframe["UserID"].values, URM_all_dataframe["ItemID"].values),
    )
).tocsr()

In [None]:
# Cell output: the sparse matrix repr (shape, dtype, stored elements, format) as a quick sanity check.
URM_all

In [None]:
# Training split: same three-column CSV layout (user id, item id, interaction value) with a header row.
URM_train_path = "/kaggle/input/urm-split/Train_df.csv"
URM_train_dataframe = pd.read_csv(filepath_or_buffer=URM_train_path,
                                  sep=",",
                                  dtype={0: int, 1: int, 2: float},
                                  header=0)
URM_train_dataframe.columns = ["UserID", "ItemID", "Data"]

# Pin the shape to URM_all's instead of letting coo_matrix infer it from the largest ids
# found in this split: a split that happens to miss the highest user/item id would otherwise
# produce a smaller matrix than URM_all / URM_val and break training vs. evaluation alignment.
# NOTE(review): assumes the split shares URM_all's id space; coo_matrix raises ValueError
# if an id here falls outside URM_all.shape.
URM_train = sps.coo_matrix((URM_train_dataframe["Data"].values,
                            (URM_train_dataframe["UserID"].values, URM_train_dataframe["ItemID"].values)),
                           shape=URM_all.shape)
URM_train = URM_train.tocsr()  # to obtain fast access to rows (users)

In [None]:
# Cell output: the sparse matrix repr (shape, dtype, stored elements, format) as a quick sanity check.
URM_train

In [None]:
# Validation split (stored as Test_df.csv): same three-column CSV layout with a header row.
URM_val_path = "/kaggle/input/urm-split/Test_df.csv"
URM_val_dataframe = pd.read_csv(filepath_or_buffer=URM_val_path,
                                sep=",",
                                dtype={0: int, 1: int, 2: float},
                                header=0)
URM_val_dataframe.columns = ["UserID", "ItemID", "Data"]

# Pin the shape to URM_all's instead of letting coo_matrix infer it from the largest ids
# found in this split: the evaluator indexes users/items of the trained model, so the
# validation matrix must have the same dimensions as the training one.
# NOTE(review): assumes the split shares URM_all's id space; coo_matrix raises ValueError
# if an id here falls outside URM_all.shape.
URM_val = sps.coo_matrix((URM_val_dataframe["Data"].values,
                          (URM_val_dataframe["UserID"].values, URM_val_dataframe["ItemID"].values)),
                         shape=URM_all.shape)
URM_val = URM_val.tocsr()  # to obtain fast access to rows (users)

In [None]:
# Cell output: the sparse matrix repr (shape, dtype, stored elements, format) as a quick sanity check.
URM_val

In [None]:
# Hold-out evaluator built on the validation matrix, with a single cutoff of 10.
# NOTE(review): the search further down optimizes at cutoff_to_optimize = 10, so this list presumably has to contain it.
evaluator_valid = EvaluatorHoldout(URM_val, cutoff_list=[10])

In [None]:
from Recommenders.BaseSimilarityMatrixRecommender import BaseItemSimilarityMatrixRecommender
from Recommenders.Recommender_utils import similarityMatrixTopK, check_matrix
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit
from sklearn.preprocessing import normalize
import numpy as np
import time
import scipy.sparse as sps

class EASE_R_Recommender(BaseItemSimilarityMatrixRecommender):
    """ EASE_R_Recommender

    Notebook-local definition of the EASE^R item-based model. The item-item weight matrix is
    obtained in closed form by inverting the L2-regularized Gram matrix of URM_train and is
    stored in self.W_sparse, either as a dense array or as a CSR matrix.

        https://arxiv.org/pdf/1905.03375.pdf

    @inproceedings{DBLP:conf/www/Steck19,
          author    = {Harald Steck},
          editor    = {Ling Liu and
                       Ryen W. White and
                       Amin Mantrach and
                       Fabrizio Silvestri and
                       Julian J. McAuley and
                       Ricardo Baeza{-}Yates and
                       Leila Zia},
          title     = {Embarrassingly Shallow Autoencoders for Sparse Data},
          booktitle = {The World Wide Web Conference, {WWW} 2019, San Francisco, CA, USA,
                       May 13-17, 2019},
          pages     = {3251--3257},
          publisher = {{ACM}},
          year      = {2019},
          url       = {https://doi.org/10.1145/3308558.3313710},
          doi       = {10.1145/3308558.3313710},
          timestamp = {Sun, 22 Sep 2019 18:12:47 +0200},
          biburl    = {https://dblp.org/rec/conf/www/Steck19.bib},
          bibsource = {dblp computer science bibliography, https://dblp.org}
    }

    """

    RECOMMENDER_NAME = "EASE_R_Recommender"


    def __init__(self, URM_train, sparse_threshold_quota = None, verbose = True):
        """
        :param URM_train: user-item interaction matrix, forwarded to the base class
        :param sparse_threshold_quota: largest fraction of nonzero cells (nonzero / n_items^2) for which the
            learned matrix is stored in sparse format; None means the matrix is always stored dense
        :param verbose: forwarded to the base class
        """
        # Zero-argument super(): the explicit super(EASE_R_Recommender, self) form looks the class name up
        # in the notebook globals at call time, so re-running this cell (which rebinds the name) would make
        # instances of the previous definition fail with "obj must be an instance or subtype of type".
        super().__init__(URM_train, verbose = verbose)
        self.sparse_threshold_quota = sparse_threshold_quota

    def fit(self, topK=None, l2_norm = 1e3, normalize_matrix = False):
        """
        Compute the item-item weight matrix in closed form and store it in self.W_sparse.

        :param topK: if not None, the weight matrix is filtered with similarityMatrixTopK(k=topK),
            selecting on absolute values
        :param l2_norm: value added to the diagonal of the Gram matrix before inversion
        :param normalize_matrix: if True, self.URM_train is replaced by its row-wise and then column-wise
            L2-normalized version; the replacement persists on the instance after fit returns
        """

        start_time = time.time()
        self._print("Fitting model... ")

        if normalize_matrix:
            # Normalize rows and then columns
            self.URM_train = normalize(self.URM_train, norm='l2', axis=1)
            self.URM_train = normalize(self.URM_train, norm='l2', axis=0)
            self.URM_train = sps.csr_matrix(self.URM_train)


        # Grahm matrix is X^t X, compute dot product
        grahm_matrix = self.URM_train.T.dot(self.URM_train).toarray()

        diag_indices = np.diag_indices(grahm_matrix.shape[0])
        grahm_matrix[diag_indices] += l2_norm

        P = np.linalg.inv(grahm_matrix)
        # The dense item x item Gram matrix is not needed past this point; release it before
        # allocating B so that at most two dense item x item matrices are alive at once
        del grahm_matrix

        # Broadcasting divides every column j by -P[j, j]
        B = P / (-np.diag(P))
        del P

        B[diag_indices] = 0.0


        new_time_value, new_time_unit = seconds_to_biggest_unit(time.time()-start_time)
        self._print("Fitting model... done in {:.2f} {}".format( new_time_value, new_time_unit))

        # Check if the matrix should be saved in a sparse or dense format
        # The matrix is sparse, regardless of the presence of the topK, if the fraction of nonzero cells
        # is at most sparse_threshold_quota
        # B contains positive and negative values, so topK is selected based on the *absolute* value to preserve strong negatives
        if topK is not None:
            B = similarityMatrixTopK(B, k = topK, use_absolute_values = True, verbose = False)


        if self._is_content_sparse_check(B):
            self._print("Detected model matrix to be sparse, changing format.")
            self.W_sparse = check_matrix(B, format='csr', dtype=np.float32)

            # An earlier dense fit() or load_model() on this same instance may have bound the dense scorer
            # as an instance attribute. Drop it so scoring falls back to the inherited implementation,
            # otherwise the dense scorer would be applied to a sparse W_sparse.
            self.__dict__.pop("_compute_item_score", None)

        else:
            self.W_sparse = check_matrix(B, format='npy', dtype=np.float32)
            # W_sparse is a dense array here: mark the format as already checked and route
            # scoring through the dense implementation
            self._W_sparse_format_checked = True
            self._compute_item_score = self._compute_score_W_dense

        gc.collect()


    def _is_content_sparse_check(self, matrix):
        """
        Tell whether `matrix` is sparse enough to be stored in sparse format.

        :param matrix: scipy sparse matrix or numpy array; the density is computed against shape[0]**2
        :return: False if sparse_threshold_quota is None, otherwise whether
            nonzero / shape[0]**2 <= sparse_threshold_quota
        """

        if self.sparse_threshold_quota is None:
            return False

        if sps.issparse(matrix):
            nonzero = matrix.nnz
        else:
            nonzero = np.count_nonzero(matrix)

        return nonzero / (matrix.shape[0]**2) <= self.sparse_threshold_quota



    def _compute_score_W_dense(self, user_id_array, items_to_compute = None):
        """
        Compute item scores as the product of the user profiles and W_sparse.
        Installed as _compute_item_score when W_sparse is stored as a dense array.

        :param user_id_array: indices of the rows of URM_train (users) to score
        :param items_to_compute: optional item indices; when given, every other item gets a score of -inf
        :return: item scores, one row per requested user
        """

        self._check_format()

        user_profile_array = self.URM_train[user_id_array]
        item_scores_all = user_profile_array.dot(self.W_sparse)

        if items_to_compute is None:
            return item_scores_all

        # Start from -inf everywhere and copy over only the requested item columns
        item_scores = np.full((len(user_id_array), self.URM_train.shape[1]), -np.inf, dtype=np.float32)
        item_scores[:, items_to_compute] = item_scores_all[:, items_to_compute]

        return item_scores



    def load_model(self, folder_path, file_name = None):
        """
        Load the model through the base class, then route scoring through the dense
        implementation if the loaded W_sparse is not a scipy sparse matrix.

        :param folder_path: forwarded to the base class load_model
        :param file_name: forwarded to the base class load_model
        """
        super().load_model(folder_path, file_name = file_name)

        if not sps.issparse(self.W_sparse):
            self._W_sparse_format_checked = True
            self._compute_item_score = self._compute_score_W_dense

In [None]:
# Class handed to the hyperparameter search. At this point the name refers to the class defined
# in this notebook, which shadows the EASE_R_Recommender imported in the first cell.
recommender_class = EASE_R_Recommender

In [None]:
import os

# Folder where the hyperparameter search writes its results
output_folder_path = "result_experiments/"

# Create the folder if missing; exist_ok makes this a single idempotent call
# (no separate exists() check that can race with the creation)
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations, of which the first 30% are random starts
n_cases = 200
n_random_starts = int(n_cases*0.3)

# Metric and cutoff the search optimizes
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [None]:
from skopt.space import Real, Integer, Categorical

# Search space for EASE_R_Recommender.fit: one entry per keyword argument of fit().
# normalize_matrix has a single admissible value, so it is effectively held fixed at False.
hyperparameters_range_dictionary = dict(
    topK=Integer(35, 800),
    l2_norm=Real(low=40, high=900, prior="uniform"),
    normalize_matrix=Categorical([False]),
)

In [None]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Bayesian optimizer over the recommender class, scored with the validation evaluator
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_valid,
)

# Arguments used to build and fit each candidate model during the search (trained on URM_train).
# For a CBF model simply put [URM_train, ICM_train] as positional constructor arguments.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
)

# Same arguments, but with the full interaction matrix URM_all instead of URM_train
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_all],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
)

In [None]:
# Run the Bayesian search: n_cases configurations, the first n_random_starts sampled at random.
# NOTE(review): save_model="last" presumably stores the model refit with
# recommender_input_args_last_test (URM_all) - confirm against SearchAbstractClass.
hyperparameterSearch.search(
    recommender_input_args,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    recommender_input_args_last_test=recommender_input_args_last_test,
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    output_folder_path=output_folder_path,                       # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,    # How to call the files
)

In [None]:
from Recommenders.DataIO import DataIO

# Explore the results of the search: the metadata archive is named after the recommender
# and lives in the folder the search wrote to
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    recommender_class.RECOMMENDER_NAME + "_metadata.zip"
)

# Cell output: the entries available in the loaded metadata
search_metadata.keys()

In [None]:
# Pull the "hyperparameters_df" entry out of the search metadata and show it as the cell output
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

In [None]:
# Pull the "result_on_validation_df" entry out of the search metadata and show it as the cell output
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

In [None]:
# Pull the "hyperparameters_best" entry out of the search metadata and show it as the cell output
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

In [None]:
#rec = EASE_R_Recommender(URM_train)

In [None]:
#rec.fit()